diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,20518 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.998022412656559, + "eval_steps": 569, + "global_step": 1137, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0026367831245880024, + "grad_norm": 42.03252029418945, + "learning_rate": 8e-07, + "log_odds_chosen": 0.5063995122909546, + "log_odds_ratio": -0.5107995867729187, + "logits/chosen": -0.22655925154685974, + "logits/rejected": -0.2056657373905182, + "logps/chosen": -3.680752992630005, + "logps/rejected": -4.1695122718811035, + "loss": 10.4272, + "nll_loss": 2.555725574493408, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.36807531118392944, + "rewards/margins": 0.04887596145272255, + "rewards/rejected": -0.4169512689113617, + "step": 1 + }, + { + "epoch": 0.005273566249176005, + "grad_norm": 41.015140533447266, + "learning_rate": 1.6e-06, + "log_odds_chosen": 0.1609557569026947, + "log_odds_ratio": -0.6170504093170166, + "logits/chosen": -0.30842357873916626, + "logits/rejected": -0.3016989827156067, + "logps/chosen": -4.0163726806640625, + "logps/rejected": -4.173500061035156, + "loss": 10.9832, + "nll_loss": 2.6840877532958984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4016372561454773, + "rewards/margins": 0.01571274921298027, + "rewards/rejected": -0.41734999418258667, + "step": 2 + }, + { + "epoch": 0.007910349373764008, + "grad_norm": 39.962039947509766, + "learning_rate": 2.4e-06, + "log_odds_chosen": 0.5912856459617615, + "log_odds_ratio": -0.4688023030757904, + "logits/chosen": -0.3613507151603699, + "logits/rejected": -0.3267977833747864, + "logps/chosen": -4.372102737426758, + "logps/rejected": -4.95222282409668, + "loss": 10.5225, + "nll_loss": 2.583754062652588, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4372102618217468, + "rewards/margins": 0.0580119863152504, + "rewards/rejected": -0.495222270488739, + "step": 3 + }, + { + "epoch": 0.01054713249835201, + "grad_norm": 40.72674560546875, + "learning_rate": 3.2e-06, + "log_odds_chosen": 0.4822363257408142, + "log_odds_ratio": -0.5072091817855835, + "logits/chosen": -0.3044830560684204, + "logits/rejected": -0.28497734665870667, + "logps/chosen": -3.9995956420898438, + "logps/rejected": -4.468729019165039, + "loss": 10.2201, + "nll_loss": 2.5043091773986816, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.3999595642089844, + "rewards/margins": 0.04691329225897789, + "rewards/rejected": -0.44687288999557495, + "step": 4 + }, + { + "epoch": 0.013183915622940013, + "grad_norm": 37.658546447753906, + "learning_rate": 4e-06, + "log_odds_chosen": 0.44297361373901367, + "log_odds_ratio": -0.5074143409729004, + "logits/chosen": -0.3302440047264099, + "logits/rejected": -0.3137558102607727, + "logps/chosen": -3.6166789531707764, + "logps/rejected": -4.0442304611206055, + "loss": 9.9725, + "nll_loss": 2.4423866271972656, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.3616679012775421, + "rewards/margins": 0.04275514930486679, + "rewards/rejected": -0.4044230580329895, + "step": 5 + }, + { + "epoch": 0.015820698747528016, + "grad_norm": 41.972869873046875, + "learning_rate": 4.8e-06, + "log_odds_chosen": 0.5972580909729004, + "log_odds_ratio": -0.46147802472114563, + "logits/chosen": -0.30286896228790283, + "logits/rejected": -0.287975013256073, + "logps/chosen": -4.17189884185791, + "logps/rejected": -4.753922462463379, + "loss": 10.2496, + "nll_loss": 2.516244888305664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.41718989610671997, + "rewards/margins": 0.0582023561000824, + "rewards/rejected": -0.47539228200912476, + "step": 6 + }, + { + "epoch": 0.01845748187211602, + "grad_norm": 40.42627716064453, + "learning_rate": 5.6e-06, + "log_odds_chosen": 0.7452040314674377, + "log_odds_ratio": -0.4109686017036438, + "logits/chosen": -0.2889333963394165, + "logits/rejected": -0.279765248298645, + "logps/chosen": -3.668855667114258, + "logps/rejected": -4.401315689086914, + "loss": 10.3841, + "nll_loss": 2.5549163818359375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36688560247421265, + "rewards/margins": 0.07324595004320145, + "rewards/rejected": -0.4401315450668335, + "step": 7 + }, + { + "epoch": 0.02109426499670402, + "grad_norm": 45.6840934753418, + "learning_rate": 6.4e-06, + "log_odds_chosen": 0.6613704562187195, + "log_odds_ratio": -0.4452037811279297, + "logits/chosen": -0.10419394075870514, + "logits/rejected": -0.09568381309509277, + "logps/chosen": -4.207592487335205, + "logps/rejected": -4.860063552856445, + "loss": 10.319, + "nll_loss": 2.5352323055267334, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.42075929045677185, + "rewards/margins": 0.06524712592363358, + "rewards/rejected": -0.48600637912750244, + "step": 8 + }, + { + "epoch": 0.023731048121292023, + "grad_norm": 36.51398849487305, + "learning_rate": 7.2e-06, + "log_odds_chosen": 0.5675552487373352, + "log_odds_ratio": -0.47734999656677246, + "logits/chosen": -0.3029658794403076, + "logits/rejected": -0.2848775386810303, + "logps/chosen": -3.1933164596557617, + "logps/rejected": -3.740905284881592, + "loss": 10.0633, + "nll_loss": 2.468083381652832, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.31933167576789856, + "rewards/margins": 0.054758865386247635, + "rewards/rejected": -0.3740905225276947, + "step": 9 + }, + { + "epoch": 0.026367831245880026, + "grad_norm": 40.30665969848633, + "learning_rate": 8e-06, + "log_odds_chosen": 0.508488118648529, + "log_odds_ratio": -0.4933461546897888, + "logits/chosen": -0.21680974960327148, + "logits/rejected": -0.19147902727127075, + "logps/chosen": -3.317249298095703, + "logps/rejected": -3.810041904449463, + "loss": 9.8161, + "nll_loss": 2.4046783447265625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33172494173049927, + "rewards/margins": 0.04927929490804672, + "rewards/rejected": -0.3810042142868042, + "step": 10 + }, + { + "epoch": 0.02900461437046803, + "grad_norm": 42.89982604980469, + "learning_rate": 7.992901508429459e-06, + "log_odds_chosen": 0.6581701040267944, + "log_odds_ratio": -0.42420172691345215, + "logits/chosen": -0.1814369112253189, + "logits/rejected": -0.17391075193881989, + "logps/chosen": -3.6342430114746094, + "logps/rejected": -4.276468753814697, + "loss": 9.8359, + "nll_loss": 2.416558265686035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36342427134513855, + "rewards/margins": 0.06422261893749237, + "rewards/rejected": -0.4276469051837921, + "step": 11 + }, + { + "epoch": 0.03164139749505603, + "grad_norm": 37.45441818237305, + "learning_rate": 7.985803016858917e-06, + "log_odds_chosen": 0.5965335369110107, + "log_odds_ratio": -0.4536648690700531, + "logits/chosen": -0.27103012800216675, + "logits/rejected": -0.2577633261680603, + "logps/chosen": -3.327211856842041, + "logps/rejected": -3.900315046310425, + "loss": 9.3915, + "nll_loss": 2.3025002479553223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33272117376327515, + "rewards/margins": 0.057310331612825394, + "rewards/rejected": -0.39003151655197144, + "step": 12 + }, + { + "epoch": 0.034278180619644036, + "grad_norm": 40.52745819091797, + "learning_rate": 7.978704525288376e-06, + "log_odds_chosen": 0.32027924060821533, + "log_odds_ratio": -0.5618823170661926, + "logits/chosen": -0.26642847061157227, + "logits/rejected": -0.21151301264762878, + "logps/chosen": -4.011443138122559, + "logps/rejected": -4.31336784362793, + "loss": 9.2044, + "nll_loss": 2.244905471801758, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4011443257331848, + "rewards/margins": 0.03019241988658905, + "rewards/rejected": -0.43133679032325745, + "step": 13 + }, + { + "epoch": 0.03691496374423204, + "grad_norm": 34.13896179199219, + "learning_rate": 7.971606033717835e-06, + "log_odds_chosen": 0.8332005739212036, + "log_odds_ratio": -0.36455652117729187, + "logits/chosen": -0.34497541189193726, + "logits/rejected": -0.3069722056388855, + "logps/chosen": -2.7453174591064453, + "logps/rejected": -3.534782886505127, + "loss": 8.7255, + "nll_loss": 2.144930601119995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.274531751871109, + "rewards/margins": 0.0789465457201004, + "rewards/rejected": -0.3534783124923706, + "step": 14 + }, + { + "epoch": 0.03955174686882004, + "grad_norm": 39.49089431762695, + "learning_rate": 7.964507542147292e-06, + "log_odds_chosen": 0.5139614343643188, + "log_odds_ratio": -0.4868500828742981, + "logits/chosen": -0.2310849130153656, + "logits/rejected": -0.18353192508220673, + "logps/chosen": -3.5987987518310547, + "logps/rejected": -4.098852157592773, + "loss": 9.3649, + "nll_loss": 2.292545795440674, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.35987988114356995, + "rewards/margins": 0.05000537261366844, + "rewards/rejected": -0.4098852574825287, + "step": 15 + }, + { + "epoch": 0.04218852999340804, + "grad_norm": 36.65208435058594, + "learning_rate": 7.957409050576753e-06, + "log_odds_chosen": 0.5320571064949036, + "log_odds_ratio": -0.4787905514240265, + "logits/chosen": -0.2220565676689148, + "logits/rejected": -0.17649269104003906, + "logps/chosen": -3.046041250228882, + "logps/rejected": -3.5599379539489746, + "loss": 8.9619, + "nll_loss": 2.192599058151245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30460411310195923, + "rewards/margins": 0.051389675587415695, + "rewards/rejected": -0.3559938073158264, + "step": 16 + }, + { + "epoch": 0.04482531311799604, + "grad_norm": 37.12849807739258, + "learning_rate": 7.95031055900621e-06, + "log_odds_chosen": 0.4501737654209137, + "log_odds_ratio": -0.5116550326347351, + "logits/chosen": -0.2232360541820526, + "logits/rejected": -0.19859637320041656, + "logps/chosen": -2.8962063789367676, + "logps/rejected": -3.327422857284546, + "loss": 8.5734, + "nll_loss": 2.0921807289123535, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.28962063789367676, + "rewards/margins": 0.04312165826559067, + "rewards/rejected": -0.332742303609848, + "step": 17 + }, + { + "epoch": 0.047462096242584045, + "grad_norm": 41.37774658203125, + "learning_rate": 7.943212067435669e-06, + "log_odds_chosen": 0.6656603813171387, + "log_odds_ratio": -0.44296392798423767, + "logits/chosen": -0.0800028070807457, + "logits/rejected": -0.06333664804697037, + "logps/chosen": -3.446854829788208, + "logps/rejected": -4.09830379486084, + "loss": 8.5558, + "nll_loss": 2.0946435928344727, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.34468552470207214, + "rewards/margins": 0.06514490395784378, + "rewards/rejected": -0.40983039140701294, + "step": 18 + }, + { + "epoch": 0.05009887936717205, + "grad_norm": 40.35208511352539, + "learning_rate": 7.936113575865128e-06, + "log_odds_chosen": 0.5608517527580261, + "log_odds_ratio": -0.47481366991996765, + "logits/chosen": -0.14373314380645752, + "logits/rejected": -0.12235800921916962, + "logps/chosen": -3.744997978210449, + "logps/rejected": -4.289097309112549, + "loss": 8.5693, + "nll_loss": 2.0948424339294434, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.3744997978210449, + "rewards/margins": 0.05440989136695862, + "rewards/rejected": -0.4289097189903259, + "step": 19 + }, + { + "epoch": 0.05273566249176005, + "grad_norm": 35.29018783569336, + "learning_rate": 7.929015084294587e-06, + "log_odds_chosen": 0.399916410446167, + "log_odds_ratio": -0.52656090259552, + "logits/chosen": -0.2085961252450943, + "logits/rejected": -0.178694948554039, + "logps/chosen": -3.017723560333252, + "logps/rejected": -3.3984627723693848, + "loss": 8.2289, + "nll_loss": 2.004572868347168, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.3017723560333252, + "rewards/margins": 0.038073912262916565, + "rewards/rejected": -0.33984625339508057, + "step": 20 + }, + { + "epoch": 0.055372445616348055, + "grad_norm": 33.76988220214844, + "learning_rate": 7.921916592724046e-06, + "log_odds_chosen": 0.5948774814605713, + "log_odds_ratio": -0.4468332827091217, + "logits/chosen": -0.23783668875694275, + "logits/rejected": -0.13789881765842438, + "logps/chosen": -2.8810017108917236, + "logps/rejected": -3.4495158195495605, + "loss": 8.3768, + "nll_loss": 2.0495107173919678, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2881001830101013, + "rewards/margins": 0.05685139447450638, + "rewards/rejected": -0.3449515700340271, + "step": 21 + }, + { + "epoch": 0.05800922874093606, + "grad_norm": 36.270206451416016, + "learning_rate": 7.914818101153505e-06, + "log_odds_chosen": 0.4766322374343872, + "log_odds_ratio": -0.4933544993400574, + "logits/chosen": -0.12822745740413666, + "logits/rejected": -0.10519365966320038, + "logps/chosen": -3.1237192153930664, + "logps/rejected": -3.577584743499756, + "loss": 8.0839, + "nll_loss": 1.9716460704803467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31237196922302246, + "rewards/margins": 0.04538654536008835, + "rewards/rejected": -0.35775846242904663, + "step": 22 + }, + { + "epoch": 0.06064601186552406, + "grad_norm": 36.74298095703125, + "learning_rate": 7.907719609582964e-06, + "log_odds_chosen": 0.6195639371871948, + "log_odds_ratio": -0.440764844417572, + "logits/chosen": -0.16736823320388794, + "logits/rejected": -0.14166246354579926, + "logps/chosen": -3.133723497390747, + "logps/rejected": -3.726114273071289, + "loss": 7.7208, + "nll_loss": 1.8861268758773804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31337234377861023, + "rewards/margins": 0.05923907458782196, + "rewards/rejected": -0.3726114332675934, + "step": 23 + }, + { + "epoch": 0.06328279499011207, + "grad_norm": 31.225141525268555, + "learning_rate": 7.900621118012423e-06, + "log_odds_chosen": 0.2814294099807739, + "log_odds_ratio": -0.568894624710083, + "logits/chosen": -0.27078360319137573, + "logits/rejected": -0.26242709159851074, + "logps/chosen": -2.6045753955841064, + "logps/rejected": -2.865044593811035, + "loss": 7.9579, + "nll_loss": 1.9325947761535645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2604575455188751, + "rewards/margins": 0.026046905666589737, + "rewards/rejected": -0.28650444746017456, + "step": 24 + }, + { + "epoch": 0.06591957811470006, + "grad_norm": 35.55276870727539, + "learning_rate": 7.893522626441881e-06, + "log_odds_chosen": 0.5024213790893555, + "log_odds_ratio": -0.4903920292854309, + "logits/chosen": -0.20812958478927612, + "logits/rejected": -0.17434272170066833, + "logps/chosen": -3.215907335281372, + "logps/rejected": -3.6927273273468018, + "loss": 7.617, + "nll_loss": 1.8552184104919434, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.32159075140953064, + "rewards/margins": 0.047681987285614014, + "rewards/rejected": -0.36927270889282227, + "step": 25 + }, + { + "epoch": 0.06855636123928807, + "grad_norm": 38.64487838745117, + "learning_rate": 7.886424134871339e-06, + "log_odds_chosen": 0.4476067125797272, + "log_odds_ratio": -0.51563960313797, + "logits/chosen": -0.0307003203779459, + "logits/rejected": -0.011324996128678322, + "logps/chosen": -3.4396426677703857, + "logps/rejected": -3.8734965324401855, + "loss": 7.9103, + "nll_loss": 1.926013469696045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34396427869796753, + "rewards/margins": 0.043385379016399384, + "rewards/rejected": -0.3873496651649475, + "step": 26 + }, + { + "epoch": 0.07119314436387607, + "grad_norm": 31.943300247192383, + "learning_rate": 7.8793256433008e-06, + "log_odds_chosen": 0.36695021390914917, + "log_odds_ratio": -0.5485100746154785, + "logits/chosen": -0.3074036240577698, + "logits/rejected": -0.25627627968788147, + "logps/chosen": -2.7426533699035645, + "logps/rejected": -3.0948386192321777, + "loss": 7.64, + "nll_loss": 1.855139970779419, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2742653489112854, + "rewards/margins": 0.0352184996008873, + "rewards/rejected": -0.3094838261604309, + "step": 27 + }, + { + "epoch": 0.07382992748846408, + "grad_norm": 32.09175491333008, + "learning_rate": 7.872227151730256e-06, + "log_odds_chosen": 0.6224421262741089, + "log_odds_ratio": -0.43833571672439575, + "logits/chosen": -0.2338484525680542, + "logits/rejected": -0.19638372957706451, + "logps/chosen": -2.5678858757019043, + "logps/rejected": -3.1542539596557617, + "loss": 7.3637, + "nll_loss": 1.7970834970474243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25678858160972595, + "rewards/margins": 0.05863682180643082, + "rewards/rejected": -0.31542539596557617, + "step": 28 + }, + { + "epoch": 0.07646671061305207, + "grad_norm": 35.37105941772461, + "learning_rate": 7.865128660159715e-06, + "log_odds_chosen": 0.4396553933620453, + "log_odds_ratio": -0.5348554849624634, + "logits/chosen": -0.12305408716201782, + "logits/rejected": -0.096021369099617, + "logps/chosen": -2.859783411026001, + "logps/rejected": -3.2838082313537598, + "loss": 6.9916, + "nll_loss": 1.6944115161895752, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2859783172607422, + "rewards/margins": 0.042402494698762894, + "rewards/rejected": -0.328380823135376, + "step": 29 + }, + { + "epoch": 0.07910349373764008, + "grad_norm": 33.27079772949219, + "learning_rate": 7.858030168589174e-06, + "log_odds_chosen": 0.4660126864910126, + "log_odds_ratio": -0.519025981426239, + "logits/chosen": -0.2649393379688263, + "logits/rejected": -0.227127343416214, + "logps/chosen": -2.5917606353759766, + "logps/rejected": -3.015207052230835, + "loss": 6.691, + "nll_loss": 1.6208468675613403, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2591760754585266, + "rewards/margins": 0.04234464839100838, + "rewards/rejected": -0.3015207052230835, + "step": 30 + }, + { + "epoch": 0.08174027686222808, + "grad_norm": 32.39740753173828, + "learning_rate": 7.850931677018633e-06, + "log_odds_chosen": 0.20572400093078613, + "log_odds_ratio": -0.6036237478256226, + "logits/chosen": -0.21612390875816345, + "logits/rejected": -0.195010244846344, + "logps/chosen": -2.8395891189575195, + "logps/rejected": -3.036059856414795, + "loss": 7.402, + "nll_loss": 1.7901456356048584, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.28395891189575195, + "rewards/margins": 0.01964707300066948, + "rewards/rejected": -0.3036060035228729, + "step": 31 + }, + { + "epoch": 0.08437705998681608, + "grad_norm": 31.24039649963379, + "learning_rate": 7.843833185448092e-06, + "log_odds_chosen": 0.22066299617290497, + "log_odds_ratio": -0.5980744957923889, + "logits/chosen": -0.25658881664276123, + "logits/rejected": -0.2568542957305908, + "logps/chosen": -2.5169811248779297, + "logps/rejected": -2.7204983234405518, + "loss": 7.0351, + "nll_loss": 1.6989648342132568, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2516981363296509, + "rewards/margins": 0.0203517135232687, + "rewards/rejected": -0.27204984426498413, + "step": 32 + }, + { + "epoch": 0.08701384311140409, + "grad_norm": 31.878759384155273, + "learning_rate": 7.836734693877551e-06, + "log_odds_chosen": 0.6456508636474609, + "log_odds_ratio": -0.43579402565956116, + "logits/chosen": -0.23061901330947876, + "logits/rejected": -0.19082020223140717, + "logps/chosen": -2.4211320877075195, + "logps/rejected": -3.021599292755127, + "loss": 6.5759, + "nll_loss": 1.6003878116607666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24211320281028748, + "rewards/margins": 0.060046710073947906, + "rewards/rejected": -0.3021599054336548, + "step": 33 + }, + { + "epoch": 0.08965062623599208, + "grad_norm": 28.477542877197266, + "learning_rate": 7.82963620230701e-06, + "log_odds_chosen": 0.33213815093040466, + "log_odds_ratio": -0.5480948090553284, + "logits/chosen": -0.3933866322040558, + "logits/rejected": -0.337604820728302, + "logps/chosen": -2.40647029876709, + "logps/rejected": -2.713749408721924, + "loss": 6.5337, + "nll_loss": 1.5786075592041016, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2406470775604248, + "rewards/margins": 0.030727902427315712, + "rewards/rejected": -0.2713749408721924, + "step": 34 + }, + { + "epoch": 0.0922874093605801, + "grad_norm": 31.25120735168457, + "learning_rate": 7.822537710736469e-06, + "log_odds_chosen": 0.4866493344306946, + "log_odds_ratio": -0.49146533012390137, + "logits/chosen": -0.24240362644195557, + "logits/rejected": -0.19784504175186157, + "logps/chosen": -2.4882521629333496, + "logps/rejected": -2.9402499198913574, + "loss": 6.5205, + "nll_loss": 1.5809718370437622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24882523715496063, + "rewards/margins": 0.04519975930452347, + "rewards/rejected": -0.2940249741077423, + "step": 35 + }, + { + "epoch": 0.09492419248516809, + "grad_norm": 30.688030242919922, + "learning_rate": 7.815439219165928e-06, + "log_odds_chosen": 0.5005679726600647, + "log_odds_ratio": -0.47987300157546997, + "logits/chosen": -0.21071836352348328, + "logits/rejected": -0.17459529638290405, + "logps/chosen": -2.2092747688293457, + "logps/rejected": -2.6628780364990234, + "loss": 5.9388, + "nll_loss": 1.4367212057113647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22092747688293457, + "rewards/margins": 0.04536033049225807, + "rewards/rejected": -0.26628780364990234, + "step": 36 + }, + { + "epoch": 0.0975609756097561, + "grad_norm": 29.692096710205078, + "learning_rate": 7.808340727595385e-06, + "log_odds_chosen": 0.42486512660980225, + "log_odds_ratio": -0.5341512560844421, + "logits/chosen": -0.25842246413230896, + "logits/rejected": -0.2083197683095932, + "logps/chosen": -2.398265838623047, + "logps/rejected": -2.7930338382720947, + "loss": 6.3507, + "nll_loss": 1.5342501401901245, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23982658982276917, + "rewards/margins": 0.03947678953409195, + "rewards/rejected": -0.2793033719062805, + "step": 37 + }, + { + "epoch": 0.1001977587343441, + "grad_norm": 30.48540496826172, + "learning_rate": 7.801242236024844e-06, + "log_odds_chosen": 0.4453636705875397, + "log_odds_ratio": -0.5074580907821655, + "logits/chosen": -0.21573767066001892, + "logits/rejected": -0.18589571118354797, + "logps/chosen": -2.112009048461914, + "logps/rejected": -2.5102884769439697, + "loss": 5.7095, + "nll_loss": 1.3766206502914429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21120090782642365, + "rewards/margins": 0.039827942848205566, + "rewards/rejected": -0.251028835773468, + "step": 38 + }, + { + "epoch": 0.10283454185893211, + "grad_norm": 30.601762771606445, + "learning_rate": 7.794143744454303e-06, + "log_odds_chosen": 0.42712199687957764, + "log_odds_ratio": -0.5272745490074158, + "logits/chosen": -0.21021857857704163, + "logits/rejected": -0.15227194130420685, + "logps/chosen": -2.536367416381836, + "logps/rejected": -2.927422046661377, + "loss": 6.0723, + "nll_loss": 1.465356469154358, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2536367177963257, + "rewards/margins": 0.03910548612475395, + "rewards/rejected": -0.29274219274520874, + "step": 39 + }, + { + "epoch": 0.1054713249835201, + "grad_norm": 30.587663650512695, + "learning_rate": 7.787045252883762e-06, + "log_odds_chosen": 0.3872503340244293, + "log_odds_ratio": -0.540022075176239, + "logits/chosen": -0.20031091570854187, + "logits/rejected": -0.16761323809623718, + "logps/chosen": -2.5356223583221436, + "logps/rejected": -2.9007105827331543, + "loss": 6.0481, + "nll_loss": 1.4580141305923462, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.25356224179267883, + "rewards/margins": 0.03650882840156555, + "rewards/rejected": -0.2900710701942444, + "step": 40 + }, + { + "epoch": 0.10810810810810811, + "grad_norm": 28.171480178833008, + "learning_rate": 7.77994676131322e-06, + "log_odds_chosen": 0.5199911594390869, + "log_odds_ratio": -0.4783152937889099, + "logits/chosen": -0.29706722497940063, + "logits/rejected": -0.26550137996673584, + "logps/chosen": -2.006732940673828, + "logps/rejected": -2.4726297855377197, + "loss": 5.4301, + "nll_loss": 1.3096997737884521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20067329704761505, + "rewards/margins": 0.0465896800160408, + "rewards/rejected": -0.24726298451423645, + "step": 41 + }, + { + "epoch": 0.11074489123269611, + "grad_norm": 31.35000228881836, + "learning_rate": 7.77284826974268e-06, + "log_odds_chosen": 0.34326064586639404, + "log_odds_ratio": -0.55986088514328, + "logits/chosen": -0.12557223439216614, + "logits/rejected": -0.1102723777294159, + "logps/chosen": -2.5316214561462402, + "logps/rejected": -2.8401966094970703, + "loss": 5.5317, + "nll_loss": 1.3269493579864502, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.253162145614624, + "rewards/margins": 0.030857522040605545, + "rewards/rejected": -0.28401967883110046, + "step": 42 + }, + { + "epoch": 0.1133816743572841, + "grad_norm": 28.0591983795166, + "learning_rate": 7.765749778172138e-06, + "log_odds_chosen": 0.31989234685897827, + "log_odds_ratio": -0.5515447854995728, + "logits/chosen": -0.2060682773590088, + "logits/rejected": -0.17958971858024597, + "logps/chosen": -2.2945690155029297, + "logps/rejected": -2.5878024101257324, + "loss": 5.7537, + "nll_loss": 1.383264422416687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22945690155029297, + "rewards/margins": 0.02932334691286087, + "rewards/rejected": -0.25878024101257324, + "step": 43 + }, + { + "epoch": 0.11601845748187212, + "grad_norm": 29.306196212768555, + "learning_rate": 7.758651286601597e-06, + "log_odds_chosen": 0.22514420747756958, + "log_odds_ratio": -0.5939244031906128, + "logits/chosen": -0.14782042801380157, + "logits/rejected": -0.13932658731937408, + "logps/chosen": -2.3671927452087402, + "logps/rejected": -2.5678796768188477, + "loss": 5.5461, + "nll_loss": 1.3271353244781494, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2367192804813385, + "rewards/margins": 0.020068688318133354, + "rewards/rejected": -0.2567879557609558, + "step": 44 + }, + { + "epoch": 0.11865524060646011, + "grad_norm": 27.434146881103516, + "learning_rate": 7.751552795031056e-06, + "log_odds_chosen": 0.39294224977493286, + "log_odds_ratio": -0.5219178199768066, + "logits/chosen": -0.31487947702407837, + "logits/rejected": -0.2837178707122803, + "logps/chosen": -2.429870128631592, + "logps/rejected": -2.7943930625915527, + "loss": 5.7072, + "nll_loss": 1.3745999336242676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2429870069026947, + "rewards/margins": 0.03645233064889908, + "rewards/rejected": -0.2794393301010132, + "step": 45 + }, + { + "epoch": 0.12129202373104812, + "grad_norm": 26.80518913269043, + "learning_rate": 7.744454303460515e-06, + "log_odds_chosen": 0.27651965618133545, + "log_odds_ratio": -0.5777378082275391, + "logits/chosen": -0.2640863358974457, + "logits/rejected": -0.24117253720760345, + "logps/chosen": -2.340364456176758, + "logps/rejected": -2.596097469329834, + "loss": 5.7167, + "nll_loss": 1.371401071548462, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.23403644561767578, + "rewards/margins": 0.02557331509888172, + "rewards/rejected": -0.25960975885391235, + "step": 46 + }, + { + "epoch": 0.12392880685563612, + "grad_norm": 24.878803253173828, + "learning_rate": 7.737355811889972e-06, + "log_odds_chosen": 0.23489218950271606, + "log_odds_ratio": -0.590002179145813, + "logits/chosen": -0.3505256772041321, + "logits/rejected": -0.3077712953090668, + "logps/chosen": -2.2546756267547607, + "logps/rejected": -2.4655075073242188, + "loss": 5.5538, + "nll_loss": 1.3294399976730347, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22546756267547607, + "rewards/margins": 0.021083198487758636, + "rewards/rejected": -0.2465507686138153, + "step": 47 + }, + { + "epoch": 0.12656558998022413, + "grad_norm": 26.178964614868164, + "learning_rate": 7.730257320319431e-06, + "log_odds_chosen": 0.21647870540618896, + "log_odds_ratio": -0.5960606336593628, + "logits/chosen": -0.2653028964996338, + "logits/rejected": -0.22831164300441742, + "logps/chosen": -2.2759790420532227, + "logps/rejected": -2.4663686752319336, + "loss": 5.1854, + "nll_loss": 1.2367491722106934, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2275979220867157, + "rewards/margins": 0.019038967788219452, + "rewards/rejected": -0.24663689732551575, + "step": 48 + }, + { + "epoch": 0.12920237310481214, + "grad_norm": 22.515853881835938, + "learning_rate": 7.72315882874889e-06, + "log_odds_chosen": 0.4059191346168518, + "log_odds_ratio": -0.5174556374549866, + "logits/chosen": -0.4336870610713959, + "logits/rejected": -0.3817170560359955, + "logps/chosen": -1.9080209732055664, + "logps/rejected": -2.260662794113159, + "loss": 5.2487, + "nll_loss": 1.2604243755340576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19080209732055664, + "rewards/margins": 0.035264186561107635, + "rewards/rejected": -0.22606629133224487, + "step": 49 + }, + { + "epoch": 0.13183915622940012, + "grad_norm": 22.30443572998047, + "learning_rate": 7.716060337178349e-06, + "log_odds_chosen": 0.35167044401168823, + "log_odds_ratio": -0.5408763289451599, + "logits/chosen": -0.3750547766685486, + "logits/rejected": -0.35194963216781616, + "logps/chosen": -2.000838279724121, + "logps/rejected": -2.3132987022399902, + "loss": 5.5558, + "nll_loss": 1.3348667621612549, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.20008382201194763, + "rewards/margins": 0.03124604932963848, + "rewards/rejected": -0.23132985830307007, + "step": 50 + }, + { + "epoch": 0.13447593935398813, + "grad_norm": 25.707345962524414, + "learning_rate": 7.708961845607808e-06, + "log_odds_chosen": 0.5400259494781494, + "log_odds_ratio": -0.5052967667579651, + "logits/chosen": -0.23230978846549988, + "logits/rejected": -0.21948783099651337, + "logps/chosen": -2.189897298812866, + "logps/rejected": -2.696154832839966, + "loss": 4.6871, + "nll_loss": 1.1212360858917236, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21898972988128662, + "rewards/margins": 0.05062573403120041, + "rewards/rejected": -0.2696154713630676, + "step": 51 + }, + { + "epoch": 0.13711272247857614, + "grad_norm": 24.343521118164062, + "learning_rate": 7.701863354037267e-06, + "log_odds_chosen": 0.1825106143951416, + "log_odds_ratio": -0.6162492036819458, + "logits/chosen": -0.2806670367717743, + "logits/rejected": -0.20971013605594635, + "logps/chosen": -2.2124099731445312, + "logps/rejected": -2.375056505203247, + "loss": 5.2171, + "nll_loss": 1.2426531314849854, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.22124099731445312, + "rewards/margins": 0.016264665871858597, + "rewards/rejected": -0.23750565946102142, + "step": 52 + }, + { + "epoch": 0.13974950560316415, + "grad_norm": 23.199983596801758, + "learning_rate": 7.694764862466726e-06, + "log_odds_chosen": 0.4220227897167206, + "log_odds_ratio": -0.50751793384552, + "logits/chosen": -0.3427724838256836, + "logits/rejected": -0.2741890251636505, + "logps/chosen": -2.321899890899658, + "logps/rejected": -2.708897352218628, + "loss": 5.2747, + "nll_loss": 1.2679214477539062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23218998312950134, + "rewards/margins": 0.038699738681316376, + "rewards/rejected": -0.2708897292613983, + "step": 53 + }, + { + "epoch": 0.14238628872775214, + "grad_norm": 23.460235595703125, + "learning_rate": 7.687666370896184e-06, + "log_odds_chosen": 0.3479459285736084, + "log_odds_ratio": -0.558416485786438, + "logits/chosen": -0.334983229637146, + "logits/rejected": -0.3043859899044037, + "logps/chosen": -2.2959542274475098, + "logps/rejected": -2.6057186126708984, + "loss": 4.7528, + "nll_loss": 1.1323628425598145, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22959542274475098, + "rewards/margins": 0.03097643330693245, + "rewards/rejected": -0.26057183742523193, + "step": 54 + }, + { + "epoch": 0.14502307185234015, + "grad_norm": 21.800100326538086, + "learning_rate": 7.680567879325643e-06, + "log_odds_chosen": 0.5213094353675842, + "log_odds_ratio": -0.47090500593185425, + "logits/chosen": -0.38391953706741333, + "logits/rejected": -0.34351029992103577, + "logps/chosen": -1.900581955909729, + "logps/rejected": -2.3608274459838867, + "loss": 4.8139, + "nll_loss": 1.1563955545425415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19005818665027618, + "rewards/margins": 0.046024568378925323, + "rewards/rejected": -0.2360827624797821, + "step": 55 + }, + { + "epoch": 0.14765985497692816, + "grad_norm": 22.829553604125977, + "learning_rate": 7.6734693877551e-06, + "log_odds_chosen": 0.3803178668022156, + "log_odds_ratio": -0.5260686874389648, + "logits/chosen": -0.317103773355484, + "logits/rejected": -0.2755615711212158, + "logps/chosen": -2.044250249862671, + "logps/rejected": -2.3848509788513184, + "loss": 4.6378, + "nll_loss": 1.106835961341858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20442502200603485, + "rewards/margins": 0.03406006097793579, + "rewards/rejected": -0.23848509788513184, + "step": 56 + }, + { + "epoch": 0.15029663810151614, + "grad_norm": 22.079294204711914, + "learning_rate": 7.666370896184561e-06, + "log_odds_chosen": 0.39094048738479614, + "log_odds_ratio": -0.5204463005065918, + "logits/chosen": -0.3188185691833496, + "logits/rejected": -0.2769733965396881, + "logps/chosen": -1.9722585678100586, + "logps/rejected": -2.312635660171509, + "loss": 4.45, + "nll_loss": 1.0604636669158936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19722585380077362, + "rewards/margins": 0.03403770551085472, + "rewards/rejected": -0.23126356303691864, + "step": 57 + }, + { + "epoch": 0.15293342122610415, + "grad_norm": 22.725160598754883, + "learning_rate": 7.659272404614018e-06, + "log_odds_chosen": 0.36315977573394775, + "log_odds_ratio": -0.5431260466575623, + "logits/chosen": -0.35522258281707764, + "logits/rejected": -0.3284539580345154, + "logps/chosen": -2.1801340579986572, + "logps/rejected": -2.5099594593048096, + "loss": 4.5113, + "nll_loss": 1.0735070705413818, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21801342070102692, + "rewards/margins": 0.03298252820968628, + "rewards/rejected": -0.2509959638118744, + "step": 58 + }, + { + "epoch": 0.15557020435069216, + "grad_norm": 20.704824447631836, + "learning_rate": 7.652173913043479e-06, + "log_odds_chosen": 0.30395179986953735, + "log_odds_ratio": -0.5613071918487549, + "logits/chosen": -0.40550240874290466, + "logits/rejected": -0.36863192915916443, + "logps/chosen": -2.019667625427246, + "logps/rejected": -2.285858154296875, + "loss": 4.3846, + "nll_loss": 1.0400309562683105, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2019667625427246, + "rewards/margins": 0.02661903388798237, + "rewards/rejected": -0.22858580946922302, + "step": 59 + }, + { + "epoch": 0.15820698747528017, + "grad_norm": 20.56707000732422, + "learning_rate": 7.645075421472936e-06, + "log_odds_chosen": 0.1940009742975235, + "log_odds_ratio": -0.6048257946968079, + "logits/chosen": -0.4033902585506439, + "logits/rejected": -0.3881780207157135, + "logps/chosen": -2.059615135192871, + "logps/rejected": -2.232116460800171, + "loss": 4.3671, + "nll_loss": 1.0312800407409668, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.20596152544021606, + "rewards/margins": 0.017250144854187965, + "rewards/rejected": -0.22321167588233948, + "step": 60 + }, + { + "epoch": 0.16084377059986815, + "grad_norm": 19.30805015563965, + "learning_rate": 7.637976929902395e-06, + "log_odds_chosen": 0.286902517080307, + "log_odds_ratio": -0.5759099721908569, + "logits/chosen": -0.51356440782547, + "logits/rejected": -0.4340498149394989, + "logps/chosen": -2.036076307296753, + "logps/rejected": -2.2877118587493896, + "loss": 4.7367, + "nll_loss": 1.1265827417373657, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.20360763370990753, + "rewards/margins": 0.0251635629683733, + "rewards/rejected": -0.22877119481563568, + "step": 61 + }, + { + "epoch": 0.16348055372445616, + "grad_norm": 18.822423934936523, + "learning_rate": 7.630878438331854e-06, + "log_odds_chosen": 0.1922874003648758, + "log_odds_ratio": -0.615166962146759, + "logits/chosen": -0.48963281512260437, + "logits/rejected": -0.4330398440361023, + "logps/chosen": -1.8455352783203125, + "logps/rejected": -2.005401134490967, + "loss": 4.0266, + "nll_loss": 0.9451347589492798, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18455354869365692, + "rewards/margins": 0.015986589714884758, + "rewards/rejected": -0.20054014027118683, + "step": 62 + }, + { + "epoch": 0.16611733684904417, + "grad_norm": 19.095998764038086, + "learning_rate": 7.623779946761314e-06, + "log_odds_chosen": 0.21310049295425415, + "log_odds_ratio": -0.598873496055603, + "logits/chosen": -0.537045955657959, + "logits/rejected": -0.4830659329891205, + "logps/chosen": -1.9617701768875122, + "logps/rejected": -2.1539487838745117, + "loss": 4.0892, + "nll_loss": 0.9624119997024536, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19617702066898346, + "rewards/margins": 0.019217852503061295, + "rewards/rejected": -0.21539486944675446, + "step": 63 + }, + { + "epoch": 0.16875411997363216, + "grad_norm": 18.428142547607422, + "learning_rate": 7.616681455190772e-06, + "log_odds_chosen": 0.41756024956703186, + "log_odds_ratio": -0.523665189743042, + "logits/chosen": -0.5694464445114136, + "logits/rejected": -0.5305702686309814, + "logps/chosen": -1.8997293710708618, + "logps/rejected": -2.250817060470581, + "loss": 4.0725, + "nll_loss": 0.9657527208328247, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18997293710708618, + "rewards/margins": 0.035108763724565506, + "rewards/rejected": -0.22508171200752258, + "step": 64 + }, + { + "epoch": 0.17139090309822017, + "grad_norm": 20.251195907592773, + "learning_rate": 7.60958296362023e-06, + "log_odds_chosen": 0.28324076533317566, + "log_odds_ratio": -0.5701109170913696, + "logits/chosen": -0.4854946434497833, + "logits/rejected": -0.4697068929672241, + "logps/chosen": -2.150897264480591, + "logps/rejected": -2.4024617671966553, + "loss": 3.3262, + "nll_loss": 0.7745460271835327, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21508973836898804, + "rewards/margins": 0.025156456977128983, + "rewards/rejected": -0.24024619162082672, + "step": 65 + }, + { + "epoch": 0.17402768622280818, + "grad_norm": 16.271448135375977, + "learning_rate": 7.6024844720496895e-06, + "log_odds_chosen": 0.35990890860557556, + "log_odds_ratio": -0.5400576591491699, + "logits/chosen": -0.7607088088989258, + "logits/rejected": -0.7190062403678894, + "logps/chosen": -1.8075987100601196, + "logps/rejected": -2.117114543914795, + "loss": 4.0742, + "nll_loss": 0.9645355343818665, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18075986206531525, + "rewards/margins": 0.030951591208577156, + "rewards/rejected": -0.21171148121356964, + "step": 66 + }, + { + "epoch": 0.17666446934739619, + "grad_norm": 17.613418579101562, + "learning_rate": 7.595385980479148e-06, + "log_odds_chosen": 0.33518755435943604, + "log_odds_ratio": -0.5439249277114868, + "logits/chosen": -0.6029800176620483, + "logits/rejected": -0.5759132504463196, + "logps/chosen": -2.030747175216675, + "logps/rejected": -2.32686185836792, + "loss": 3.4952, + "nll_loss": 0.8194155693054199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20307472348213196, + "rewards/margins": 0.029611455276608467, + "rewards/rejected": -0.23268617689609528, + "step": 67 + }, + { + "epoch": 0.17930125247198417, + "grad_norm": 16.98110580444336, + "learning_rate": 7.5882874889086065e-06, + "log_odds_chosen": 0.3332338333129883, + "log_odds_ratio": -0.5504011511802673, + "logits/chosen": -0.6441569924354553, + "logits/rejected": -0.6330841779708862, + "logps/chosen": -1.8422008752822876, + "logps/rejected": -2.1244189739227295, + "loss": 3.2622, + "nll_loss": 0.760502815246582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1842200756072998, + "rewards/margins": 0.02822180651128292, + "rewards/rejected": -0.21244189143180847, + "step": 68 + }, + { + "epoch": 0.18193803559657218, + "grad_norm": 15.15079116821289, + "learning_rate": 7.581188997338065e-06, + "log_odds_chosen": 0.4982764720916748, + "log_odds_ratio": -0.4950292706489563, + "logits/chosen": -0.7367129921913147, + "logits/rejected": -0.6525557041168213, + "logps/chosen": -1.8149136304855347, + "logps/rejected": -2.24688982963562, + "loss": 3.6231, + "nll_loss": 0.856282651424408, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18149137496948242, + "rewards/margins": 0.04319760575890541, + "rewards/rejected": -0.22468897700309753, + "step": 69 + }, + { + "epoch": 0.1845748187211602, + "grad_norm": 15.011249542236328, + "learning_rate": 7.574090505767524e-06, + "log_odds_chosen": 0.3177182972431183, + "log_odds_ratio": -0.5522245764732361, + "logits/chosen": -0.7826690673828125, + "logits/rejected": -0.7186380624771118, + "logps/chosen": -1.934792160987854, + "logps/rejected": -2.2128076553344727, + "loss": 3.9311, + "nll_loss": 0.9275611639022827, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19347921013832092, + "rewards/margins": 0.02780154161155224, + "rewards/rejected": -0.2212807685136795, + "step": 70 + }, + { + "epoch": 0.1872116018457482, + "grad_norm": 14.627659797668457, + "learning_rate": 7.566992014196982e-06, + "log_odds_chosen": 0.38591426610946655, + "log_odds_ratio": -0.5270495414733887, + "logits/chosen": -0.7557522058486938, + "logits/rejected": -0.7283233404159546, + "logps/chosen": -1.9029791355133057, + "logps/rejected": -2.241738796234131, + "loss": 3.7175, + "nll_loss": 0.8766695261001587, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1902979165315628, + "rewards/margins": 0.0338759645819664, + "rewards/rejected": -0.2241738736629486, + "step": 71 + }, + { + "epoch": 0.18984838497033618, + "grad_norm": 14.73865795135498, + "learning_rate": 7.559893522626442e-06, + "log_odds_chosen": 0.1897004246711731, + "log_odds_ratio": -0.6099416017532349, + "logits/chosen": -0.7527236938476562, + "logits/rejected": -0.7232762575149536, + "logps/chosen": -1.9559646844863892, + "logps/rejected": -2.1254239082336426, + "loss": 3.8146, + "nll_loss": 0.8926578164100647, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19559648633003235, + "rewards/margins": 0.01694592274725437, + "rewards/rejected": -0.21254241466522217, + "step": 72 + }, + { + "epoch": 0.1924851680949242, + "grad_norm": 13.457784652709961, + "learning_rate": 7.5527950310559e-06, + "log_odds_chosen": 0.5551372170448303, + "log_odds_ratio": -0.46496695280075073, + "logits/chosen": -0.8097226619720459, + "logits/rejected": -0.7585113048553467, + "logps/chosen": -1.8708417415618896, + "logps/rejected": -2.3605213165283203, + "loss": 3.5045, + "nll_loss": 0.8296377062797546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1870841681957245, + "rewards/margins": 0.04896795004606247, + "rewards/rejected": -0.23605212569236755, + "step": 73 + }, + { + "epoch": 0.1951219512195122, + "grad_norm": 13.562307357788086, + "learning_rate": 7.545696539485359e-06, + "log_odds_chosen": 0.28339290618896484, + "log_odds_ratio": -0.5632253885269165, + "logits/chosen": -0.7512806057929993, + "logits/rejected": -0.7221646904945374, + "logps/chosen": -1.907462477684021, + "logps/rejected": -2.1500632762908936, + "loss": 3.4105, + "nll_loss": 0.7963072657585144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1907462477684021, + "rewards/margins": 0.024260083213448524, + "rewards/rejected": -0.21500633656978607, + "step": 74 + }, + { + "epoch": 0.19775873434410018, + "grad_norm": 13.646141052246094, + "learning_rate": 7.538598047914818e-06, + "log_odds_chosen": 0.5973946452140808, + "log_odds_ratio": -0.4542398750782013, + "logits/chosen": -0.7698791027069092, + "logits/rejected": -0.7156640887260437, + "logps/chosen": -1.7305474281311035, + "logps/rejected": -2.2397546768188477, + "loss": 3.325, + "nll_loss": 0.7858337759971619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17305472493171692, + "rewards/margins": 0.05092073976993561, + "rewards/rejected": -0.22397547960281372, + "step": 75 + }, + { + "epoch": 0.2003955174686882, + "grad_norm": 13.815109252929688, + "learning_rate": 7.531499556344276e-06, + "log_odds_chosen": 0.0341314971446991, + "log_odds_ratio": -0.6800359487533569, + "logits/chosen": -0.7650290131568909, + "logits/rejected": -0.717841386795044, + "logps/chosen": -2.1082820892333984, + "logps/rejected": -2.138892889022827, + "loss": 3.5816, + "nll_loss": 0.8274069428443909, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21082819998264313, + "rewards/margins": 0.00306110642850399, + "rewards/rejected": -0.21388930082321167, + "step": 76 + }, + { + "epoch": 0.2030323005932762, + "grad_norm": 12.866390228271484, + "learning_rate": 7.524401064773736e-06, + "log_odds_chosen": 0.18123148381710052, + "log_odds_ratio": -0.6105220317840576, + "logits/chosen": -0.8196970820426941, + "logits/rejected": -0.7854353189468384, + "logps/chosen": -1.993229866027832, + "logps/rejected": -2.1471104621887207, + "loss": 3.7596, + "nll_loss": 0.8788573741912842, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19932299852371216, + "rewards/margins": 0.015388060361146927, + "rewards/rejected": -0.21471107006072998, + "step": 77 + }, + { + "epoch": 0.20566908371786422, + "grad_norm": 13.226037979125977, + "learning_rate": 7.517302573203194e-06, + "log_odds_chosen": 0.26490336656570435, + "log_odds_ratio": -0.577185332775116, + "logits/chosen": -0.8312850594520569, + "logits/rejected": -0.7897067070007324, + "logps/chosen": -1.751774549484253, + "logps/rejected": -1.9779562950134277, + "loss": 2.9881, + "nll_loss": 0.6893104314804077, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1751774549484253, + "rewards/margins": 0.022618159651756287, + "rewards/rejected": -0.19779562950134277, + "step": 78 + }, + { + "epoch": 0.2083058668424522, + "grad_norm": 12.413025856018066, + "learning_rate": 7.510204081632653e-06, + "log_odds_chosen": 0.20791912078857422, + "log_odds_ratio": -0.6025508046150208, + "logits/chosen": -0.8755151629447937, + "logits/rejected": -0.8113049268722534, + "logps/chosen": -1.8642596006393433, + "logps/rejected": -2.0363807678222656, + "loss": 3.5737, + "nll_loss": 0.8331717252731323, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18642596900463104, + "rewards/margins": 0.01721210777759552, + "rewards/rejected": -0.20363807678222656, + "step": 79 + }, + { + "epoch": 0.2109426499670402, + "grad_norm": 12.264641761779785, + "learning_rate": 7.503105590062112e-06, + "log_odds_chosen": 0.45694607496261597, + "log_odds_ratio": -0.516982913017273, + "logits/chosen": -0.9060852527618408, + "logits/rejected": -0.8781797885894775, + "logps/chosen": -1.6740509271621704, + "logps/rejected": -2.0151681900024414, + "loss": 3.312, + "nll_loss": 0.7763122320175171, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16740509867668152, + "rewards/margins": 0.03411173075437546, + "rewards/rejected": -0.20151683688163757, + "step": 80 + }, + { + "epoch": 0.21357943309162822, + "grad_norm": 12.642634391784668, + "learning_rate": 7.49600709849157e-06, + "log_odds_chosen": 0.24207936227321625, + "log_odds_ratio": -0.5933969020843506, + "logits/chosen": -0.9877604246139526, + "logits/rejected": -0.9305794835090637, + "logps/chosen": -1.8099536895751953, + "logps/rejected": -2.010237455368042, + "loss": 4.1916, + "nll_loss": 0.9885674715042114, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.180995374917984, + "rewards/margins": 0.02002836763858795, + "rewards/rejected": -0.20102375745773315, + "step": 81 + }, + { + "epoch": 0.21621621621621623, + "grad_norm": 12.362634658813477, + "learning_rate": 7.488908606921029e-06, + "log_odds_chosen": 0.2158462405204773, + "log_odds_ratio": -0.5972849130630493, + "logits/chosen": -0.9262905716896057, + "logits/rejected": -0.875029981136322, + "logps/chosen": -1.7606620788574219, + "logps/rejected": -1.9388712644577026, + "loss": 2.9317, + "nll_loss": 0.6732062697410583, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17606621980667114, + "rewards/margins": 0.017820894718170166, + "rewards/rejected": -0.1938871145248413, + "step": 82 + }, + { + "epoch": 0.2188529993408042, + "grad_norm": 11.505016326904297, + "learning_rate": 7.4818101153504875e-06, + "log_odds_chosen": 0.3416220247745514, + "log_odds_ratio": -0.5673665404319763, + "logits/chosen": -0.9135525822639465, + "logits/rejected": -0.8375486135482788, + "logps/chosen": -1.6488395929336548, + "logps/rejected": -1.938331127166748, + "loss": 2.8808, + "nll_loss": 0.6634570360183716, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16488397121429443, + "rewards/margins": 0.02894916944205761, + "rewards/rejected": -0.1938331425189972, + "step": 83 + }, + { + "epoch": 0.22148978246539222, + "grad_norm": 12.884148597717285, + "learning_rate": 7.474711623779946e-06, + "log_odds_chosen": 0.4032338261604309, + "log_odds_ratio": -0.5203972458839417, + "logits/chosen": -0.9477750658988953, + "logits/rejected": -0.9046843647956848, + "logps/chosen": -1.8901233673095703, + "logps/rejected": -2.2456259727478027, + "loss": 3.3532, + "nll_loss": 0.7862505912780762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.189012348651886, + "rewards/margins": 0.035550251603126526, + "rewards/rejected": -0.2245626151561737, + "step": 84 + }, + { + "epoch": 0.22412656558998023, + "grad_norm": 11.539673805236816, + "learning_rate": 7.467613132209405e-06, + "log_odds_chosen": 0.13325880467891693, + "log_odds_ratio": -0.6450076103210449, + "logits/chosen": -0.9659244418144226, + "logits/rejected": -0.900475263595581, + "logps/chosen": -1.8413825035095215, + "logps/rejected": -1.9536837339401245, + "loss": 3.0989, + "nll_loss": 0.7102184891700745, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1841382384300232, + "rewards/margins": 0.011230124160647392, + "rewards/rejected": -0.19536837935447693, + "step": 85 + }, + { + "epoch": 0.2267633487145682, + "grad_norm": 12.20140552520752, + "learning_rate": 7.460514640638864e-06, + "log_odds_chosen": 0.178244948387146, + "log_odds_ratio": -0.6124775409698486, + "logits/chosen": -0.9916198253631592, + "logits/rejected": -0.9214251041412354, + "logps/chosen": -1.8683059215545654, + "logps/rejected": -2.014869213104248, + "loss": 3.8124, + "nll_loss": 0.8918424844741821, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18683059513568878, + "rewards/margins": 0.014656316488981247, + "rewards/rejected": -0.20148691534996033, + "step": 86 + }, + { + "epoch": 0.22940013183915622, + "grad_norm": 12.308550834655762, + "learning_rate": 7.453416149068322e-06, + "log_odds_chosen": 0.07327182590961456, + "log_odds_ratio": -0.6613773107528687, + "logits/chosen": -0.9867488741874695, + "logits/rejected": -0.9688645601272583, + "logps/chosen": -1.8495866060256958, + "logps/rejected": -1.9149181842803955, + "loss": 3.2487, + "nll_loss": 0.7460330128669739, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18495866656303406, + "rewards/margins": 0.006533162668347359, + "rewards/rejected": -0.19149181246757507, + "step": 87 + }, + { + "epoch": 0.23203691496374423, + "grad_norm": 11.27050495147705, + "learning_rate": 7.446317657497782e-06, + "log_odds_chosen": 0.2877511978149414, + "log_odds_ratio": -0.5665717124938965, + "logits/chosen": -1.0289583206176758, + "logits/rejected": -0.9562087059020996, + "logps/chosen": -1.7832393646240234, + "logps/rejected": -2.0177910327911377, + "loss": 2.9845, + "nll_loss": 0.689464271068573, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1783239245414734, + "rewards/margins": 0.02345517836511135, + "rewards/rejected": -0.2017790973186493, + "step": 88 + }, + { + "epoch": 0.23467369808833224, + "grad_norm": 11.31041145324707, + "learning_rate": 7.43921916592724e-06, + "log_odds_chosen": 0.2017640769481659, + "log_odds_ratio": -0.6055243015289307, + "logits/chosen": -1.0026249885559082, + "logits/rejected": -0.9408939480781555, + "logps/chosen": -1.8777215480804443, + "logps/rejected": -2.05841064453125, + "loss": 3.0338, + "nll_loss": 0.6978951096534729, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18777216970920563, + "rewards/margins": 0.01806892268359661, + "rewards/rejected": -0.2058410793542862, + "step": 89 + }, + { + "epoch": 0.23731048121292023, + "grad_norm": 11.861611366271973, + "learning_rate": 7.432120674356698e-06, + "log_odds_chosen": 0.3642200827598572, + "log_odds_ratio": -0.5369521379470825, + "logits/chosen": -1.0210542678833008, + "logits/rejected": -0.9698840975761414, + "logps/chosen": -1.7520647048950195, + "logps/rejected": -2.046531915664673, + "loss": 3.3146, + "nll_loss": 0.7749478816986084, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1752064824104309, + "rewards/margins": 0.02944672666490078, + "rewards/rejected": -0.20465320348739624, + "step": 90 + }, + { + "epoch": 0.23994726433750824, + "grad_norm": 11.018975257873535, + "learning_rate": 7.425022182786158e-06, + "log_odds_chosen": 0.36533892154693604, + "log_odds_ratio": -0.534146785736084, + "logits/chosen": -0.9736407995223999, + "logits/rejected": -0.9505970478057861, + "logps/chosen": -1.6512267589569092, + "logps/rejected": -1.9399025440216064, + "loss": 2.5182, + "nll_loss": 0.5761348009109497, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16512268781661987, + "rewards/margins": 0.028867574408650398, + "rewards/rejected": -0.19399026036262512, + "step": 91 + }, + { + "epoch": 0.24258404746209625, + "grad_norm": 10.942045211791992, + "learning_rate": 7.417923691215616e-06, + "log_odds_chosen": 0.2782820463180542, + "log_odds_ratio": -0.587384819984436, + "logits/chosen": -1.009594202041626, + "logits/rejected": -0.9647431373596191, + "logps/chosen": -1.606307029724121, + "logps/rejected": -1.8420860767364502, + "loss": 2.7104, + "nll_loss": 0.6188517808914185, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16063068807125092, + "rewards/margins": 0.023577921092510223, + "rewards/rejected": -0.18420860171318054, + "step": 92 + }, + { + "epoch": 0.24522083058668426, + "grad_norm": 11.825458526611328, + "learning_rate": 7.410825199645076e-06, + "log_odds_chosen": 0.19983872771263123, + "log_odds_ratio": -0.605749785900116, + "logits/chosen": -1.0021772384643555, + "logits/rejected": -0.9581748247146606, + "logps/chosen": -1.8522577285766602, + "logps/rejected": -2.021942615509033, + "loss": 3.1105, + "nll_loss": 0.7170413732528687, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18522579967975616, + "rewards/margins": 0.016968462616205215, + "rewards/rejected": -0.2021942436695099, + "step": 93 + }, + { + "epoch": 0.24785761371127224, + "grad_norm": 10.83332633972168, + "learning_rate": 7.403726708074534e-06, + "log_odds_chosen": 0.16420339047908783, + "log_odds_ratio": -0.6234915256500244, + "logits/chosen": -1.0329029560089111, + "logits/rejected": -0.9871702194213867, + "logps/chosen": -1.8671634197235107, + "logps/rejected": -2.0037567615509033, + "loss": 3.0731, + "nll_loss": 0.7059216499328613, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18671634793281555, + "rewards/margins": 0.013659341260790825, + "rewards/rejected": -0.20037567615509033, + "step": 94 + }, + { + "epoch": 0.2504943968358603, + "grad_norm": 11.22410774230957, + "learning_rate": 7.3966282165039926e-06, + "log_odds_chosen": 0.1453307718038559, + "log_odds_ratio": -0.628348171710968, + "logits/chosen": -1.0054070949554443, + "logits/rejected": -0.9577959775924683, + "logps/chosen": -1.8484101295471191, + "logps/rejected": -1.9695494174957275, + "loss": 3.3057, + "nll_loss": 0.7635930776596069, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18484100699424744, + "rewards/margins": 0.012113936245441437, + "rewards/rejected": -0.19695493578910828, + "step": 95 + }, + { + "epoch": 0.25313117996044826, + "grad_norm": 10.52470588684082, + "learning_rate": 7.3895297249334515e-06, + "log_odds_chosen": 0.392825186252594, + "log_odds_ratio": -0.5319700241088867, + "logits/chosen": -1.032947063446045, + "logits/rejected": -0.9472732543945312, + "logps/chosen": -1.6224782466888428, + "logps/rejected": -1.9548505544662476, + "loss": 2.8907, + "nll_loss": 0.6694746017456055, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16224783658981323, + "rewards/margins": 0.03323723375797272, + "rewards/rejected": -0.19548507034778595, + "step": 96 + }, + { + "epoch": 0.25576796308503624, + "grad_norm": 11.16720962524414, + "learning_rate": 7.38243123336291e-06, + "log_odds_chosen": 0.28609395027160645, + "log_odds_ratio": -0.5633273720741272, + "logits/chosen": -1.0286046266555786, + "logits/rejected": -0.9637259244918823, + "logps/chosen": -1.7094495296478271, + "logps/rejected": -1.9480677843093872, + "loss": 3.3181, + "nll_loss": 0.7731999158859253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1709449589252472, + "rewards/margins": 0.023861827328801155, + "rewards/rejected": -0.194806769490242, + "step": 97 + }, + { + "epoch": 0.2584047462096243, + "grad_norm": 11.650616645812988, + "learning_rate": 7.375332741792368e-06, + "log_odds_chosen": 0.4847567677497864, + "log_odds_ratio": -0.5067976117134094, + "logits/chosen": -1.0179671049118042, + "logits/rejected": -0.9550538659095764, + "logps/chosen": -1.6734848022460938, + "logps/rejected": -2.0797762870788574, + "loss": 3.3886, + "nll_loss": 0.7964633107185364, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1673484742641449, + "rewards/margins": 0.04062914848327637, + "rewards/rejected": -0.20797762274742126, + "step": 98 + }, + { + "epoch": 0.26104152933421226, + "grad_norm": 11.267426490783691, + "learning_rate": 7.368234250221827e-06, + "log_odds_chosen": 0.3002548813819885, + "log_odds_ratio": -0.5628231763839722, + "logits/chosen": -1.0801610946655273, + "logits/rejected": -0.9543227553367615, + "logps/chosen": -1.6145151853561401, + "logps/rejected": -1.8697000741958618, + "loss": 3.1738, + "nll_loss": 0.7371575236320496, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.161451518535614, + "rewards/margins": 0.02551848441362381, + "rewards/rejected": -0.18696999549865723, + "step": 99 + }, + { + "epoch": 0.26367831245880025, + "grad_norm": 10.404102325439453, + "learning_rate": 7.361135758651286e-06, + "log_odds_chosen": 0.09879818558692932, + "log_odds_ratio": -0.6502638459205627, + "logits/chosen": -0.9633969068527222, + "logits/rejected": -0.9362624883651733, + "logps/chosen": -1.6580994129180908, + "logps/rejected": -1.7272868156433105, + "loss": 2.69, + "nll_loss": 0.6074641346931458, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16580992937088013, + "rewards/margins": 0.006918755359947681, + "rewards/rejected": -0.17272868752479553, + "step": 100 + }, + { + "epoch": 0.2663150955833883, + "grad_norm": 10.600974082946777, + "learning_rate": 7.354037267080744e-06, + "log_odds_chosen": 0.29593682289123535, + "log_odds_ratio": -0.5641055107116699, + "logits/chosen": -1.0604907274246216, + "logits/rejected": -0.9663894176483154, + "logps/chosen": -1.7292687892913818, + "logps/rejected": -1.9780974388122559, + "loss": 2.912, + "nll_loss": 0.6715894937515259, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1729268729686737, + "rewards/margins": 0.024882866069674492, + "rewards/rejected": -0.19780975580215454, + "step": 101 + }, + { + "epoch": 0.26895187870797627, + "grad_norm": 11.276390075683594, + "learning_rate": 7.346938775510204e-06, + "log_odds_chosen": 0.36808863282203674, + "log_odds_ratio": -0.5319298505783081, + "logits/chosen": -1.072847843170166, + "logits/rejected": -0.9937724471092224, + "logps/chosen": -1.7114295959472656, + "logps/rejected": -2.0213842391967773, + "loss": 3.504, + "nll_loss": 0.8228154182434082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17114296555519104, + "rewards/margins": 0.03099547140300274, + "rewards/rejected": -0.20213845372200012, + "step": 102 + }, + { + "epoch": 0.27158866183256425, + "grad_norm": 10.443772315979004, + "learning_rate": 7.339840283939662e-06, + "log_odds_chosen": 0.634390115737915, + "log_odds_ratio": -0.4645514190196991, + "logits/chosen": -1.0298651456832886, + "logits/rejected": -0.9478103518486023, + "logps/chosen": -1.4557925462722778, + "logps/rejected": -1.982097864151001, + "loss": 2.5977, + "nll_loss": 0.6029644012451172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1455792486667633, + "rewards/margins": 0.052630532532930374, + "rewards/rejected": -0.19820979237556458, + "step": 103 + }, + { + "epoch": 0.2742254449571523, + "grad_norm": 10.725059509277344, + "learning_rate": 7.332741792369122e-06, + "log_odds_chosen": 0.23163697123527527, + "log_odds_ratio": -0.5890691876411438, + "logits/chosen": -1.0835130214691162, + "logits/rejected": -1.0381077527999878, + "logps/chosen": -1.6009089946746826, + "logps/rejected": -1.7853424549102783, + "loss": 3.0235, + "nll_loss": 0.6969616413116455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16009089350700378, + "rewards/margins": 0.018443342298269272, + "rewards/rejected": -0.17853423953056335, + "step": 104 + }, + { + "epoch": 0.27686222808174027, + "grad_norm": 10.879688262939453, + "learning_rate": 7.32564330079858e-06, + "log_odds_chosen": 0.26793116331100464, + "log_odds_ratio": -0.578827977180481, + "logits/chosen": -1.0219061374664307, + "logits/rejected": -0.9625946879386902, + "logps/chosen": -1.7450555562973022, + "logps/rejected": -1.9703395366668701, + "loss": 3.4718, + "nll_loss": 0.8100571632385254, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1745055615901947, + "rewards/margins": 0.022528400644659996, + "rewards/rejected": -0.19703397154808044, + "step": 105 + }, + { + "epoch": 0.2794990112063283, + "grad_norm": 11.196900367736816, + "learning_rate": 7.318544809228039e-06, + "log_odds_chosen": 0.29670941829681396, + "log_odds_ratio": -0.5586762428283691, + "logits/chosen": -1.0425081253051758, + "logits/rejected": -0.961692750453949, + "logps/chosen": -1.6713957786560059, + "logps/rejected": -1.9169774055480957, + "loss": 3.293, + "nll_loss": 0.7673801183700562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16713958978652954, + "rewards/margins": 0.02455814741551876, + "rewards/rejected": -0.19169773161411285, + "step": 106 + }, + { + "epoch": 0.2821357943309163, + "grad_norm": 10.09378433227539, + "learning_rate": 7.311446317657498e-06, + "log_odds_chosen": 0.18554000556468964, + "log_odds_ratio": -0.607377290725708, + "logits/chosen": -1.0471107959747314, + "logits/rejected": -0.9914209842681885, + "logps/chosen": -1.6027867794036865, + "logps/rejected": -1.75523042678833, + "loss": 2.7016, + "nll_loss": 0.6146624684333801, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16027866303920746, + "rewards/margins": 0.015244370326399803, + "rewards/rejected": -0.17552302777767181, + "step": 107 + }, + { + "epoch": 0.28477257745550427, + "grad_norm": 11.160083770751953, + "learning_rate": 7.304347826086956e-06, + "log_odds_chosen": 0.24081920087337494, + "log_odds_ratio": -0.5894144773483276, + "logits/chosen": -1.0816594362258911, + "logits/rejected": -0.9936416149139404, + "logps/chosen": -1.6756263971328735, + "logps/rejected": -1.8702760934829712, + "loss": 3.175, + "nll_loss": 0.7348129153251648, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16756264865398407, + "rewards/margins": 0.01946496218442917, + "rewards/rejected": -0.18702760338783264, + "step": 108 + }, + { + "epoch": 0.2874093605800923, + "grad_norm": 10.403040885925293, + "learning_rate": 7.297249334516415e-06, + "log_odds_chosen": 0.15198183059692383, + "log_odds_ratio": -0.6254088878631592, + "logits/chosen": -1.009718894958496, + "logits/rejected": -0.9877020120620728, + "logps/chosen": -1.6084790229797363, + "logps/rejected": -1.7322394847869873, + "loss": 2.6562, + "nll_loss": 0.6015002727508545, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16084790229797363, + "rewards/margins": 0.012376044876873493, + "rewards/rejected": -0.17322394251823425, + "step": 109 + }, + { + "epoch": 0.2900461437046803, + "grad_norm": 10.95654010772705, + "learning_rate": 7.2901508429458735e-06, + "log_odds_chosen": 0.19570016860961914, + "log_odds_ratio": -0.6077170372009277, + "logits/chosen": -1.0052906274795532, + "logits/rejected": -0.9451830983161926, + "logps/chosen": -1.676623821258545, + "logps/rejected": -1.8326776027679443, + "loss": 2.5621, + "nll_loss": 0.5797582268714905, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1676623672246933, + "rewards/margins": 0.015605399385094643, + "rewards/rejected": -0.1832677721977234, + "step": 110 + }, + { + "epoch": 0.2926829268292683, + "grad_norm": 11.047690391540527, + "learning_rate": 7.283052351375332e-06, + "log_odds_chosen": 0.27795833349227905, + "log_odds_ratio": -0.5818898677825928, + "logits/chosen": -1.0167365074157715, + "logits/rejected": -0.9470656514167786, + "logps/chosen": -1.6907787322998047, + "logps/rejected": -1.9295769929885864, + "loss": 2.6876, + "nll_loss": 0.613703191280365, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16907787322998047, + "rewards/margins": 0.023879820480942726, + "rewards/rejected": -0.19295769929885864, + "step": 111 + }, + { + "epoch": 0.2953197099538563, + "grad_norm": 9.781204223632812, + "learning_rate": 7.275953859804791e-06, + "log_odds_chosen": 0.37173882126808167, + "log_odds_ratio": -0.5375426411628723, + "logits/chosen": -1.0105595588684082, + "logits/rejected": -0.9397248029708862, + "logps/chosen": -1.6948808431625366, + "logps/rejected": -2.002723217010498, + "loss": 2.4168, + "nll_loss": 0.5504346489906311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1694880872964859, + "rewards/margins": 0.030784228816628456, + "rewards/rejected": -0.2002723217010498, + "step": 112 + }, + { + "epoch": 0.2979564930784443, + "grad_norm": 9.886716842651367, + "learning_rate": 7.26885536823425e-06, + "log_odds_chosen": 0.30898672342300415, + "log_odds_ratio": -0.5617222785949707, + "logits/chosen": -1.0012271404266357, + "logits/rejected": -0.9353881478309631, + "logps/chosen": -1.5099105834960938, + "logps/rejected": -1.7610509395599365, + "loss": 2.3777, + "nll_loss": 0.5382523536682129, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1509910672903061, + "rewards/margins": 0.0251140296459198, + "rewards/rejected": -0.17610511183738708, + "step": 113 + }, + { + "epoch": 0.3005932762030323, + "grad_norm": 11.912915229797363, + "learning_rate": 7.261756876663708e-06, + "log_odds_chosen": 0.4974936246871948, + "log_odds_ratio": -0.5436064600944519, + "logits/chosen": -1.089568853378296, + "logits/rejected": -0.9672592878341675, + "logps/chosen": -1.887178659439087, + "logps/rejected": -2.3397083282470703, + "loss": 3.9354, + "nll_loss": 0.9295009970664978, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18871785700321198, + "rewards/margins": 0.0452529713511467, + "rewards/rejected": -0.23397082090377808, + "step": 114 + }, + { + "epoch": 0.3032300593276203, + "grad_norm": 10.737141609191895, + "learning_rate": 7.254658385093168e-06, + "log_odds_chosen": 0.2533874213695526, + "log_odds_ratio": -0.5820384621620178, + "logits/chosen": -0.9657982587814331, + "logits/rejected": -0.8962007761001587, + "logps/chosen": -1.7736114263534546, + "logps/rejected": -1.9921071529388428, + "loss": 3.5912, + "nll_loss": 0.8395951390266418, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1773611307144165, + "rewards/margins": 0.021849587559700012, + "rewards/rejected": -0.1992107331752777, + "step": 115 + }, + { + "epoch": 0.3058668424522083, + "grad_norm": 10.761970520019531, + "learning_rate": 7.247559893522626e-06, + "log_odds_chosen": 0.3130797743797302, + "log_odds_ratio": -0.5591657161712646, + "logits/chosen": -0.9927374124526978, + "logits/rejected": -0.9318053722381592, + "logps/chosen": -1.5499687194824219, + "logps/rejected": -1.8103137016296387, + "loss": 2.553, + "nll_loss": 0.5823217034339905, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1549968719482422, + "rewards/margins": 0.026034509763121605, + "rewards/rejected": -0.18103139102458954, + "step": 116 + }, + { + "epoch": 0.3085036255767963, + "grad_norm": 11.074972152709961, + "learning_rate": 7.240461401952084e-06, + "log_odds_chosen": 0.2793109118938446, + "log_odds_ratio": -0.5649005174636841, + "logits/chosen": -1.0155889987945557, + "logits/rejected": -0.9369997978210449, + "logps/chosen": -1.757372498512268, + "logps/rejected": -1.9916359186172485, + "loss": 2.9984, + "nll_loss": 0.6931184530258179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17573726177215576, + "rewards/margins": 0.023426339030265808, + "rewards/rejected": -0.19916360080242157, + "step": 117 + }, + { + "epoch": 0.3111404087013843, + "grad_norm": 10.123946189880371, + "learning_rate": 7.233362910381544e-06, + "log_odds_chosen": 0.48958620429039, + "log_odds_ratio": -0.5133712887763977, + "logits/chosen": -0.988548994064331, + "logits/rejected": -0.906058669090271, + "logps/chosen": -1.6181275844573975, + "logps/rejected": -2.0276811122894287, + "loss": 3.082, + "nll_loss": 0.7191726565361023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16181275248527527, + "rewards/margins": 0.04095536097884178, + "rewards/rejected": -0.20276810228824615, + "step": 118 + }, + { + "epoch": 0.3137771918259723, + "grad_norm": 9.875753402709961, + "learning_rate": 7.226264418811002e-06, + "log_odds_chosen": 0.27764010429382324, + "log_odds_ratio": -0.5703195929527283, + "logits/chosen": -0.9880169630050659, + "logits/rejected": -0.9102073907852173, + "logps/chosen": -1.6844210624694824, + "logps/rejected": -1.9171109199523926, + "loss": 2.8216, + "nll_loss": 0.6483787894248962, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16844210028648376, + "rewards/margins": 0.023268993943929672, + "rewards/rejected": -0.19171112775802612, + "step": 119 + }, + { + "epoch": 0.31641397495056034, + "grad_norm": 11.56274700164795, + "learning_rate": 7.219165927240462e-06, + "log_odds_chosen": 0.36386311054229736, + "log_odds_ratio": -0.5368889570236206, + "logits/chosen": -0.8859333395957947, + "logits/rejected": -0.8559818267822266, + "logps/chosen": -1.591422438621521, + "logps/rejected": -1.8891998529434204, + "loss": 2.8655, + "nll_loss": 0.6626745462417603, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15914225578308105, + "rewards/margins": 0.02977774105966091, + "rewards/rejected": -0.18891999125480652, + "step": 120 + }, + { + "epoch": 0.3190507580751483, + "grad_norm": 10.839788436889648, + "learning_rate": 7.21206743566992e-06, + "log_odds_chosen": 0.2564637064933777, + "log_odds_ratio": -0.5788425803184509, + "logits/chosen": -1.0680105686187744, + "logits/rejected": -0.9847425222396851, + "logps/chosen": -1.6119345426559448, + "logps/rejected": -1.8226040601730347, + "loss": 3.0163, + "nll_loss": 0.6961902379989624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16119346022605896, + "rewards/margins": 0.021066950634121895, + "rewards/rejected": -0.1822603940963745, + "step": 121 + }, + { + "epoch": 0.3216875411997363, + "grad_norm": 10.007757186889648, + "learning_rate": 7.204968944099379e-06, + "log_odds_chosen": 0.2848505973815918, + "log_odds_ratio": -0.5624546408653259, + "logits/chosen": -1.0049948692321777, + "logits/rejected": -0.9579980969429016, + "logps/chosen": -1.6587774753570557, + "logps/rejected": -1.8923622369766235, + "loss": 2.8742, + "nll_loss": 0.6623063087463379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16587774455547333, + "rewards/margins": 0.023358486592769623, + "rewards/rejected": -0.18923622369766235, + "step": 122 + }, + { + "epoch": 0.32432432432432434, + "grad_norm": 10.4876070022583, + "learning_rate": 7.1978704525288375e-06, + "log_odds_chosen": 0.36797964572906494, + "log_odds_ratio": -0.5324498414993286, + "logits/chosen": -1.0323530435562134, + "logits/rejected": -0.9425293803215027, + "logps/chosen": -1.6165523529052734, + "logps/rejected": -1.9190058708190918, + "loss": 3.169, + "nll_loss": 0.7390009760856628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1616552472114563, + "rewards/margins": 0.030245332047343254, + "rewards/rejected": -0.1919005811214447, + "step": 123 + }, + { + "epoch": 0.3269611074489123, + "grad_norm": 9.802966117858887, + "learning_rate": 7.190771960958296e-06, + "log_odds_chosen": 0.13589303195476532, + "log_odds_ratio": -0.6351217031478882, + "logits/chosen": -1.019148826599121, + "logits/rejected": -0.9820281267166138, + "logps/chosen": -1.5778555870056152, + "logps/rejected": -1.6937252283096313, + "loss": 2.7745, + "nll_loss": 0.6301135420799255, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.157785564661026, + "rewards/margins": 0.011586972512304783, + "rewards/rejected": -0.1693725287914276, + "step": 124 + }, + { + "epoch": 0.3295978905735003, + "grad_norm": 10.657818794250488, + "learning_rate": 7.1836734693877545e-06, + "log_odds_chosen": 0.13183920085430145, + "log_odds_ratio": -0.6340517401695251, + "logits/chosen": -0.9704450368881226, + "logits/rejected": -0.9316169619560242, + "logps/chosen": -1.5081100463867188, + "logps/rejected": -1.6166660785675049, + "loss": 2.9152, + "nll_loss": 0.6654000282287598, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15081101655960083, + "rewards/margins": 0.010855593718588352, + "rewards/rejected": -0.161666601896286, + "step": 125 + }, + { + "epoch": 0.33223467369808835, + "grad_norm": 11.420533180236816, + "learning_rate": 7.176574977817213e-06, + "log_odds_chosen": 0.09573184698820114, + "log_odds_ratio": -0.6518878936767578, + "logits/chosen": -1.0469801425933838, + "logits/rejected": -0.981776773929596, + "logps/chosen": -1.6422678232192993, + "logps/rejected": -1.7189463376998901, + "loss": 3.4448, + "nll_loss": 0.7960150837898254, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.16422678530216217, + "rewards/margins": 0.007667848840355873, + "rewards/rejected": -0.1718946397304535, + "step": 126 + }, + { + "epoch": 0.3348714568226763, + "grad_norm": 10.23043441772461, + "learning_rate": 7.169476486246672e-06, + "log_odds_chosen": 0.2894551753997803, + "log_odds_ratio": -0.5663176774978638, + "logits/chosen": -1.011500358581543, + "logits/rejected": -0.9723880887031555, + "logps/chosen": -1.5216894149780273, + "logps/rejected": -1.7557767629623413, + "loss": 2.8609, + "nll_loss": 0.6585900187492371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15216895937919617, + "rewards/margins": 0.02340874634683132, + "rewards/rejected": -0.17557770013809204, + "step": 127 + }, + { + "epoch": 0.3375082399472643, + "grad_norm": 9.419966697692871, + "learning_rate": 7.16237799467613e-06, + "log_odds_chosen": 0.5059428811073303, + "log_odds_ratio": -0.5042520761489868, + "logits/chosen": -0.993411123752594, + "logits/rejected": -0.8944643139839172, + "logps/chosen": -1.553409457206726, + "logps/rejected": -1.9687423706054688, + "loss": 2.466, + "nll_loss": 0.5660730600357056, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15534095466136932, + "rewards/margins": 0.04153328761458397, + "rewards/rejected": -0.1968742460012436, + "step": 128 + }, + { + "epoch": 0.34014502307185235, + "grad_norm": 11.141188621520996, + "learning_rate": 7.15527950310559e-06, + "log_odds_chosen": 0.28306683897972107, + "log_odds_ratio": -0.5711504817008972, + "logits/chosen": -0.9414308667182922, + "logits/rejected": -0.8877083659172058, + "logps/chosen": -1.7126646041870117, + "logps/rejected": -1.9460575580596924, + "loss": 3.4852, + "nll_loss": 0.8141910433769226, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17126646637916565, + "rewards/margins": 0.023339303210377693, + "rewards/rejected": -0.1946057677268982, + "step": 129 + }, + { + "epoch": 0.34278180619644033, + "grad_norm": 10.034116744995117, + "learning_rate": 7.148181011535048e-06, + "log_odds_chosen": 0.40334582328796387, + "log_odds_ratio": -0.5304621458053589, + "logits/chosen": -1.0032262802124023, + "logits/rejected": -0.9937978982925415, + "logps/chosen": -1.5170884132385254, + "logps/rejected": -1.8561803102493286, + "loss": 2.1653, + "nll_loss": 0.4882797300815582, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15170885622501373, + "rewards/margins": 0.033909182995557785, + "rewards/rejected": -0.18561802804470062, + "step": 130 + }, + { + "epoch": 0.34541858932102837, + "grad_norm": 11.854081153869629, + "learning_rate": 7.141082519964508e-06, + "log_odds_chosen": 0.26253604888916016, + "log_odds_ratio": -0.5790401697158813, + "logits/chosen": -1.0138558149337769, + "logits/rejected": -0.8745430707931519, + "logps/chosen": -1.7500555515289307, + "logps/rejected": -1.9733306169509888, + "loss": 3.988, + "nll_loss": 0.9391055703163147, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17500554025173187, + "rewards/margins": 0.022327521815896034, + "rewards/rejected": -0.19733306765556335, + "step": 131 + }, + { + "epoch": 0.34805537244561635, + "grad_norm": 10.411006927490234, + "learning_rate": 7.133984028393966e-06, + "log_odds_chosen": 0.1804034411907196, + "log_odds_ratio": -0.6206458806991577, + "logits/chosen": -1.014014720916748, + "logits/rejected": -0.9626904129981995, + "logps/chosen": -1.5718120336532593, + "logps/rejected": -1.71071457862854, + "loss": 2.8215, + "nll_loss": 0.6433163285255432, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15718120336532593, + "rewards/margins": 0.01389027014374733, + "rewards/rejected": -0.17107146978378296, + "step": 132 + }, + { + "epoch": 0.35069215557020433, + "grad_norm": 10.153091430664062, + "learning_rate": 7.126885536823425e-06, + "log_odds_chosen": 0.36744678020477295, + "log_odds_ratio": -0.5389147996902466, + "logits/chosen": -1.0156335830688477, + "logits/rejected": -0.9082814455032349, + "logps/chosen": -1.5546211004257202, + "logps/rejected": -1.8483304977416992, + "loss": 2.9649, + "nll_loss": 0.6873387098312378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1554621160030365, + "rewards/margins": 0.02937093749642372, + "rewards/rejected": -0.18483306467533112, + "step": 133 + }, + { + "epoch": 0.35332893869479237, + "grad_norm": 10.791301727294922, + "learning_rate": 7.119787045252884e-06, + "log_odds_chosen": 0.2302854359149933, + "log_odds_ratio": -0.5921584367752075, + "logits/chosen": -1.0107197761535645, + "logits/rejected": -0.9321932196617126, + "logps/chosen": -1.6761658191680908, + "logps/rejected": -1.8660277128219604, + "loss": 3.1031, + "nll_loss": 0.7165559530258179, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1676165759563446, + "rewards/margins": 0.018986180424690247, + "rewards/rejected": -0.18660277128219604, + "step": 134 + }, + { + "epoch": 0.35596572181938035, + "grad_norm": 10.180030822753906, + "learning_rate": 7.112688553682342e-06, + "log_odds_chosen": 0.1279923915863037, + "log_odds_ratio": -0.6372387409210205, + "logits/chosen": -1.052191972732544, + "logits/rejected": -0.9900830984115601, + "logps/chosen": -1.6719307899475098, + "logps/rejected": -1.7741820812225342, + "loss": 2.889, + "nll_loss": 0.6585352420806885, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16719307005405426, + "rewards/margins": 0.010225137695670128, + "rewards/rejected": -0.17741820216178894, + "step": 135 + }, + { + "epoch": 0.35860250494396834, + "grad_norm": 11.69919490814209, + "learning_rate": 7.105590062111801e-06, + "log_odds_chosen": 0.2658090889453888, + "log_odds_ratio": -0.5728399753570557, + "logits/chosen": -0.9620678424835205, + "logits/rejected": -0.8741431832313538, + "logps/chosen": -1.8435702323913574, + "logps/rejected": -2.0720698833465576, + "loss": 3.7334, + "nll_loss": 0.8760726451873779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18435701727867126, + "rewards/margins": 0.022849969565868378, + "rewards/rejected": -0.20720697939395905, + "step": 136 + }, + { + "epoch": 0.3612392880685564, + "grad_norm": 11.232748985290527, + "learning_rate": 7.0984915705412596e-06, + "log_odds_chosen": 0.11886944621801376, + "log_odds_ratio": -0.6484676599502563, + "logits/chosen": -0.9772336483001709, + "logits/rejected": -0.9394592046737671, + "logps/chosen": -1.7975430488586426, + "logps/rejected": -1.8997228145599365, + "loss": 3.1832, + "nll_loss": 0.730952262878418, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17975430190563202, + "rewards/margins": 0.010217983275651932, + "rewards/rejected": -0.18997228145599365, + "step": 137 + }, + { + "epoch": 0.36387607119314436, + "grad_norm": 9.921364784240723, + "learning_rate": 7.0913930789707185e-06, + "log_odds_chosen": 0.2520274221897125, + "log_odds_ratio": -0.5773578882217407, + "logits/chosen": -1.0217664241790771, + "logits/rejected": -0.9497706890106201, + "logps/chosen": -1.5283839702606201, + "logps/rejected": -1.7314984798431396, + "loss": 2.6404, + "nll_loss": 0.6023762226104736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15283840894699097, + "rewards/margins": 0.02031143754720688, + "rewards/rejected": -0.17314985394477844, + "step": 138 + }, + { + "epoch": 0.36651285431773234, + "grad_norm": 10.041219711303711, + "learning_rate": 7.0842945874001765e-06, + "log_odds_chosen": 0.5415450930595398, + "log_odds_ratio": -0.499300479888916, + "logits/chosen": -1.0301642417907715, + "logits/rejected": -0.9690549969673157, + "logps/chosen": -1.467930793762207, + "logps/rejected": -1.917942762374878, + "loss": 2.6209, + "nll_loss": 0.6052953004837036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14679308235645294, + "rewards/margins": 0.04500119388103485, + "rewards/rejected": -0.1917942762374878, + "step": 139 + }, + { + "epoch": 0.3691496374423204, + "grad_norm": 10.652571678161621, + "learning_rate": 7.077196095829636e-06, + "log_odds_chosen": 0.3802804946899414, + "log_odds_ratio": -0.535650908946991, + "logits/chosen": -1.0809695720672607, + "logits/rejected": -1.0145455598831177, + "logps/chosen": -1.6542302370071411, + "logps/rejected": -1.9699711799621582, + "loss": 3.3817, + "nll_loss": 0.7918611764907837, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16542303562164307, + "rewards/margins": 0.03157408535480499, + "rewards/rejected": -0.19699713587760925, + "step": 140 + }, + { + "epoch": 0.37178642056690836, + "grad_norm": 9.796138763427734, + "learning_rate": 7.070097604259094e-06, + "log_odds_chosen": 0.4387213885784149, + "log_odds_ratio": -0.5061071515083313, + "logits/chosen": -1.0042519569396973, + "logits/rejected": -0.9524378776550293, + "logps/chosen": -1.5022239685058594, + "logps/rejected": -1.863775372505188, + "loss": 2.5383, + "nll_loss": 0.5839626789093018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15022240579128265, + "rewards/margins": 0.036155134439468384, + "rewards/rejected": -0.18637755513191223, + "step": 141 + }, + { + "epoch": 0.3744232036914964, + "grad_norm": 9.984831809997559, + "learning_rate": 7.062999112688554e-06, + "log_odds_chosen": 0.3700769245624542, + "log_odds_ratio": -0.5443153381347656, + "logits/chosen": -0.9836577773094177, + "logits/rejected": -0.8929394483566284, + "logps/chosen": -1.4981740713119507, + "logps/rejected": -1.8099955320358276, + "loss": 2.5823, + "nll_loss": 0.5911481976509094, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14981740713119507, + "rewards/margins": 0.031182145699858665, + "rewards/rejected": -0.1809995472431183, + "step": 142 + }, + { + "epoch": 0.3770599868160844, + "grad_norm": 9.898751258850098, + "learning_rate": 7.055900621118012e-06, + "log_odds_chosen": 0.20131006836891174, + "log_odds_ratio": -0.6076530814170837, + "logits/chosen": -0.9782786965370178, + "logits/rejected": -0.9188566207885742, + "logps/chosen": -1.6172821521759033, + "logps/rejected": -1.7827471494674683, + "loss": 2.7246, + "nll_loss": 0.620381772518158, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16172820329666138, + "rewards/margins": 0.016546515747904778, + "rewards/rejected": -0.1782747209072113, + "step": 143 + }, + { + "epoch": 0.37969676994067236, + "grad_norm": 12.325860023498535, + "learning_rate": 7.04880212954747e-06, + "log_odds_chosen": 0.15997439622879028, + "log_odds_ratio": -0.6231362819671631, + "logits/chosen": -0.9544463157653809, + "logits/rejected": -0.938663125038147, + "logps/chosen": -1.5355095863342285, + "logps/rejected": -1.668229579925537, + "loss": 3.154, + "nll_loss": 0.7261742949485779, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15355095267295837, + "rewards/margins": 0.013271997682750225, + "rewards/rejected": -0.16682296991348267, + "step": 144 + }, + { + "epoch": 0.3823335530652604, + "grad_norm": 10.043176651000977, + "learning_rate": 7.04170363797693e-06, + "log_odds_chosen": 0.4247394800186157, + "log_odds_ratio": -0.5165842175483704, + "logits/chosen": -0.9888424277305603, + "logits/rejected": -0.9225731492042542, + "logps/chosen": -1.6933906078338623, + "logps/rejected": -2.0490479469299316, + "loss": 2.5336, + "nll_loss": 0.5817536115646362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16933906078338623, + "rewards/margins": 0.03556573763489723, + "rewards/rejected": -0.20490480959415436, + "step": 145 + }, + { + "epoch": 0.3849703361898484, + "grad_norm": 11.028958320617676, + "learning_rate": 7.034605146406388e-06, + "log_odds_chosen": 0.2050904929637909, + "log_odds_ratio": -0.5977884531021118, + "logits/chosen": -1.0422849655151367, + "logits/rejected": -0.9397130012512207, + "logps/chosen": -1.6625914573669434, + "logps/rejected": -1.8326083421707153, + "loss": 3.2628, + "nll_loss": 0.7559204697608948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16625916957855225, + "rewards/margins": 0.017001673579216003, + "rewards/rejected": -0.18326082825660706, + "step": 146 + }, + { + "epoch": 0.38760711931443637, + "grad_norm": 10.177428245544434, + "learning_rate": 7.027506654835847e-06, + "log_odds_chosen": 0.10629764944314957, + "log_odds_ratio": -0.6427984237670898, + "logits/chosen": -1.002443790435791, + "logits/rejected": -0.9608904719352722, + "logps/chosen": -1.641714096069336, + "logps/rejected": -1.7274234294891357, + "loss": 2.886, + "nll_loss": 0.6572229862213135, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16417142748832703, + "rewards/margins": 0.00857093557715416, + "rewards/rejected": -0.1727423369884491, + "step": 147 + }, + { + "epoch": 0.3902439024390244, + "grad_norm": 10.105752944946289, + "learning_rate": 7.020408163265306e-06, + "log_odds_chosen": 0.3024708032608032, + "log_odds_ratio": -0.5647812485694885, + "logits/chosen": -0.9816720485687256, + "logits/rejected": -0.9124754667282104, + "logps/chosen": -1.5694180727005005, + "logps/rejected": -1.821027159690857, + "loss": 2.6204, + "nll_loss": 0.5986314415931702, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15694180130958557, + "rewards/margins": 0.025160912424325943, + "rewards/rejected": -0.1821027249097824, + "step": 148 + }, + { + "epoch": 0.3928806855636124, + "grad_norm": 10.754402160644531, + "learning_rate": 7.013309671694765e-06, + "log_odds_chosen": 0.26607027649879456, + "log_odds_ratio": -0.5796156525611877, + "logits/chosen": -0.8944635391235352, + "logits/rejected": -0.8566243052482605, + "logps/chosen": -1.5501165390014648, + "logps/rejected": -1.7602980136871338, + "loss": 2.4804, + "nll_loss": 0.5621330142021179, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15501166880130768, + "rewards/margins": 0.021018145605921745, + "rewards/rejected": -0.17602980136871338, + "step": 149 + }, + { + "epoch": 0.39551746868820037, + "grad_norm": 11.098362922668457, + "learning_rate": 7.0062111801242236e-06, + "log_odds_chosen": 0.2638225853443146, + "log_odds_ratio": -0.5734692811965942, + "logits/chosen": -0.9989528656005859, + "logits/rejected": -0.9287225604057312, + "logps/chosen": -1.4983904361724854, + "logps/rejected": -1.7102032899856567, + "loss": 3.3057, + "nll_loss": 0.7690660953521729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14983904361724854, + "rewards/margins": 0.02118128165602684, + "rewards/rejected": -0.17102032899856567, + "step": 150 + }, + { + "epoch": 0.3981542518127884, + "grad_norm": 9.652806282043457, + "learning_rate": 6.9991126885536825e-06, + "log_odds_chosen": 0.4299401640892029, + "log_odds_ratio": -0.5107702016830444, + "logits/chosen": -1.0003033876419067, + "logits/rejected": -0.9206913113594055, + "logps/chosen": -1.4160411357879639, + "logps/rejected": -1.749358057975769, + "loss": 2.3457, + "nll_loss": 0.5353503227233887, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14160412549972534, + "rewards/margins": 0.03333168476819992, + "rewards/rejected": -0.17493581771850586, + "step": 151 + }, + { + "epoch": 0.4007910349373764, + "grad_norm": 10.215150833129883, + "learning_rate": 6.9920141969831405e-06, + "log_odds_chosen": 0.426888108253479, + "log_odds_ratio": -0.5269747376441956, + "logits/chosen": -0.988075852394104, + "logits/rejected": -0.8986337184906006, + "logps/chosen": -1.643608570098877, + "logps/rejected": -1.999243140220642, + "loss": 2.8702, + "nll_loss": 0.6648507118225098, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16436085104942322, + "rewards/margins": 0.03556346148252487, + "rewards/rejected": -0.1999243199825287, + "step": 152 + }, + { + "epoch": 0.4034278180619644, + "grad_norm": 10.009817123413086, + "learning_rate": 6.984915705412599e-06, + "log_odds_chosen": 0.3125130832195282, + "log_odds_ratio": -0.5523189306259155, + "logits/chosen": -1.021907091140747, + "logits/rejected": -0.9471596479415894, + "logps/chosen": -1.5990777015686035, + "logps/rejected": -1.8522807359695435, + "loss": 2.6808, + "nll_loss": 0.6149685978889465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1599077731370926, + "rewards/margins": 0.025320306420326233, + "rewards/rejected": -0.18522807955741882, + "step": 153 + }, + { + "epoch": 0.4060646011865524, + "grad_norm": 9.670231819152832, + "learning_rate": 6.977817213842058e-06, + "log_odds_chosen": 0.2577518820762634, + "log_odds_ratio": -0.5812107920646667, + "logits/chosen": -0.9596846103668213, + "logits/rejected": -0.907728910446167, + "logps/chosen": -1.556383728981018, + "logps/rejected": -1.764423131942749, + "loss": 2.3379, + "nll_loss": 0.5263651013374329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15563836693763733, + "rewards/margins": 0.02080395817756653, + "rewards/rejected": -0.17644232511520386, + "step": 154 + }, + { + "epoch": 0.4087013843111404, + "grad_norm": 10.299683570861816, + "learning_rate": 6.970718722271516e-06, + "log_odds_chosen": 0.21095338463783264, + "log_odds_ratio": -0.6001787185668945, + "logits/chosen": -1.0265400409698486, + "logits/rejected": -0.9740742444992065, + "logps/chosen": -1.7002348899841309, + "logps/rejected": -1.8802061080932617, + "loss": 3.0064, + "nll_loss": 0.6915907263755798, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17002347111701965, + "rewards/margins": 0.017997119575738907, + "rewards/rejected": -0.18802061676979065, + "step": 155 + }, + { + "epoch": 0.41133816743572843, + "grad_norm": 10.699474334716797, + "learning_rate": 6.963620230700976e-06, + "log_odds_chosen": 0.11351797729730606, + "log_odds_ratio": -0.6480950117111206, + "logits/chosen": -1.0430080890655518, + "logits/rejected": -0.9336438179016113, + "logps/chosen": -1.5621956586837769, + "logps/rejected": -1.647605299949646, + "loss": 3.4295, + "nll_loss": 0.7925580739974976, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15621955692768097, + "rewards/margins": 0.008540966548025608, + "rewards/rejected": -0.1647605299949646, + "step": 156 + }, + { + "epoch": 0.4139749505603164, + "grad_norm": 11.156679153442383, + "learning_rate": 6.956521739130434e-06, + "log_odds_chosen": 0.29023119807243347, + "log_odds_ratio": -0.5665775537490845, + "logits/chosen": -1.0512886047363281, + "logits/rejected": -0.9587866067886353, + "logps/chosen": -1.603646993637085, + "logps/rejected": -1.841407060623169, + "loss": 3.1337, + "nll_loss": 0.7267646789550781, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16036470234394073, + "rewards/margins": 0.023776013404130936, + "rewards/rejected": -0.18414071202278137, + "step": 157 + }, + { + "epoch": 0.4166117336849044, + "grad_norm": 10.709914207458496, + "learning_rate": 6.949423247559894e-06, + "log_odds_chosen": 0.15108612179756165, + "log_odds_ratio": -0.622245728969574, + "logits/chosen": -0.9538211822509766, + "logits/rejected": -0.9229971170425415, + "logps/chosen": -1.5492205619812012, + "logps/rejected": -1.666764259338379, + "loss": 3.1062, + "nll_loss": 0.7143333554267883, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15492206811904907, + "rewards/margins": 0.01175436470657587, + "rewards/rejected": -0.16667643189430237, + "step": 158 + }, + { + "epoch": 0.41924851680949243, + "grad_norm": 10.369595527648926, + "learning_rate": 6.942324755989352e-06, + "log_odds_chosen": 0.32474178075790405, + "log_odds_ratio": -0.5587796568870544, + "logits/chosen": -1.0268337726593018, + "logits/rejected": -0.9584920406341553, + "logps/chosen": -1.7907629013061523, + "logps/rejected": -2.0647928714752197, + "loss": 2.965, + "nll_loss": 0.6853820085525513, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17907628417015076, + "rewards/margins": 0.027402998879551888, + "rewards/rejected": -0.2064792811870575, + "step": 159 + }, + { + "epoch": 0.4218852999340804, + "grad_norm": 11.290220260620117, + "learning_rate": 6.935226264418811e-06, + "log_odds_chosen": 0.07872498780488968, + "log_odds_ratio": -0.6619119644165039, + "logits/chosen": -0.9851783514022827, + "logits/rejected": -0.9211961030960083, + "logps/chosen": -1.7138417959213257, + "logps/rejected": -1.7746775150299072, + "loss": 3.4049, + "nll_loss": 0.7850258350372314, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.17138418555259705, + "rewards/margins": 0.006083549931645393, + "rewards/rejected": -0.17746774852275848, + "step": 160 + }, + { + "epoch": 0.4245220830586684, + "grad_norm": 9.70108413696289, + "learning_rate": 6.92812777284827e-06, + "log_odds_chosen": 0.22529563307762146, + "log_odds_ratio": -0.596996009349823, + "logits/chosen": -1.0223166942596436, + "logits/rejected": -0.9788596034049988, + "logps/chosen": -1.5264146327972412, + "logps/rejected": -1.7136309146881104, + "loss": 2.3278, + "nll_loss": 0.5222612023353577, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15264146029949188, + "rewards/margins": 0.018721627071499825, + "rewards/rejected": -0.17136308550834656, + "step": 161 + }, + { + "epoch": 0.42715886618325644, + "grad_norm": 10.300216674804688, + "learning_rate": 6.921029281277728e-06, + "log_odds_chosen": 0.18719755113124847, + "log_odds_ratio": -0.6063538193702698, + "logits/chosen": -1.0139224529266357, + "logits/rejected": -0.9442136883735657, + "logps/chosen": -1.4809929132461548, + "logps/rejected": -1.6269291639328003, + "loss": 2.6508, + "nll_loss": 0.602075457572937, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14809930324554443, + "rewards/margins": 0.01459362544119358, + "rewards/rejected": -0.16269291937351227, + "step": 162 + }, + { + "epoch": 0.4297956493078444, + "grad_norm": 9.955241203308105, + "learning_rate": 6.913930789707187e-06, + "log_odds_chosen": 0.3279910087585449, + "log_odds_ratio": -0.5519675016403198, + "logits/chosen": -1.0312724113464355, + "logits/rejected": -0.9439166784286499, + "logps/chosen": -1.6244471073150635, + "logps/rejected": -1.8925652503967285, + "loss": 2.7268, + "nll_loss": 0.6265023350715637, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16244471073150635, + "rewards/margins": 0.026811812072992325, + "rewards/rejected": -0.18925653398036957, + "step": 163 + }, + { + "epoch": 0.43243243243243246, + "grad_norm": 9.78976058959961, + "learning_rate": 6.906832298136646e-06, + "log_odds_chosen": 0.5778919458389282, + "log_odds_ratio": -0.48515623807907104, + "logits/chosen": -0.9530848264694214, + "logits/rejected": -0.9065274596214294, + "logps/chosen": -1.5215189456939697, + "logps/rejected": -2.0028321743011475, + "loss": 2.6913, + "nll_loss": 0.6243013143539429, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15215188264846802, + "rewards/margins": 0.04813132435083389, + "rewards/rejected": -0.2002832144498825, + "step": 164 + }, + { + "epoch": 0.43506921555702044, + "grad_norm": 10.509347915649414, + "learning_rate": 6.8997338065661045e-06, + "log_odds_chosen": 0.1852526068687439, + "log_odds_ratio": -0.619766354560852, + "logits/chosen": -1.0339908599853516, + "logits/rejected": -0.9722583293914795, + "logps/chosen": -1.6535886526107788, + "logps/rejected": -1.7956442832946777, + "loss": 2.839, + "nll_loss": 0.6477658152580261, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16535887122154236, + "rewards/margins": 0.014205573126673698, + "rewards/rejected": -0.1795644313097, + "step": 165 + }, + { + "epoch": 0.4377059986816084, + "grad_norm": 11.17563247680664, + "learning_rate": 6.8926353149955626e-06, + "log_odds_chosen": 0.2836954891681671, + "log_odds_ratio": -0.5793655514717102, + "logits/chosen": -1.0459768772125244, + "logits/rejected": -0.9338748455047607, + "logps/chosen": -1.7612978219985962, + "logps/rejected": -2.000579595565796, + "loss": 3.2258, + "nll_loss": 0.7485017776489258, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1761297881603241, + "rewards/margins": 0.023928172886371613, + "rewards/rejected": -0.2000579535961151, + "step": 166 + }, + { + "epoch": 0.44034278180619646, + "grad_norm": 10.128766059875488, + "learning_rate": 6.885536823425022e-06, + "log_odds_chosen": 0.18964844942092896, + "log_odds_ratio": -0.6131159067153931, + "logits/chosen": -0.9967179298400879, + "logits/rejected": -0.9552919864654541, + "logps/chosen": -1.8183844089508057, + "logps/rejected": -1.985978126525879, + "loss": 2.9409, + "nll_loss": 0.6739104390144348, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18183845281600952, + "rewards/margins": 0.016759376972913742, + "rewards/rejected": -0.19859781861305237, + "step": 167 + }, + { + "epoch": 0.44297956493078444, + "grad_norm": 9.803478240966797, + "learning_rate": 6.87843833185448e-06, + "log_odds_chosen": 0.35690954327583313, + "log_odds_ratio": -0.5366029143333435, + "logits/chosen": -1.0304672718048096, + "logits/rejected": -0.960822582244873, + "logps/chosen": -1.4720278978347778, + "logps/rejected": -1.7538082599639893, + "loss": 2.5161, + "nll_loss": 0.5753771662712097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14720278978347778, + "rewards/margins": 0.028178047388792038, + "rewards/rejected": -0.1753808557987213, + "step": 168 + }, + { + "epoch": 0.4456163480553724, + "grad_norm": 10.489614486694336, + "learning_rate": 6.87133984028394e-06, + "log_odds_chosen": 0.29822301864624023, + "log_odds_ratio": -0.5612882375717163, + "logits/chosen": -0.9769358038902283, + "logits/rejected": -0.9301931262016296, + "logps/chosen": -1.4727394580841064, + "logps/rejected": -1.703880786895752, + "loss": 2.2927, + "nll_loss": 0.5170445442199707, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1472739577293396, + "rewards/margins": 0.023114126175642014, + "rewards/rejected": -0.17038807272911072, + "step": 169 + }, + { + "epoch": 0.44825313117996046, + "grad_norm": 9.719270706176758, + "learning_rate": 6.864241348713398e-06, + "log_odds_chosen": 0.1573425531387329, + "log_odds_ratio": -0.6409673094749451, + "logits/chosen": -0.965396523475647, + "logits/rejected": -0.9352325201034546, + "logps/chosen": -1.5652350187301636, + "logps/rejected": -1.6846939325332642, + "loss": 2.1732, + "nll_loss": 0.47920310497283936, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15652349591255188, + "rewards/margins": 0.011945885606110096, + "rewards/rejected": -0.1684693992137909, + "step": 170 + }, + { + "epoch": 0.45088991430454844, + "grad_norm": 10.072135925292969, + "learning_rate": 6.857142857142856e-06, + "log_odds_chosen": 0.2361280769109726, + "log_odds_ratio": -0.5988917350769043, + "logits/chosen": -1.0373730659484863, + "logits/rejected": -0.97651207447052, + "logps/chosen": -1.563853144645691, + "logps/rejected": -1.746325135231018, + "loss": 2.8339, + "nll_loss": 0.6485767364501953, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15638533234596252, + "rewards/margins": 0.018247190862894058, + "rewards/rejected": -0.17463251948356628, + "step": 171 + }, + { + "epoch": 0.4535266974291364, + "grad_norm": 9.880722999572754, + "learning_rate": 6.850044365572316e-06, + "log_odds_chosen": 0.42882829904556274, + "log_odds_ratio": -0.5146551728248596, + "logits/chosen": -0.9842818379402161, + "logits/rejected": -0.956126868724823, + "logps/chosen": -1.4900033473968506, + "logps/rejected": -1.831408143043518, + "loss": 2.1344, + "nll_loss": 0.4821299910545349, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.149000346660614, + "rewards/margins": 0.03414047881960869, + "rewards/rejected": -0.1831408143043518, + "step": 172 + }, + { + "epoch": 0.45616348055372447, + "grad_norm": 9.939020156860352, + "learning_rate": 6.842945874001774e-06, + "log_odds_chosen": 0.4484916627407074, + "log_odds_ratio": -0.5012741088867188, + "logits/chosen": -1.0212557315826416, + "logits/rejected": -0.894973874092102, + "logps/chosen": -1.4865288734436035, + "logps/rejected": -1.8439892530441284, + "loss": 2.3634, + "nll_loss": 0.5407203435897827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14865288138389587, + "rewards/margins": 0.03574604168534279, + "rewards/rejected": -0.18439891934394836, + "step": 173 + }, + { + "epoch": 0.45880026367831245, + "grad_norm": 9.039196968078613, + "learning_rate": 6.835847382431233e-06, + "log_odds_chosen": 0.24636773765087128, + "log_odds_ratio": -0.5889366865158081, + "logits/chosen": -0.9732996225357056, + "logits/rejected": -0.9060419797897339, + "logps/chosen": -1.3600660562515259, + "logps/rejected": -1.5369794368743896, + "loss": 2.1096, + "nll_loss": 0.4685070514678955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13600660860538483, + "rewards/margins": 0.017691336572170258, + "rewards/rejected": -0.1536979377269745, + "step": 174 + }, + { + "epoch": 0.4614370468029005, + "grad_norm": 11.821978569030762, + "learning_rate": 6.828748890860692e-06, + "log_odds_chosen": 0.28139597177505493, + "log_odds_ratio": -0.5732556581497192, + "logits/chosen": -0.9052269458770752, + "logits/rejected": -0.867675244808197, + "logps/chosen": -1.664025902748108, + "logps/rejected": -1.8929924964904785, + "loss": 3.2741, + "nll_loss": 0.7611905336380005, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16640257835388184, + "rewards/margins": 0.022896669805049896, + "rewards/rejected": -0.18929925560951233, + "step": 175 + }, + { + "epoch": 0.46407382992748847, + "grad_norm": 10.669132232666016, + "learning_rate": 6.821650399290151e-06, + "log_odds_chosen": 0.2710084021091461, + "log_odds_ratio": -0.5712127685546875, + "logits/chosen": -0.9989784359931946, + "logits/rejected": -0.9607273936271667, + "logps/chosen": -1.7668498754501343, + "logps/rejected": -1.9909987449645996, + "loss": 3.1341, + "nll_loss": 0.7264118790626526, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17668499052524567, + "rewards/margins": 0.022414876148104668, + "rewards/rejected": -0.19909986853599548, + "step": 176 + }, + { + "epoch": 0.46671061305207645, + "grad_norm": 11.005374908447266, + "learning_rate": 6.814551907719609e-06, + "log_odds_chosen": 0.1589566171169281, + "log_odds_ratio": -0.6214722394943237, + "logits/chosen": -0.9853265285491943, + "logits/rejected": -0.9289055466651917, + "logps/chosen": -1.5828139781951904, + "logps/rejected": -1.7081506252288818, + "loss": 2.9991, + "nll_loss": 0.6876333951950073, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1582813858985901, + "rewards/margins": 0.012533656321465969, + "rewards/rejected": -0.17081505060195923, + "step": 177 + }, + { + "epoch": 0.4693473961766645, + "grad_norm": 10.49986743927002, + "learning_rate": 6.8074534161490685e-06, + "log_odds_chosen": 0.36675703525543213, + "log_odds_ratio": -0.5418417453765869, + "logits/chosen": -1.038415789604187, + "logits/rejected": -0.9667816162109375, + "logps/chosen": -1.541959285736084, + "logps/rejected": -1.8625450134277344, + "loss": 2.6794, + "nll_loss": 0.6156629920005798, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15419591963291168, + "rewards/margins": 0.032058581709861755, + "rewards/rejected": -0.18625450134277344, + "step": 178 + }, + { + "epoch": 0.47198417930125247, + "grad_norm": 9.203581809997559, + "learning_rate": 6.800354924578527e-06, + "log_odds_chosen": 0.17328761518001556, + "log_odds_ratio": -0.6320229768753052, + "logits/chosen": -0.9987534284591675, + "logits/rejected": -0.9479336738586426, + "logps/chosen": -1.4706135988235474, + "logps/rejected": -1.6253565549850464, + "loss": 2.2421, + "nll_loss": 0.49731987714767456, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14706136286258698, + "rewards/margins": 0.01547430083155632, + "rewards/rejected": -0.1625356525182724, + "step": 179 + }, + { + "epoch": 0.47462096242584045, + "grad_norm": 10.125992774963379, + "learning_rate": 6.7932564330079855e-06, + "log_odds_chosen": 0.26374509930610657, + "log_odds_ratio": -0.5806645154953003, + "logits/chosen": -1.076350212097168, + "logits/rejected": -1.0171654224395752, + "logps/chosen": -1.6135175228118896, + "logps/rejected": -1.8273677825927734, + "loss": 3.3221, + "nll_loss": 0.7724688053131104, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1613517552614212, + "rewards/margins": 0.021385014057159424, + "rewards/rejected": -0.18273678421974182, + "step": 180 + }, + { + "epoch": 0.4772577455504285, + "grad_norm": 10.200504302978516, + "learning_rate": 6.786157941437444e-06, + "log_odds_chosen": 0.2539365589618683, + "log_odds_ratio": -0.5794248580932617, + "logits/chosen": -0.9939213991165161, + "logits/rejected": -0.9339665174484253, + "logps/chosen": -1.4931640625, + "logps/rejected": -1.6908729076385498, + "loss": 2.5199, + "nll_loss": 0.5720276236534119, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14931640028953552, + "rewards/margins": 0.019770905375480652, + "rewards/rejected": -0.16908732056617737, + "step": 181 + }, + { + "epoch": 0.4798945286750165, + "grad_norm": 10.58282470703125, + "learning_rate": 6.7790594498669024e-06, + "log_odds_chosen": 0.21669375896453857, + "log_odds_ratio": -0.5990947484970093, + "logits/chosen": -1.0061519145965576, + "logits/rejected": -0.9448709487915039, + "logps/chosen": -1.753849744796753, + "logps/rejected": -1.9302830696105957, + "loss": 3.382, + "nll_loss": 0.78557950258255, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.175384983420372, + "rewards/margins": 0.017643319442868233, + "rewards/rejected": -0.1930283159017563, + "step": 182 + }, + { + "epoch": 0.48253131179960446, + "grad_norm": 10.520997047424316, + "learning_rate": 6.771960958296362e-06, + "log_odds_chosen": 0.19941487908363342, + "log_odds_ratio": -0.6015852689743042, + "logits/chosen": -1.1057069301605225, + "logits/rejected": -0.9830414056777954, + "logps/chosen": -1.5218502283096313, + "logps/rejected": -1.6809983253479004, + "loss": 3.1066, + "nll_loss": 0.7164870500564575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15218502283096313, + "rewards/margins": 0.015914827585220337, + "rewards/rejected": -0.16809985041618347, + "step": 183 + }, + { + "epoch": 0.4851680949241925, + "grad_norm": 10.83430004119873, + "learning_rate": 6.76486246672582e-06, + "log_odds_chosen": 0.21703600883483887, + "log_odds_ratio": -0.592914879322052, + "logits/chosen": -1.0694499015808105, + "logits/rejected": -1.0465267896652222, + "logps/chosen": -1.4693856239318848, + "logps/rejected": -1.6387711763381958, + "loss": 3.0489, + "nll_loss": 0.7029221653938293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14693856239318848, + "rewards/margins": 0.016938570886850357, + "rewards/rejected": -0.16387712955474854, + "step": 184 + }, + { + "epoch": 0.4878048780487805, + "grad_norm": 9.578657150268555, + "learning_rate": 6.757763975155279e-06, + "log_odds_chosen": 0.15174290537834167, + "log_odds_ratio": -0.6242505311965942, + "logits/chosen": -1.0101323127746582, + "logits/rejected": -0.9744507074356079, + "logps/chosen": -1.3636577129364014, + "logps/rejected": -1.4776711463928223, + "loss": 2.3882, + "nll_loss": 0.5346183180809021, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13636577129364014, + "rewards/margins": 0.011401347815990448, + "rewards/rejected": -0.14776712656021118, + "step": 185 + }, + { + "epoch": 0.4904416611733685, + "grad_norm": 11.106609344482422, + "learning_rate": 6.750665483584738e-06, + "log_odds_chosen": 0.2596626579761505, + "log_odds_ratio": -0.5743707418441772, + "logits/chosen": -1.0113422870635986, + "logits/rejected": -0.9298158288002014, + "logps/chosen": -1.6257975101470947, + "logps/rejected": -1.8388350009918213, + "loss": 2.7506, + "nll_loss": 0.6302106976509094, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.162579745054245, + "rewards/margins": 0.021303754299879074, + "rewards/rejected": -0.18388350307941437, + "step": 186 + }, + { + "epoch": 0.4930784442979565, + "grad_norm": 9.922343254089355, + "learning_rate": 6.743566992014197e-06, + "log_odds_chosen": 0.3453751802444458, + "log_odds_ratio": -0.5418750047683716, + "logits/chosen": -1.022857427597046, + "logits/rejected": -0.9566177129745483, + "logps/chosen": -1.563047170639038, + "logps/rejected": -1.8398334980010986, + "loss": 2.6673, + "nll_loss": 0.6126459836959839, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1563047170639038, + "rewards/margins": 0.027678625658154488, + "rewards/rejected": -0.18398335576057434, + "step": 187 + }, + { + "epoch": 0.4957152274225445, + "grad_norm": 10.53550910949707, + "learning_rate": 6.736468500443656e-06, + "log_odds_chosen": 0.3632819652557373, + "log_odds_ratio": -0.5351804494857788, + "logits/chosen": -0.975321352481842, + "logits/rejected": -0.9194474816322327, + "logps/chosen": -1.7827264070510864, + "logps/rejected": -2.0951194763183594, + "loss": 2.7061, + "nll_loss": 0.6229962110519409, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17827263474464417, + "rewards/margins": 0.031239299103617668, + "rewards/rejected": -0.20951193571090698, + "step": 188 + }, + { + "epoch": 0.4983520105471325, + "grad_norm": 10.74094009399414, + "learning_rate": 6.729370008873114e-06, + "log_odds_chosen": 0.23406413197517395, + "log_odds_ratio": -0.591535747051239, + "logits/chosen": -0.997490644454956, + "logits/rejected": -0.8993905782699585, + "logps/chosen": -1.6242046356201172, + "logps/rejected": -1.8176889419555664, + "loss": 2.9148, + "nll_loss": 0.6695369482040405, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16242045164108276, + "rewards/margins": 0.019348448142409325, + "rewards/rejected": -0.18176892399787903, + "step": 189 + }, + { + "epoch": 0.5009887936717206, + "grad_norm": 10.881436347961426, + "learning_rate": 6.722271517302573e-06, + "log_odds_chosen": 0.10114425420761108, + "log_odds_ratio": -0.6511253714561462, + "logits/chosen": -1.0813939571380615, + "logits/rejected": -1.008556842803955, + "logps/chosen": -1.4924644231796265, + "logps/rejected": -1.5765280723571777, + "loss": 3.0701, + "nll_loss": 0.702400267124176, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1492464542388916, + "rewards/margins": 0.008406376466155052, + "rewards/rejected": -0.1576528251171112, + "step": 190 + }, + { + "epoch": 0.5036255767963085, + "grad_norm": 10.459433555603027, + "learning_rate": 6.715173025732032e-06, + "log_odds_chosen": 0.3647010624408722, + "log_odds_ratio": -0.535969078540802, + "logits/chosen": -0.9934642314910889, + "logits/rejected": -0.9312925338745117, + "logps/chosen": -1.6314611434936523, + "logps/rejected": -1.9391553401947021, + "loss": 2.6815, + "nll_loss": 0.6167663931846619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16314613819122314, + "rewards/margins": 0.030769426375627518, + "rewards/rejected": -0.19391554594039917, + "step": 191 + }, + { + "epoch": 0.5062623599208965, + "grad_norm": 10.44395637512207, + "learning_rate": 6.708074534161491e-06, + "log_odds_chosen": 0.3406563997268677, + "log_odds_ratio": -0.5445241928100586, + "logits/chosen": -1.0486533641815186, + "logits/rejected": -0.9384101629257202, + "logps/chosen": -1.4763271808624268, + "logps/rejected": -1.745421051979065, + "loss": 3.3578, + "nll_loss": 0.7850080132484436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14763271808624268, + "rewards/margins": 0.02690940722823143, + "rewards/rejected": -0.1745421141386032, + "step": 192 + }, + { + "epoch": 0.5088991430454846, + "grad_norm": 10.408292770385742, + "learning_rate": 6.700976042590949e-06, + "log_odds_chosen": 0.3092145323753357, + "log_odds_ratio": -0.5610625743865967, + "logits/chosen": -1.0052587985992432, + "logits/rejected": -0.962735652923584, + "logps/chosen": -1.5190422534942627, + "logps/rejected": -1.7629897594451904, + "loss": 2.7384, + "nll_loss": 0.6284852027893066, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15190422534942627, + "rewards/margins": 0.024394752457737923, + "rewards/rejected": -0.17629897594451904, + "step": 193 + }, + { + "epoch": 0.5115359261700725, + "grad_norm": 9.567964553833008, + "learning_rate": 6.693877551020408e-06, + "log_odds_chosen": 0.22837528586387634, + "log_odds_ratio": -0.591896116733551, + "logits/chosen": -1.0158699750900269, + "logits/rejected": -0.9633442759513855, + "logps/chosen": -1.5977767705917358, + "logps/rejected": -1.7741581201553345, + "loss": 2.7471, + "nll_loss": 0.6275816559791565, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1597776710987091, + "rewards/margins": 0.01763814315199852, + "rewards/rejected": -0.17741578817367554, + "step": 194 + }, + { + "epoch": 0.5141727092946605, + "grad_norm": 9.766575813293457, + "learning_rate": 6.6867790594498664e-06, + "log_odds_chosen": 0.3533956706523895, + "log_odds_ratio": -0.5434699058532715, + "logits/chosen": -1.0098570585250854, + "logits/rejected": -0.9270682334899902, + "logps/chosen": -1.5814838409423828, + "logps/rejected": -1.865525722503662, + "loss": 2.5237, + "nll_loss": 0.5765711665153503, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1581483781337738, + "rewards/margins": 0.028404179960489273, + "rewards/rejected": -0.18655255436897278, + "step": 195 + }, + { + "epoch": 0.5168094924192486, + "grad_norm": 10.196769714355469, + "learning_rate": 6.679680567879326e-06, + "log_odds_chosen": 0.28227320313453674, + "log_odds_ratio": -0.5764878988265991, + "logits/chosen": -0.968219518661499, + "logits/rejected": -0.9135010242462158, + "logps/chosen": -1.6530375480651855, + "logps/rejected": -1.8860328197479248, + "loss": 2.6832, + "nll_loss": 0.6131478548049927, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1653037667274475, + "rewards/margins": 0.023299511522054672, + "rewards/rejected": -0.18860328197479248, + "step": 196 + }, + { + "epoch": 0.5194462755438365, + "grad_norm": 10.448760032653809, + "learning_rate": 6.672582076308784e-06, + "log_odds_chosen": 0.5269650816917419, + "log_odds_ratio": -0.47726505994796753, + "logits/chosen": -0.9919060468673706, + "logits/rejected": -0.917604923248291, + "logps/chosen": -1.4839609861373901, + "logps/rejected": -1.910081386566162, + "loss": 2.7142, + "nll_loss": 0.6308342814445496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1483961045742035, + "rewards/margins": 0.042612046003341675, + "rewards/rejected": -0.19100815057754517, + "step": 197 + }, + { + "epoch": 0.5220830586684245, + "grad_norm": 10.519347190856934, + "learning_rate": 6.665483584738242e-06, + "log_odds_chosen": 0.38204941153526306, + "log_odds_ratio": -0.5332264304161072, + "logits/chosen": -1.0032997131347656, + "logits/rejected": -0.9247130155563354, + "logps/chosen": -1.761028528213501, + "logps/rejected": -2.0825135707855225, + "loss": 3.2928, + "nll_loss": 0.7698801159858704, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17610284686088562, + "rewards/margins": 0.03214849531650543, + "rewards/rejected": -0.20825135707855225, + "step": 198 + }, + { + "epoch": 0.5247198417930126, + "grad_norm": 9.883003234863281, + "learning_rate": 6.658385093167702e-06, + "log_odds_chosen": 0.7173312306404114, + "log_odds_ratio": -0.44229722023010254, + "logits/chosen": -1.0049068927764893, + "logits/rejected": -0.9245249032974243, + "logps/chosen": -1.4914054870605469, + "logps/rejected": -2.0990054607391357, + "loss": 2.6475, + "nll_loss": 0.6176378130912781, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14914055168628693, + "rewards/margins": 0.06075998395681381, + "rewards/rejected": -0.20990052819252014, + "step": 199 + }, + { + "epoch": 0.5273566249176005, + "grad_norm": 9.658519744873047, + "learning_rate": 6.65128660159716e-06, + "log_odds_chosen": 0.30500340461730957, + "log_odds_ratio": -0.5833555459976196, + "logits/chosen": -1.0268594026565552, + "logits/rejected": -0.938892126083374, + "logps/chosen": -1.7157214879989624, + "logps/rejected": -1.963449239730835, + "loss": 2.9324, + "nll_loss": 0.6747677326202393, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17157214879989624, + "rewards/margins": 0.024772752076387405, + "rewards/rejected": -0.19634489715099335, + "step": 200 + }, + { + "epoch": 0.5299934080421885, + "grad_norm": 10.24622631072998, + "learning_rate": 6.644188110026619e-06, + "log_odds_chosen": 0.5697795748710632, + "log_odds_ratio": -0.45827436447143555, + "logits/chosen": -1.0016233921051025, + "logits/rejected": -0.8836263418197632, + "logps/chosen": -1.4620568752288818, + "logps/rejected": -1.9116840362548828, + "loss": 2.6193, + "nll_loss": 0.6090010404586792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14620567858219147, + "rewards/margins": 0.044962719082832336, + "rewards/rejected": -0.1911683976650238, + "step": 201 + }, + { + "epoch": 0.5326301911667766, + "grad_norm": 9.450702667236328, + "learning_rate": 6.637089618456078e-06, + "log_odds_chosen": 0.20333410799503326, + "log_odds_ratio": -0.6024256944656372, + "logits/chosen": -0.940944492816925, + "logits/rejected": -0.8981947898864746, + "logps/chosen": -1.5878612995147705, + "logps/rejected": -1.7429168224334717, + "loss": 2.141, + "nll_loss": 0.4750024378299713, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15878614783287048, + "rewards/margins": 0.015505557879805565, + "rewards/rejected": -0.1742917001247406, + "step": 202 + }, + { + "epoch": 0.5352669742913645, + "grad_norm": 10.130104064941406, + "learning_rate": 6.629991126885537e-06, + "log_odds_chosen": 0.4129548966884613, + "log_odds_ratio": -0.5138856768608093, + "logits/chosen": -1.0429551601409912, + "logits/rejected": -0.9169206619262695, + "logps/chosen": -1.3947268724441528, + "logps/rejected": -1.723283290863037, + "loss": 2.6488, + "nll_loss": 0.6108206510543823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13947269320487976, + "rewards/margins": 0.03285562992095947, + "rewards/rejected": -0.17232832312583923, + "step": 203 + }, + { + "epoch": 0.5379037574159525, + "grad_norm": 10.768418312072754, + "learning_rate": 6.622892635314995e-06, + "log_odds_chosen": 0.2619969844818115, + "log_odds_ratio": -0.5781389474868774, + "logits/chosen": -1.0080143213272095, + "logits/rejected": -0.888601541519165, + "logps/chosen": -1.6371673345565796, + "logps/rejected": -1.8551280498504639, + "loss": 3.149, + "nll_loss": 0.7294327020645142, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16371673345565796, + "rewards/margins": 0.021796071901917458, + "rewards/rejected": -0.18551281094551086, + "step": 204 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 9.554441452026367, + "learning_rate": 6.615794143744455e-06, + "log_odds_chosen": 0.24392247200012207, + "log_odds_ratio": -0.5857117176055908, + "logits/chosen": -0.9610693454742432, + "logits/rejected": -0.9121558666229248, + "logps/chosen": -1.5005481243133545, + "logps/rejected": -1.6922739744186401, + "loss": 2.1473, + "nll_loss": 0.4782421886920929, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15005481243133545, + "rewards/margins": 0.01917259395122528, + "rewards/rejected": -0.16922740638256073, + "step": 205 + }, + { + "epoch": 0.5431773236651285, + "grad_norm": 9.785226821899414, + "learning_rate": 6.608695652173913e-06, + "log_odds_chosen": 0.6710850596427917, + "log_odds_ratio": -0.43129992485046387, + "logits/chosen": -0.9875367283821106, + "logits/rejected": -0.9106200933456421, + "logps/chosen": -1.3776631355285645, + "logps/rejected": -1.898719072341919, + "loss": 2.1726, + "nll_loss": 0.50001060962677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13776631653308868, + "rewards/margins": 0.05210559815168381, + "rewards/rejected": -0.1898719072341919, + "step": 206 + }, + { + "epoch": 0.5458141067897165, + "grad_norm": 10.392133712768555, + "learning_rate": 6.6015971606033715e-06, + "log_odds_chosen": 0.3169119656085968, + "log_odds_ratio": -0.5607678294181824, + "logits/chosen": -1.047536015510559, + "logits/rejected": -0.9540445804595947, + "logps/chosen": -1.5951440334320068, + "logps/rejected": -1.8588143587112427, + "loss": 2.6913, + "nll_loss": 0.6167535781860352, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1595143973827362, + "rewards/margins": 0.026367036625742912, + "rewards/rejected": -0.18588143587112427, + "step": 207 + }, + { + "epoch": 0.5484508899143046, + "grad_norm": 10.140395164489746, + "learning_rate": 6.5944986690328304e-06, + "log_odds_chosen": 0.33773016929626465, + "log_odds_ratio": -0.5480486154556274, + "logits/chosen": -1.039228916168213, + "logits/rejected": -0.966877281665802, + "logps/chosen": -1.5897202491760254, + "logps/rejected": -1.866443157196045, + "loss": 2.878, + "nll_loss": 0.6646842956542969, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15897202491760254, + "rewards/margins": 0.027672285214066505, + "rewards/rejected": -0.1866443157196045, + "step": 208 + }, + { + "epoch": 0.5510876730388925, + "grad_norm": 9.475105285644531, + "learning_rate": 6.5874001774622885e-06, + "log_odds_chosen": 0.27345022559165955, + "log_odds_ratio": -0.5832695364952087, + "logits/chosen": -1.0278539657592773, + "logits/rejected": -0.9749932885169983, + "logps/chosen": -1.287369966506958, + "logps/rejected": -1.5093766450881958, + "loss": 2.2055, + "nll_loss": 0.4930441975593567, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12873700261116028, + "rewards/margins": 0.022200649604201317, + "rewards/rejected": -0.15093766152858734, + "step": 209 + }, + { + "epoch": 0.5537244561634805, + "grad_norm": 9.57432746887207, + "learning_rate": 6.580301685891748e-06, + "log_odds_chosen": 0.3678041100502014, + "log_odds_ratio": -0.5408501029014587, + "logits/chosen": -1.0283927917480469, + "logits/rejected": -0.9835280179977417, + "logps/chosen": -1.4167461395263672, + "logps/rejected": -1.708855152130127, + "loss": 2.3869, + "nll_loss": 0.5426478385925293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14167462289333344, + "rewards/margins": 0.029210904613137245, + "rewards/rejected": -0.17088551819324493, + "step": 210 + }, + { + "epoch": 0.5563612392880686, + "grad_norm": 10.188741683959961, + "learning_rate": 6.573203194321206e-06, + "log_odds_chosen": 0.3999137878417969, + "log_odds_ratio": -0.5197833776473999, + "logits/chosen": -0.9907450675964355, + "logits/rejected": -0.9520745277404785, + "logps/chosen": -1.543394923210144, + "logps/rejected": -1.870898962020874, + "loss": 2.4805, + "nll_loss": 0.5681428909301758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1543394923210144, + "rewards/margins": 0.03275042027235031, + "rewards/rejected": -0.1870899200439453, + "step": 211 + }, + { + "epoch": 0.5589980224126566, + "grad_norm": 10.253950119018555, + "learning_rate": 6.566104702750665e-06, + "log_odds_chosen": 0.3432654142379761, + "log_odds_ratio": -0.5483474731445312, + "logits/chosen": -1.0167362689971924, + "logits/rejected": -0.9465508460998535, + "logps/chosen": -1.4691851139068604, + "logps/rejected": -1.7421796321868896, + "loss": 2.5392, + "nll_loss": 0.5799700021743774, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14691850543022156, + "rewards/margins": 0.027299458160996437, + "rewards/rejected": -0.17421796917915344, + "step": 212 + }, + { + "epoch": 0.5616348055372445, + "grad_norm": 9.691521644592285, + "learning_rate": 6.559006211180124e-06, + "log_odds_chosen": 0.2519860863685608, + "log_odds_ratio": -0.5869906544685364, + "logits/chosen": -0.9782098531723022, + "logits/rejected": -0.9147440791130066, + "logps/chosen": -1.5072137117385864, + "logps/rejected": -1.727609634399414, + "loss": 2.343, + "nll_loss": 0.5270425081253052, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15072137117385864, + "rewards/margins": 0.02203957922756672, + "rewards/rejected": -0.1727609634399414, + "step": 213 + }, + { + "epoch": 0.5642715886618326, + "grad_norm": 9.183805465698242, + "learning_rate": 6.551907719609583e-06, + "log_odds_chosen": 0.4426887035369873, + "log_odds_ratio": -0.5064529776573181, + "logits/chosen": -0.969307541847229, + "logits/rejected": -0.8896834254264832, + "logps/chosen": -1.4110374450683594, + "logps/rejected": -1.7519762516021729, + "loss": 1.8554, + "nll_loss": 0.4132058620452881, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14110374450683594, + "rewards/margins": 0.03409387543797493, + "rewards/rejected": -0.17519763112068176, + "step": 214 + }, + { + "epoch": 0.5669083717864206, + "grad_norm": 10.12009048461914, + "learning_rate": 6.544809228039041e-06, + "log_odds_chosen": 0.3893926739692688, + "log_odds_ratio": -0.5226792097091675, + "logits/chosen": -1.0572798252105713, + "logits/rejected": -0.9570890665054321, + "logps/chosen": -1.468374490737915, + "logps/rejected": -1.7834434509277344, + "loss": 2.8241, + "nll_loss": 0.6537682414054871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14683745801448822, + "rewards/margins": 0.031506896018981934, + "rewards/rejected": -0.17834435403347015, + "step": 215 + }, + { + "epoch": 0.5695451549110085, + "grad_norm": 10.187172889709473, + "learning_rate": 6.5377107364685e-06, + "log_odds_chosen": 0.41759955883026123, + "log_odds_ratio": -0.510657787322998, + "logits/chosen": -1.046910285949707, + "logits/rejected": -0.9419330358505249, + "logps/chosen": -1.576021432876587, + "logps/rejected": -1.9195055961608887, + "loss": 2.7863, + "nll_loss": 0.6455209851264954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15760213136672974, + "rewards/margins": 0.03434841334819794, + "rewards/rejected": -0.19195055961608887, + "step": 216 + }, + { + "epoch": 0.5721819380355966, + "grad_norm": 10.267085075378418, + "learning_rate": 6.530612244897959e-06, + "log_odds_chosen": 0.2733405530452728, + "log_odds_ratio": -0.56819748878479, + "logits/chosen": -1.02101731300354, + "logits/rejected": -0.9294899702072144, + "logps/chosen": -1.7061502933502197, + "logps/rejected": -1.9277353286743164, + "loss": 2.8517, + "nll_loss": 0.6561131477355957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1706150472164154, + "rewards/margins": 0.022158479318022728, + "rewards/rejected": -0.1927735060453415, + "step": 217 + }, + { + "epoch": 0.5748187211601846, + "grad_norm": 9.67136287689209, + "learning_rate": 6.523513753327418e-06, + "log_odds_chosen": 0.5332562923431396, + "log_odds_ratio": -0.468158483505249, + "logits/chosen": -1.0288969278335571, + "logits/rejected": -0.9056495428085327, + "logps/chosen": -1.6826705932617188, + "logps/rejected": -2.130521774291992, + "loss": 2.7361, + "nll_loss": 0.6372010707855225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16826705634593964, + "rewards/margins": 0.04478512704372406, + "rewards/rejected": -0.2130521833896637, + "step": 218 + }, + { + "epoch": 0.5774555042847725, + "grad_norm": 9.922078132629395, + "learning_rate": 6.516415261756877e-06, + "log_odds_chosen": 0.2611735165119171, + "log_odds_ratio": -0.5755319595336914, + "logits/chosen": -1.0483546257019043, + "logits/rejected": -0.9757335186004639, + "logps/chosen": -1.6353774070739746, + "logps/rejected": -1.8514364957809448, + "loss": 2.7073, + "nll_loss": 0.6192625164985657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16353774070739746, + "rewards/margins": 0.02160591259598732, + "rewards/rejected": -0.18514364957809448, + "step": 219 + }, + { + "epoch": 0.5800922874093606, + "grad_norm": 10.130531311035156, + "learning_rate": 6.509316770186335e-06, + "log_odds_chosen": 0.4569411873817444, + "log_odds_ratio": -0.5014762878417969, + "logits/chosen": -1.0507677793502808, + "logits/rejected": -1.014943242073059, + "logps/chosen": -1.3462841510772705, + "logps/rejected": -1.6764225959777832, + "loss": 2.678, + "nll_loss": 0.6193474531173706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13462841510772705, + "rewards/margins": 0.03301384299993515, + "rewards/rejected": -0.1676422655582428, + "step": 220 + }, + { + "epoch": 0.5827290705339486, + "grad_norm": 9.818591117858887, + "learning_rate": 6.5022182786157944e-06, + "log_odds_chosen": 0.33437085151672363, + "log_odds_ratio": -0.5524057149887085, + "logits/chosen": -1.0554800033569336, + "logits/rejected": -0.9858191609382629, + "logps/chosen": -1.5950689315795898, + "logps/rejected": -1.8567230701446533, + "loss": 3.0333, + "nll_loss": 0.7030817270278931, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1595069169998169, + "rewards/margins": 0.026165399700403214, + "rewards/rejected": -0.1856723129749298, + "step": 221 + }, + { + "epoch": 0.5853658536585366, + "grad_norm": 9.592183113098145, + "learning_rate": 6.4951197870452525e-06, + "log_odds_chosen": 0.3747381865978241, + "log_odds_ratio": -0.5308906435966492, + "logits/chosen": -1.023972988128662, + "logits/rejected": -0.9763661623001099, + "logps/chosen": -1.470754861831665, + "logps/rejected": -1.762098789215088, + "loss": 2.3964, + "nll_loss": 0.5460025072097778, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14707550406455994, + "rewards/margins": 0.02913437783718109, + "rewards/rejected": -0.17620986700057983, + "step": 222 + }, + { + "epoch": 0.5880026367831246, + "grad_norm": 10.06408977508545, + "learning_rate": 6.4880212954747105e-06, + "log_odds_chosen": 0.1953536868095398, + "log_odds_ratio": -0.6110211610794067, + "logits/chosen": -0.9953948259353638, + "logits/rejected": -0.9426294565200806, + "logps/chosen": -1.5649466514587402, + "logps/rejected": -1.721879243850708, + "loss": 2.5598, + "nll_loss": 0.5788469910621643, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15649467706680298, + "rewards/margins": 0.015693247318267822, + "rewards/rejected": -0.1721879243850708, + "step": 223 + }, + { + "epoch": 0.5906394199077126, + "grad_norm": 10.529962539672852, + "learning_rate": 6.48092280390417e-06, + "log_odds_chosen": 0.22896939516067505, + "log_odds_ratio": -0.5931162238121033, + "logits/chosen": -0.9920583963394165, + "logits/rejected": -0.9440720677375793, + "logps/chosen": -1.6963231563568115, + "logps/rejected": -1.8891266584396362, + "loss": 2.6771, + "nll_loss": 0.6099545955657959, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16963233053684235, + "rewards/margins": 0.019280334934592247, + "rewards/rejected": -0.18891265988349915, + "step": 224 + }, + { + "epoch": 0.5932762030323006, + "grad_norm": 9.350930213928223, + "learning_rate": 6.473824312333628e-06, + "log_odds_chosen": 0.46141666173934937, + "log_odds_ratio": -0.4986426830291748, + "logits/chosen": -0.9788612127304077, + "logits/rejected": -0.9355888962745667, + "logps/chosen": -1.3900244235992432, + "logps/rejected": -1.739728569984436, + "loss": 2.1033, + "nll_loss": 0.47596830129623413, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13900244235992432, + "rewards/margins": 0.034970417618751526, + "rewards/rejected": -0.17397285997867584, + "step": 225 + }, + { + "epoch": 0.5959129861568886, + "grad_norm": 9.999725341796875, + "learning_rate": 6.466725820763088e-06, + "log_odds_chosen": 0.4868529438972473, + "log_odds_ratio": -0.48662108182907104, + "logits/chosen": -1.0361462831497192, + "logits/rejected": -0.9489240646362305, + "logps/chosen": -1.4281866550445557, + "logps/rejected": -1.810755968093872, + "loss": 2.7363, + "nll_loss": 0.6354085206985474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1428186595439911, + "rewards/margins": 0.0382569283246994, + "rewards/rejected": -0.1810755878686905, + "step": 226 + }, + { + "epoch": 0.5985497692814766, + "grad_norm": 9.958191871643066, + "learning_rate": 6.459627329192546e-06, + "log_odds_chosen": 0.18820539116859436, + "log_odds_ratio": -0.6056598424911499, + "logits/chosen": -1.0441505908966064, + "logits/rejected": -0.9917400479316711, + "logps/chosen": -1.5679686069488525, + "logps/rejected": -1.7161450386047363, + "loss": 2.7614, + "nll_loss": 0.6297923922538757, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1567968726158142, + "rewards/margins": 0.014817635528743267, + "rewards/rejected": -0.17161451280117035, + "step": 227 + }, + { + "epoch": 0.6011865524060646, + "grad_norm": 10.128595352172852, + "learning_rate": 6.452528837622005e-06, + "log_odds_chosen": 0.2932848036289215, + "log_odds_ratio": -0.5645895004272461, + "logits/chosen": -0.9858654141426086, + "logits/rejected": -0.9277141094207764, + "logps/chosen": -1.513630747795105, + "logps/rejected": -1.7531180381774902, + "loss": 2.8007, + "nll_loss": 0.6437171697616577, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1513630747795105, + "rewards/margins": 0.02394874021410942, + "rewards/rejected": -0.17531180381774902, + "step": 228 + }, + { + "epoch": 0.6038233355306526, + "grad_norm": 9.5545015335083, + "learning_rate": 6.445430346051464e-06, + "log_odds_chosen": 0.6906859874725342, + "log_odds_ratio": -0.42688441276550293, + "logits/chosen": -0.949820876121521, + "logits/rejected": -0.9070621728897095, + "logps/chosen": -1.4062092304229736, + "logps/rejected": -1.950735092163086, + "loss": 1.7927, + "nll_loss": 0.40549033880233765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14062091708183289, + "rewards/margins": 0.054452601820230484, + "rewards/rejected": -0.19507351517677307, + "step": 229 + }, + { + "epoch": 0.6064601186552406, + "grad_norm": 10.831774711608887, + "learning_rate": 6.438331854480923e-06, + "log_odds_chosen": 0.3960343301296234, + "log_odds_ratio": -0.5205682516098022, + "logits/chosen": -1.0768241882324219, + "logits/rejected": -0.9421679973602295, + "logps/chosen": -1.5416074991226196, + "logps/rejected": -1.8671907186508179, + "loss": 3.1623, + "nll_loss": 0.7385120391845703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1541607528924942, + "rewards/margins": 0.032558321952819824, + "rewards/rejected": -0.18671908974647522, + "step": 230 + }, + { + "epoch": 0.6090969017798286, + "grad_norm": 9.034571647644043, + "learning_rate": 6.431233362910381e-06, + "log_odds_chosen": 0.564643383026123, + "log_odds_ratio": -0.46083638072013855, + "logits/chosen": -0.9658318758010864, + "logits/rejected": -0.9441201090812683, + "logps/chosen": -1.2823760509490967, + "logps/rejected": -1.703157901763916, + "loss": 1.6643, + "nll_loss": 0.36999040842056274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12823760509490967, + "rewards/margins": 0.04207818582653999, + "rewards/rejected": -0.17031580209732056, + "step": 231 + }, + { + "epoch": 0.6117336849044166, + "grad_norm": 10.05081558227539, + "learning_rate": 6.42413487133984e-06, + "log_odds_chosen": 0.3165929913520813, + "log_odds_ratio": -0.5530787110328674, + "logits/chosen": -1.0751591920852661, + "logits/rejected": -0.9716577529907227, + "logps/chosen": -1.5636658668518066, + "logps/rejected": -1.8268802165985107, + "loss": 2.8134, + "nll_loss": 0.6480363607406616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15636658668518066, + "rewards/margins": 0.026321424171328545, + "rewards/rejected": -0.18268801271915436, + "step": 232 + }, + { + "epoch": 0.6143704680290046, + "grad_norm": 10.55700969696045, + "learning_rate": 6.417036379769299e-06, + "log_odds_chosen": 0.21344926953315735, + "log_odds_ratio": -0.6002848148345947, + "logits/chosen": -1.0792782306671143, + "logits/rejected": -1.0400300025939941, + "logps/chosen": -1.662772297859192, + "logps/rejected": -1.8279900550842285, + "loss": 2.9567, + "nll_loss": 0.6791369318962097, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1662772297859192, + "rewards/margins": 0.01652177795767784, + "rewards/rejected": -0.18279901146888733, + "step": 233 + }, + { + "epoch": 0.6170072511535926, + "grad_norm": 9.214619636535645, + "learning_rate": 6.409937888198757e-06, + "log_odds_chosen": 0.22965389490127563, + "log_odds_ratio": -0.6114335060119629, + "logits/chosen": -0.9787470102310181, + "logits/rejected": -0.9347630143165588, + "logps/chosen": -1.5235848426818848, + "logps/rejected": -1.7249484062194824, + "loss": 2.1824, + "nll_loss": 0.48445600271224976, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1523585021495819, + "rewards/margins": 0.020136352628469467, + "rewards/rejected": -0.17249484360218048, + "step": 234 + }, + { + "epoch": 0.6196440342781806, + "grad_norm": 9.567245483398438, + "learning_rate": 6.4028393966282165e-06, + "log_odds_chosen": 0.2276856005191803, + "log_odds_ratio": -0.5981299877166748, + "logits/chosen": -0.9614957571029663, + "logits/rejected": -0.8972662687301636, + "logps/chosen": -1.5509299039840698, + "logps/rejected": -1.7373332977294922, + "loss": 2.3168, + "nll_loss": 0.5193837285041809, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1550929844379425, + "rewards/margins": 0.01864035427570343, + "rewards/rejected": -0.17373332381248474, + "step": 235 + }, + { + "epoch": 0.6222808174027686, + "grad_norm": 9.45780086517334, + "learning_rate": 6.3957409050576745e-06, + "log_odds_chosen": 0.5684501528739929, + "log_odds_ratio": -0.45974200963974, + "logits/chosen": -0.9882232546806335, + "logits/rejected": -0.9146469831466675, + "logps/chosen": -1.3238399028778076, + "logps/rejected": -1.7685880661010742, + "loss": 2.0164, + "nll_loss": 0.4581286311149597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13238400220870972, + "rewards/margins": 0.044474828988313675, + "rewards/rejected": -0.1768588274717331, + "step": 236 + }, + { + "epoch": 0.6249176005273567, + "grad_norm": 9.68630599975586, + "learning_rate": 6.388642413487134e-06, + "log_odds_chosen": 0.2962833344936371, + "log_odds_ratio": -0.563479483127594, + "logits/chosen": -1.0296807289123535, + "logits/rejected": -0.970088541507721, + "logps/chosen": -1.3749408721923828, + "logps/rejected": -1.5976872444152832, + "loss": 2.4518, + "nll_loss": 0.5565969944000244, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13749408721923828, + "rewards/margins": 0.02227463759481907, + "rewards/rejected": -0.1597687155008316, + "step": 237 + }, + { + "epoch": 0.6275543836519446, + "grad_norm": 10.236848831176758, + "learning_rate": 6.381543921916592e-06, + "log_odds_chosen": 0.24890679121017456, + "log_odds_ratio": -0.5857919454574585, + "logits/chosen": -1.0486788749694824, + "logits/rejected": -0.9479892253875732, + "logps/chosen": -1.5664364099502563, + "logps/rejected": -1.764575481414795, + "loss": 2.699, + "nll_loss": 0.6161803007125854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15664362907409668, + "rewards/margins": 0.019813913851976395, + "rewards/rejected": -0.17645753920078278, + "step": 238 + }, + { + "epoch": 0.6301911667765326, + "grad_norm": 9.747345924377441, + "learning_rate": 6.374445430346051e-06, + "log_odds_chosen": 0.22345906496047974, + "log_odds_ratio": -0.5982892513275146, + "logits/chosen": -1.0117814540863037, + "logits/rejected": -0.9376793503761292, + "logps/chosen": -1.4230129718780518, + "logps/rejected": -1.586336612701416, + "loss": 2.3352, + "nll_loss": 0.5239756107330322, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1423013061285019, + "rewards/margins": 0.01633235067129135, + "rewards/rejected": -0.15863364934921265, + "step": 239 + }, + { + "epoch": 0.6328279499011207, + "grad_norm": 9.946846008300781, + "learning_rate": 6.36734693877551e-06, + "log_odds_chosen": 0.48092615604400635, + "log_odds_ratio": -0.5143439769744873, + "logits/chosen": -1.0396180152893066, + "logits/rejected": -0.9259074926376343, + "logps/chosen": -1.461484670639038, + "logps/rejected": -1.854004979133606, + "loss": 2.3684, + "nll_loss": 0.5406550765037537, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1461484730243683, + "rewards/margins": 0.039252012968063354, + "rewards/rejected": -0.18540048599243164, + "step": 240 + }, + { + "epoch": 0.6354647330257086, + "grad_norm": 8.788450241088867, + "learning_rate": 6.360248447204968e-06, + "log_odds_chosen": 0.47860464453697205, + "log_odds_ratio": -0.4993082582950592, + "logits/chosen": -0.9827038645744324, + "logits/rejected": -0.946448028087616, + "logps/chosen": -1.3302018642425537, + "logps/rejected": -1.6790567636489868, + "loss": 1.8064, + "nll_loss": 0.4016784429550171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13302019238471985, + "rewards/margins": 0.03488549590110779, + "rewards/rejected": -0.16790568828582764, + "step": 241 + }, + { + "epoch": 0.6381015161502966, + "grad_norm": 9.804183006286621, + "learning_rate": 6.353149955634427e-06, + "log_odds_chosen": 0.39079076051712036, + "log_odds_ratio": -0.5262644290924072, + "logits/chosen": -1.0029771327972412, + "logits/rejected": -0.911673367023468, + "logps/chosen": -1.5539920330047607, + "logps/rejected": -1.8723018169403076, + "loss": 2.3257, + "nll_loss": 0.5287995338439941, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15539920330047607, + "rewards/margins": 0.03183097392320633, + "rewards/rejected": -0.187230184674263, + "step": 242 + }, + { + "epoch": 0.6407382992748847, + "grad_norm": 9.535198211669922, + "learning_rate": 6.346051464063886e-06, + "log_odds_chosen": 0.37506628036499023, + "log_odds_ratio": -0.5474951863288879, + "logits/chosen": -1.0286223888397217, + "logits/rejected": -0.9575079083442688, + "logps/chosen": -1.509547472000122, + "logps/rejected": -1.8135558366775513, + "loss": 2.2811, + "nll_loss": 0.5155328512191772, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15095475316047668, + "rewards/margins": 0.030400821939110756, + "rewards/rejected": -0.1813555806875229, + "step": 243 + }, + { + "epoch": 0.6433750823994726, + "grad_norm": 9.990318298339844, + "learning_rate": 6.338952972493345e-06, + "log_odds_chosen": 0.4546193778514862, + "log_odds_ratio": -0.5123645663261414, + "logits/chosen": -0.9370576739311218, + "logits/rejected": -0.9146692752838135, + "logps/chosen": -1.3397631645202637, + "logps/rejected": -1.688523530960083, + "loss": 2.5396, + "nll_loss": 0.5836517214775085, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1339763104915619, + "rewards/margins": 0.03487604111433029, + "rewards/rejected": -0.16885235905647278, + "step": 244 + }, + { + "epoch": 0.6460118655240606, + "grad_norm": 9.547015190124512, + "learning_rate": 6.331854480922804e-06, + "log_odds_chosen": 0.1481492817401886, + "log_odds_ratio": -0.6277358531951904, + "logits/chosen": -1.0131888389587402, + "logits/rejected": -0.9746987819671631, + "logps/chosen": -1.396069884300232, + "logps/rejected": -1.5164241790771484, + "loss": 2.0986, + "nll_loss": 0.46188369393348694, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13960698246955872, + "rewards/margins": 0.012035440653562546, + "rewards/rejected": -0.15164242684841156, + "step": 245 + }, + { + "epoch": 0.6486486486486487, + "grad_norm": 10.266288757324219, + "learning_rate": 6.324755989352263e-06, + "log_odds_chosen": 0.3449682295322418, + "log_odds_ratio": -0.5422677993774414, + "logits/chosen": -0.9396032094955444, + "logits/rejected": -0.8918734192848206, + "logps/chosen": -1.4369215965270996, + "logps/rejected": -1.6943345069885254, + "loss": 2.6221, + "nll_loss": 0.6013014912605286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14369216561317444, + "rewards/margins": 0.025741294026374817, + "rewards/rejected": -0.16943347454071045, + "step": 246 + }, + { + "epoch": 0.6512854317732366, + "grad_norm": 10.216219902038574, + "learning_rate": 6.317657497781721e-06, + "log_odds_chosen": 0.13044913113117218, + "log_odds_ratio": -0.6382143497467041, + "logits/chosen": -1.0274735689163208, + "logits/rejected": -0.9672619104385376, + "logps/chosen": -1.6018891334533691, + "logps/rejected": -1.697109580039978, + "loss": 2.5938, + "nll_loss": 0.5846191644668579, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16018891334533691, + "rewards/margins": 0.009522044099867344, + "rewards/rejected": -0.16971096396446228, + "step": 247 + }, + { + "epoch": 0.6539222148978246, + "grad_norm": 10.080612182617188, + "learning_rate": 6.3105590062111805e-06, + "log_odds_chosen": 0.3156033754348755, + "log_odds_ratio": -0.5620311498641968, + "logits/chosen": -0.9943605661392212, + "logits/rejected": -0.913483202457428, + "logps/chosen": -1.4455002546310425, + "logps/rejected": -1.6898422241210938, + "loss": 2.3417, + "nll_loss": 0.5292194485664368, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14455002546310425, + "rewards/margins": 0.024434205144643784, + "rewards/rejected": -0.16898421943187714, + "step": 248 + }, + { + "epoch": 0.6565589980224127, + "grad_norm": 10.621593475341797, + "learning_rate": 6.3034605146406385e-06, + "log_odds_chosen": 0.3722479045391083, + "log_odds_ratio": -0.5260739326477051, + "logits/chosen": -1.010783076286316, + "logits/rejected": -0.9419607520103455, + "logps/chosen": -1.393200159072876, + "logps/rejected": -1.6805057525634766, + "loss": 2.4488, + "nll_loss": 0.5595893263816833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1393200308084488, + "rewards/margins": 0.02873056009411812, + "rewards/rejected": -0.1680505871772766, + "step": 249 + }, + { + "epoch": 0.6591957811470006, + "grad_norm": 10.194984436035156, + "learning_rate": 6.296362023070097e-06, + "log_odds_chosen": 0.2856564223766327, + "log_odds_ratio": -0.5765359997749329, + "logits/chosen": -1.040339708328247, + "logits/rejected": -0.9896783828735352, + "logps/chosen": -1.6168367862701416, + "logps/rejected": -1.860325813293457, + "loss": 2.5298, + "nll_loss": 0.5747956037521362, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16168367862701416, + "rewards/margins": 0.024348901584744453, + "rewards/rejected": -0.18603257834911346, + "step": 250 + }, + { + "epoch": 0.6618325642715887, + "grad_norm": 10.166160583496094, + "learning_rate": 6.289263531499556e-06, + "log_odds_chosen": 0.49128079414367676, + "log_odds_ratio": -0.5052339434623718, + "logits/chosen": -1.0190470218658447, + "logits/rejected": -0.9464223384857178, + "logps/chosen": -1.583850383758545, + "logps/rejected": -1.9695919752120972, + "loss": 3.0117, + "nll_loss": 0.7024120092391968, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1583850383758545, + "rewards/margins": 0.038574155420064926, + "rewards/rejected": -0.1969592124223709, + "step": 251 + }, + { + "epoch": 0.6644693473961767, + "grad_norm": 9.572004318237305, + "learning_rate": 6.282165039929014e-06, + "log_odds_chosen": 0.6220227479934692, + "log_odds_ratio": -0.44183260202407837, + "logits/chosen": -1.118480920791626, + "logits/rejected": -0.9814411401748657, + "logps/chosen": -1.4758163690567017, + "logps/rejected": -1.9799898862838745, + "loss": 2.901, + "nll_loss": 0.6810585856437683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14758163690567017, + "rewards/margins": 0.05041735619306564, + "rewards/rejected": -0.1979989856481552, + "step": 252 + }, + { + "epoch": 0.6671061305207646, + "grad_norm": 10.045660972595215, + "learning_rate": 6.275066548358473e-06, + "log_odds_chosen": 0.2900097966194153, + "log_odds_ratio": -0.5657051801681519, + "logits/chosen": -1.0574668645858765, + "logits/rejected": -0.9897823333740234, + "logps/chosen": -1.668576955795288, + "logps/rejected": -1.9087705612182617, + "loss": 2.8323, + "nll_loss": 0.6514928936958313, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16685768961906433, + "rewards/margins": 0.024019362404942513, + "rewards/rejected": -0.1908770501613617, + "step": 253 + }, + { + "epoch": 0.6697429136453527, + "grad_norm": 9.784111976623535, + "learning_rate": 6.267968056787932e-06, + "log_odds_chosen": 0.21802666783332825, + "log_odds_ratio": -0.5974329113960266, + "logits/chosen": -1.0291823148727417, + "logits/rejected": -0.9502480030059814, + "logps/chosen": -1.4417308568954468, + "logps/rejected": -1.6095776557922363, + "loss": 2.3837, + "nll_loss": 0.536192774772644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14417308568954468, + "rewards/margins": 0.016784675419330597, + "rewards/rejected": -0.16095776855945587, + "step": 254 + }, + { + "epoch": 0.6723796967699407, + "grad_norm": 10.003222465515137, + "learning_rate": 6.260869565217391e-06, + "log_odds_chosen": 0.3013702630996704, + "log_odds_ratio": -0.5770978927612305, + "logits/chosen": -0.9175786375999451, + "logits/rejected": -0.8581453561782837, + "logps/chosen": -1.6248061656951904, + "logps/rejected": -1.8886432647705078, + "loss": 2.7559, + "nll_loss": 0.6312604546546936, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16248062252998352, + "rewards/margins": 0.026383716613054276, + "rewards/rejected": -0.1888643503189087, + "step": 255 + }, + { + "epoch": 0.6750164798945286, + "grad_norm": 9.680693626403809, + "learning_rate": 6.25377107364685e-06, + "log_odds_chosen": 0.2826671898365021, + "log_odds_ratio": -0.5702804327011108, + "logits/chosen": -1.057939052581787, + "logits/rejected": -0.9414132833480835, + "logps/chosen": -1.4873517751693726, + "logps/rejected": -1.7252017259597778, + "loss": 2.5016, + "nll_loss": 0.5683744549751282, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1487351655960083, + "rewards/margins": 0.023785004392266273, + "rewards/rejected": -0.17252019047737122, + "step": 256 + }, + { + "epoch": 0.6776532630191167, + "grad_norm": 9.82450008392334, + "learning_rate": 6.246672582076309e-06, + "log_odds_chosen": 0.42896807193756104, + "log_odds_ratio": -0.5112147927284241, + "logits/chosen": -1.066590428352356, + "logits/rejected": -0.9673632383346558, + "logps/chosen": -1.5336809158325195, + "logps/rejected": -1.882852554321289, + "loss": 2.7702, + "nll_loss": 0.641433835029602, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15336810052394867, + "rewards/margins": 0.03491714596748352, + "rewards/rejected": -0.1882852464914322, + "step": 257 + }, + { + "epoch": 0.6802900461437047, + "grad_norm": 9.927961349487305, + "learning_rate": 6.239574090505767e-06, + "log_odds_chosen": 0.38350343704223633, + "log_odds_ratio": -0.5303336381912231, + "logits/chosen": -1.0436394214630127, + "logits/rejected": -0.9509331583976746, + "logps/chosen": -1.5962193012237549, + "logps/rejected": -1.9133834838867188, + "loss": 2.694, + "nll_loss": 0.6204739212989807, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.159621924161911, + "rewards/margins": 0.03171642869710922, + "rewards/rejected": -0.19133836030960083, + "step": 258 + }, + { + "epoch": 0.6829268292682927, + "grad_norm": 10.485347747802734, + "learning_rate": 6.232475598935226e-06, + "log_odds_chosen": 0.1634972095489502, + "log_odds_ratio": -0.6173048615455627, + "logits/chosen": -1.057889699935913, + "logits/rejected": -1.0043100118637085, + "logps/chosen": -1.6032731533050537, + "logps/rejected": -1.7344703674316406, + "loss": 3.0032, + "nll_loss": 0.6890586614608765, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16032731533050537, + "rewards/margins": 0.01311972551047802, + "rewards/rejected": -0.17344704270362854, + "step": 259 + }, + { + "epoch": 0.6855636123928807, + "grad_norm": 9.942307472229004, + "learning_rate": 6.225377107364685e-06, + "log_odds_chosen": 0.45469415187835693, + "log_odds_ratio": -0.4982328414916992, + "logits/chosen": -1.0273206233978271, + "logits/rejected": -0.9484516978263855, + "logps/chosen": -1.450758934020996, + "logps/rejected": -1.815749168395996, + "loss": 2.3167, + "nll_loss": 0.5293515920639038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14507588744163513, + "rewards/margins": 0.0364990308880806, + "rewards/rejected": -0.18157494068145752, + "step": 260 + }, + { + "epoch": 0.6882003955174687, + "grad_norm": 10.553006172180176, + "learning_rate": 6.218278615794143e-06, + "log_odds_chosen": 0.3766656517982483, + "log_odds_ratio": -0.5311521887779236, + "logits/chosen": -1.0044142007827759, + "logits/rejected": -0.9288753867149353, + "logps/chosen": -1.6029090881347656, + "logps/rejected": -1.9176501035690308, + "loss": 2.6651, + "nll_loss": 0.6131627559661865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1602909117937088, + "rewards/margins": 0.03147410601377487, + "rewards/rejected": -0.19176501035690308, + "step": 261 + }, + { + "epoch": 0.6908371786420567, + "grad_norm": 10.289958000183105, + "learning_rate": 6.2111801242236025e-06, + "log_odds_chosen": 0.37050265073776245, + "log_odds_ratio": -0.5371365547180176, + "logits/chosen": -1.044307827949524, + "logits/rejected": -0.9999052286148071, + "logps/chosen": -1.5295069217681885, + "logps/rejected": -1.8322433233261108, + "loss": 2.8767, + "nll_loss": 0.6654726266860962, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1529507040977478, + "rewards/margins": 0.030273636803030968, + "rewards/rejected": -0.18322433531284332, + "step": 262 + }, + { + "epoch": 0.6934739617666447, + "grad_norm": 10.079249382019043, + "learning_rate": 6.204081632653061e-06, + "log_odds_chosen": 0.35678648948669434, + "log_odds_ratio": -0.5365309715270996, + "logits/chosen": -1.0440478324890137, + "logits/rejected": -0.9332611560821533, + "logps/chosen": -1.5165398120880127, + "logps/rejected": -1.7985224723815918, + "loss": 2.8466, + "nll_loss": 0.658006489276886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1516539752483368, + "rewards/margins": 0.028198271989822388, + "rewards/rejected": -0.17985224723815918, + "step": 263 + }, + { + "epoch": 0.6961107448912327, + "grad_norm": 10.370190620422363, + "learning_rate": 6.19698314108252e-06, + "log_odds_chosen": 0.31438887119293213, + "log_odds_ratio": -0.5823736786842346, + "logits/chosen": -1.0256203413009644, + "logits/rejected": -0.9438142776489258, + "logps/chosen": -1.4517719745635986, + "logps/rejected": -1.6985116004943848, + "loss": 2.22, + "nll_loss": 0.49675092101097107, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1451771855354309, + "rewards/margins": 0.02467396855354309, + "rewards/rejected": -0.169851154088974, + "step": 264 + }, + { + "epoch": 0.6987475280158207, + "grad_norm": 10.093335151672363, + "learning_rate": 6.189884649511978e-06, + "log_odds_chosen": 0.5108283758163452, + "log_odds_ratio": -0.48044654726982117, + "logits/chosen": -1.038010597229004, + "logits/rejected": -0.9140152931213379, + "logps/chosen": -1.3935471773147583, + "logps/rejected": -1.7870471477508545, + "loss": 2.311, + "nll_loss": 0.5297090411186218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13935470581054688, + "rewards/margins": 0.0393500030040741, + "rewards/rejected": -0.17870470881462097, + "step": 265 + }, + { + "epoch": 0.7013843111404087, + "grad_norm": 9.891541481018066, + "learning_rate": 6.182786157941437e-06, + "log_odds_chosen": 0.33088430762290955, + "log_odds_ratio": -0.5532064437866211, + "logits/chosen": -1.065554141998291, + "logits/rejected": -1.032827377319336, + "logps/chosen": -1.3114806413650513, + "logps/rejected": -1.5359127521514893, + "loss": 2.3022, + "nll_loss": 0.520240306854248, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1311480700969696, + "rewards/margins": 0.022443203255534172, + "rewards/rejected": -0.15359127521514893, + "step": 266 + }, + { + "epoch": 0.7040210942649967, + "grad_norm": 9.849285125732422, + "learning_rate": 6.175687666370896e-06, + "log_odds_chosen": 0.542593240737915, + "log_odds_ratio": -0.4701288342475891, + "logits/chosen": -1.037049412727356, + "logits/rejected": -0.9226208925247192, + "logps/chosen": -1.423630714416504, + "logps/rejected": -1.8576700687408447, + "loss": 2.2385, + "nll_loss": 0.5126224160194397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1423630714416504, + "rewards/margins": 0.043403930962085724, + "rewards/rejected": -0.1857670247554779, + "step": 267 + }, + { + "epoch": 0.7066578773895847, + "grad_norm": 10.11159610748291, + "learning_rate": 6.168589174800354e-06, + "log_odds_chosen": 0.3737114667892456, + "log_odds_ratio": -0.5347133278846741, + "logits/chosen": -1.0566768646240234, + "logits/rejected": -0.9615504741668701, + "logps/chosen": -1.5327125787734985, + "logps/rejected": -1.8281084299087524, + "loss": 2.7147, + "nll_loss": 0.6252114772796631, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15327127277851105, + "rewards/margins": 0.029539581388235092, + "rewards/rejected": -0.18281084299087524, + "step": 268 + }, + { + "epoch": 0.7092946605141727, + "grad_norm": 9.563637733459473, + "learning_rate": 6.161490683229813e-06, + "log_odds_chosen": 0.2552526295185089, + "log_odds_ratio": -0.5950664281845093, + "logits/chosen": -1.021816611289978, + "logits/rejected": -0.9392303824424744, + "logps/chosen": -1.48829984664917, + "logps/rejected": -1.7040952444076538, + "loss": 2.3807, + "nll_loss": 0.5356656908988953, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14882998168468475, + "rewards/margins": 0.021579559892416, + "rewards/rejected": -0.17040953040122986, + "step": 269 + }, + { + "epoch": 0.7119314436387607, + "grad_norm": 9.498360633850098, + "learning_rate": 6.154392191659272e-06, + "log_odds_chosen": 0.41562414169311523, + "log_odds_ratio": -0.5205641984939575, + "logits/chosen": -0.9885743856430054, + "logits/rejected": -0.9419618844985962, + "logps/chosen": -1.4006032943725586, + "logps/rejected": -1.7179901599884033, + "loss": 1.8683, + "nll_loss": 0.4150174856185913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14006033539772034, + "rewards/margins": 0.031738679856061935, + "rewards/rejected": -0.17179900407791138, + "step": 270 + }, + { + "epoch": 0.7145682267633487, + "grad_norm": 9.569456100463867, + "learning_rate": 6.147293700088731e-06, + "log_odds_chosen": 0.48436087369918823, + "log_odds_ratio": -0.49208369851112366, + "logits/chosen": -1.0386489629745483, + "logits/rejected": -0.9447503089904785, + "logps/chosen": -1.457115650177002, + "logps/rejected": -1.8508636951446533, + "loss": 2.3597, + "nll_loss": 0.5407109260559082, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14571155607700348, + "rewards/margins": 0.03937479853630066, + "rewards/rejected": -0.18508636951446533, + "step": 271 + }, + { + "epoch": 0.7172050098879367, + "grad_norm": 11.085938453674316, + "learning_rate": 6.140195208518189e-06, + "log_odds_chosen": 0.44340041279792786, + "log_odds_ratio": -0.5080432891845703, + "logits/chosen": -1.1370983123779297, + "logits/rejected": -1.0340384244918823, + "logps/chosen": -1.6304019689559937, + "logps/rejected": -1.9962579011917114, + "loss": 3.1557, + "nll_loss": 0.7381245493888855, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1630401909351349, + "rewards/margins": 0.036585595458745956, + "rewards/rejected": -0.19962579011917114, + "step": 272 + }, + { + "epoch": 0.7198417930125247, + "grad_norm": 10.364092826843262, + "learning_rate": 6.133096716947649e-06, + "log_odds_chosen": 0.23562709987163544, + "log_odds_ratio": -0.5894039869308472, + "logits/chosen": -1.0973641872406006, + "logits/rejected": -1.036959171295166, + "logps/chosen": -1.6053028106689453, + "logps/rejected": -1.7993088960647583, + "loss": 2.8391, + "nll_loss": 0.650845468044281, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16053026914596558, + "rewards/margins": 0.019400635734200478, + "rewards/rejected": -0.1799308955669403, + "step": 273 + }, + { + "epoch": 0.7224785761371127, + "grad_norm": 9.403023719787598, + "learning_rate": 6.125998225377107e-06, + "log_odds_chosen": 0.5421465635299683, + "log_odds_ratio": -0.47444844245910645, + "logits/chosen": -1.0056291818618774, + "logits/rejected": -0.9600179195404053, + "logps/chosen": -1.420364260673523, + "logps/rejected": -1.8501088619232178, + "loss": 2.0598, + "nll_loss": 0.467495322227478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14203643798828125, + "rewards/margins": 0.04297446459531784, + "rewards/rejected": -0.1850108951330185, + "step": 274 + }, + { + "epoch": 0.7251153592617007, + "grad_norm": 10.056530952453613, + "learning_rate": 6.1188997338065665e-06, + "log_odds_chosen": 0.6246941089630127, + "log_odds_ratio": -0.47945964336395264, + "logits/chosen": -1.0567095279693604, + "logits/rejected": -0.9212784767150879, + "logps/chosen": -1.582176685333252, + "logps/rejected": -2.1189141273498535, + "loss": 2.7229, + "nll_loss": 0.6327791213989258, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1582176685333252, + "rewards/margins": 0.05367375165224075, + "rewards/rejected": -0.21189141273498535, + "step": 275 + }, + { + "epoch": 0.7277521423862887, + "grad_norm": 10.36487102508545, + "learning_rate": 6.111801242236025e-06, + "log_odds_chosen": 0.3678748905658722, + "log_odds_ratio": -0.5407025814056396, + "logits/chosen": -1.0414223670959473, + "logits/rejected": -0.9859704375267029, + "logps/chosen": -1.5252094268798828, + "logps/rejected": -1.8135719299316406, + "loss": 2.6263, + "nll_loss": 0.602510929107666, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15252093970775604, + "rewards/margins": 0.028836257755756378, + "rewards/rejected": -0.18135720491409302, + "step": 276 + }, + { + "epoch": 0.7303889255108768, + "grad_norm": 9.549089431762695, + "learning_rate": 6.104702750665483e-06, + "log_odds_chosen": 0.22213289141654968, + "log_odds_ratio": -0.5968341827392578, + "logits/chosen": -1.0602388381958008, + "logits/rejected": -0.9919387698173523, + "logps/chosen": -1.4902098178863525, + "logps/rejected": -1.6675565242767334, + "loss": 2.3944, + "nll_loss": 0.5389155745506287, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1490209847688675, + "rewards/margins": 0.017734672874212265, + "rewards/rejected": -0.16675564646720886, + "step": 277 + }, + { + "epoch": 0.7330257086354647, + "grad_norm": 9.606721878051758, + "learning_rate": 6.097604259094942e-06, + "log_odds_chosen": 0.2623395323753357, + "log_odds_ratio": -0.5808668732643127, + "logits/chosen": -1.0555493831634521, + "logits/rejected": -0.9892610907554626, + "logps/chosen": -1.4999754428863525, + "logps/rejected": -1.7185721397399902, + "loss": 2.4409, + "nll_loss": 0.5521459579467773, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1499975323677063, + "rewards/margins": 0.02185966446995735, + "rewards/rejected": -0.17185720801353455, + "step": 278 + }, + { + "epoch": 0.7356624917600527, + "grad_norm": 10.382844924926758, + "learning_rate": 6.0905057675244005e-06, + "log_odds_chosen": 0.2454441487789154, + "log_odds_ratio": -0.5859407186508179, + "logits/chosen": -0.9638535976409912, + "logits/rejected": -0.9263155460357666, + "logps/chosen": -1.5392746925354004, + "logps/rejected": -1.7312613725662231, + "loss": 2.809, + "nll_loss": 0.6436530947685242, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15392747521400452, + "rewards/margins": 0.019198667258024216, + "rewards/rejected": -0.17312613129615784, + "step": 279 + }, + { + "epoch": 0.7382992748846408, + "grad_norm": 9.472749710083008, + "learning_rate": 6.083407275953859e-06, + "log_odds_chosen": 0.05665683373808861, + "log_odds_ratio": -0.670945405960083, + "logits/chosen": -1.0366100072860718, + "logits/rejected": -0.9746451377868652, + "logps/chosen": -1.4893945455551147, + "logps/rejected": -1.5296626091003418, + "loss": 2.1057, + "nll_loss": 0.4593251943588257, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14893946051597595, + "rewards/margins": 0.004026808775961399, + "rewards/rejected": -0.15296626091003418, + "step": 280 + }, + { + "epoch": 0.7409360580092288, + "grad_norm": 9.456088066101074, + "learning_rate": 6.076308784383318e-06, + "log_odds_chosen": 0.27157944440841675, + "log_odds_ratio": -0.5769294500350952, + "logits/chosen": -0.9719491600990295, + "logits/rejected": -0.9362573623657227, + "logps/chosen": -1.4628607034683228, + "logps/rejected": -1.685976266860962, + "loss": 1.9465, + "nll_loss": 0.4289361238479614, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14628607034683228, + "rewards/margins": 0.02231154590845108, + "rewards/rejected": -0.16859760880470276, + "step": 281 + }, + { + "epoch": 0.7435728411338167, + "grad_norm": 10.356871604919434, + "learning_rate": 6.069210292812777e-06, + "log_odds_chosen": 0.4430519938468933, + "log_odds_ratio": -0.5062916278839111, + "logits/chosen": -1.0771749019622803, + "logits/rejected": -0.9861480593681335, + "logps/chosen": -1.5304234027862549, + "logps/rejected": -1.8878121376037598, + "loss": 2.872, + "nll_loss": 0.6673677563667297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15304234623908997, + "rewards/margins": 0.03573887050151825, + "rewards/rejected": -0.18878121674060822, + "step": 282 + }, + { + "epoch": 0.7462096242584048, + "grad_norm": 10.179832458496094, + "learning_rate": 6.062111801242236e-06, + "log_odds_chosen": 0.11679290235042572, + "log_odds_ratio": -0.6467869281768799, + "logits/chosen": -0.9838048219680786, + "logits/rejected": -0.9357846975326538, + "logps/chosen": -1.581260323524475, + "logps/rejected": -1.6788735389709473, + "loss": 2.7507, + "nll_loss": 0.6229973435401917, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15812602639198303, + "rewards/margins": 0.009761332534253597, + "rewards/rejected": -0.1678873598575592, + "step": 283 + }, + { + "epoch": 0.7488464073829928, + "grad_norm": 9.709866523742676, + "learning_rate": 6.055013309671695e-06, + "log_odds_chosen": 0.1251356452703476, + "log_odds_ratio": -0.6371869444847107, + "logits/chosen": -0.9856249094009399, + "logits/rejected": -0.9588727951049805, + "logps/chosen": -1.3696320056915283, + "logps/rejected": -1.4571166038513184, + "loss": 2.046, + "nll_loss": 0.4477924108505249, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13696320354938507, + "rewards/margins": 0.008748448453843594, + "rewards/rejected": -0.14571166038513184, + "step": 284 + }, + { + "epoch": 0.7514831905075807, + "grad_norm": 9.576770782470703, + "learning_rate": 6.047914818101153e-06, + "log_odds_chosen": 0.8595589399337769, + "log_odds_ratio": -0.4332429766654968, + "logits/chosen": -0.989209771156311, + "logits/rejected": -0.9225939512252808, + "logps/chosen": -1.6342555284500122, + "logps/rejected": -2.388579845428467, + "loss": 2.1112, + "nll_loss": 0.4844658672809601, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16342556476593018, + "rewards/margins": 0.0754324272274971, + "rewards/rejected": -0.23885798454284668, + "step": 285 + }, + { + "epoch": 0.7541199736321688, + "grad_norm": 9.967181205749512, + "learning_rate": 6.040816326530612e-06, + "log_odds_chosen": 0.5282140374183655, + "log_odds_ratio": -0.4664245843887329, + "logits/chosen": -1.0063544511795044, + "logits/rejected": -0.9135369658470154, + "logps/chosen": -1.426211953163147, + "logps/rejected": -1.8436334133148193, + "loss": 2.6222, + "nll_loss": 0.6088989973068237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14262118935585022, + "rewards/margins": 0.04174215719103813, + "rewards/rejected": -0.18436335027217865, + "step": 286 + }, + { + "epoch": 0.7567567567567568, + "grad_norm": 10.017861366271973, + "learning_rate": 6.033717834960071e-06, + "log_odds_chosen": 0.377551794052124, + "log_odds_ratio": -0.5416433215141296, + "logits/chosen": -1.077212929725647, + "logits/rejected": -0.9847630262374878, + "logps/chosen": -1.4043962955474854, + "logps/rejected": -1.7077497243881226, + "loss": 2.6262, + "nll_loss": 0.6023932695388794, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14043962955474854, + "rewards/margins": 0.030335336923599243, + "rewards/rejected": -0.17077496647834778, + "step": 287 + }, + { + "epoch": 0.7593935398813447, + "grad_norm": 10.703896522521973, + "learning_rate": 6.026619343389529e-06, + "log_odds_chosen": 0.5014585256576538, + "log_odds_ratio": -0.47988784313201904, + "logits/chosen": -1.066920280456543, + "logits/rejected": -0.9490258097648621, + "logps/chosen": -1.6449315547943115, + "logps/rejected": -2.0663325786590576, + "loss": 2.751, + "nll_loss": 0.6397608518600464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16449317336082458, + "rewards/margins": 0.04214010387659073, + "rewards/rejected": -0.20663326978683472, + "step": 288 + }, + { + "epoch": 0.7620303230059328, + "grad_norm": 11.897303581237793, + "learning_rate": 6.019520851818989e-06, + "log_odds_chosen": 0.2389475405216217, + "log_odds_ratio": -0.5903327465057373, + "logits/chosen": -1.0451998710632324, + "logits/rejected": -0.9830878973007202, + "logps/chosen": -2.0414822101593018, + "logps/rejected": -2.2409515380859375, + "loss": 4.2908, + "nll_loss": 1.013670563697815, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20414823293685913, + "rewards/margins": 0.01994692161679268, + "rewards/rejected": -0.2240951508283615, + "step": 289 + }, + { + "epoch": 0.7646671061305208, + "grad_norm": 9.770140647888184, + "learning_rate": 6.012422360248447e-06, + "log_odds_chosen": 0.44657570123672485, + "log_odds_ratio": -0.5040766000747681, + "logits/chosen": -1.0088856220245361, + "logits/rejected": -0.9383779168128967, + "logps/chosen": -1.5648479461669922, + "logps/rejected": -1.9286675453186035, + "loss": 2.4701, + "nll_loss": 0.5671277642250061, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15648479759693146, + "rewards/margins": 0.03638195991516113, + "rewards/rejected": -0.19286677241325378, + "step": 290 + }, + { + "epoch": 0.7673038892551087, + "grad_norm": 10.833564758300781, + "learning_rate": 6.0053238686779056e-06, + "log_odds_chosen": 0.35445818305015564, + "log_odds_ratio": -0.540325939655304, + "logits/chosen": -1.0003407001495361, + "logits/rejected": -0.9062169790267944, + "logps/chosen": -1.5486013889312744, + "logps/rejected": -1.8227717876434326, + "loss": 2.9061, + "nll_loss": 0.6724933981895447, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15486015379428864, + "rewards/margins": 0.02741703763604164, + "rewards/rejected": -0.18227717280387878, + "step": 291 + }, + { + "epoch": 0.7699406723796968, + "grad_norm": 10.445130348205566, + "learning_rate": 5.9982253771073645e-06, + "log_odds_chosen": 0.24582642316818237, + "log_odds_ratio": -0.5868759155273438, + "logits/chosen": -1.0471625328063965, + "logits/rejected": -0.9607016444206238, + "logps/chosen": -1.5718451738357544, + "logps/rejected": -1.7703518867492676, + "loss": 2.7326, + "nll_loss": 0.6244602203369141, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15718451142311096, + "rewards/margins": 0.019850673153996468, + "rewards/rejected": -0.17703518271446228, + "step": 292 + }, + { + "epoch": 0.7725774555042848, + "grad_norm": 9.336409568786621, + "learning_rate": 5.991126885536823e-06, + "log_odds_chosen": 0.28063106536865234, + "log_odds_ratio": -0.5680664777755737, + "logits/chosen": -1.0261191129684448, + "logits/rejected": -0.96542888879776, + "logps/chosen": -1.2532575130462646, + "logps/rejected": -1.4647881984710693, + "loss": 1.8778, + "nll_loss": 0.4126465320587158, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1253257393836975, + "rewards/margins": 0.021153081208467484, + "rewards/rejected": -0.1464788317680359, + "step": 293 + }, + { + "epoch": 0.7752142386288727, + "grad_norm": 10.19467830657959, + "learning_rate": 5.984028393966282e-06, + "log_odds_chosen": 0.3982515335083008, + "log_odds_ratio": -0.5293940305709839, + "logits/chosen": -1.054459810256958, + "logits/rejected": -1.005912184715271, + "logps/chosen": -1.3878642320632935, + "logps/rejected": -1.7107563018798828, + "loss": 2.2757, + "nll_loss": 0.5159916877746582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1387864202260971, + "rewards/margins": 0.032289210706949234, + "rewards/rejected": -0.17107564210891724, + "step": 294 + }, + { + "epoch": 0.7778510217534608, + "grad_norm": 10.44420051574707, + "learning_rate": 5.97692990239574e-06, + "log_odds_chosen": 0.37824469804763794, + "log_odds_ratio": -0.5246709585189819, + "logits/chosen": -1.0574809312820435, + "logits/rejected": -0.9439191222190857, + "logps/chosen": -1.517193078994751, + "logps/rejected": -1.8170679807662964, + "loss": 2.7871, + "nll_loss": 0.6443036198616028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15171930193901062, + "rewards/margins": 0.029987502843141556, + "rewards/rejected": -0.18170681595802307, + "step": 295 + }, + { + "epoch": 0.7804878048780488, + "grad_norm": 9.556363105773926, + "learning_rate": 5.969831410825199e-06, + "log_odds_chosen": 0.5033254623413086, + "log_odds_ratio": -0.4875450134277344, + "logits/chosen": -1.0689876079559326, + "logits/rejected": -0.9918434619903564, + "logps/chosen": -1.4760112762451172, + "logps/rejected": -1.8789564371109009, + "loss": 2.5634, + "nll_loss": 0.592098593711853, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14760112762451172, + "rewards/margins": 0.040294528007507324, + "rewards/rejected": -0.18789565563201904, + "step": 296 + }, + { + "epoch": 0.7831245880026367, + "grad_norm": 10.892327308654785, + "learning_rate": 5.962732919254658e-06, + "log_odds_chosen": 0.5084447860717773, + "log_odds_ratio": -0.4953627288341522, + "logits/chosen": -0.9680205583572388, + "logits/rejected": -0.8744199872016907, + "logps/chosen": -1.580714464187622, + "logps/rejected": -2.009085178375244, + "loss": 3.0477, + "nll_loss": 0.712380051612854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15807145833969116, + "rewards/margins": 0.04283707216382027, + "rewards/rejected": -0.20090851187705994, + "step": 297 + }, + { + "epoch": 0.7857613711272248, + "grad_norm": 10.603328704833984, + "learning_rate": 5.955634427684117e-06, + "log_odds_chosen": 0.3260452449321747, + "log_odds_ratio": -0.5495268106460571, + "logits/chosen": -1.048534870147705, + "logits/rejected": -1.0102920532226562, + "logps/chosen": -1.6070526838302612, + "logps/rejected": -1.8762900829315186, + "loss": 2.8359, + "nll_loss": 0.6540123224258423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16070528328418732, + "rewards/margins": 0.02692374214529991, + "rewards/rejected": -0.18762901425361633, + "step": 298 + }, + { + "epoch": 0.7883981542518128, + "grad_norm": 9.625335693359375, + "learning_rate": 5.948535936113575e-06, + "log_odds_chosen": 0.4078028202056885, + "log_odds_ratio": -0.5223979949951172, + "logits/chosen": -1.0432285070419312, + "logits/rejected": -0.966896653175354, + "logps/chosen": -1.4103808403015137, + "logps/rejected": -1.7310397624969482, + "loss": 2.3302, + "nll_loss": 0.5303081274032593, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14103807508945465, + "rewards/margins": 0.03206588327884674, + "rewards/rejected": -0.17310397326946259, + "step": 299 + }, + { + "epoch": 0.7910349373764007, + "grad_norm": 10.106436729431152, + "learning_rate": 5.941437444543035e-06, + "log_odds_chosen": 0.5095257759094238, + "log_odds_ratio": -0.48302483558654785, + "logits/chosen": -1.0242811441421509, + "logits/rejected": -0.9563637971878052, + "logps/chosen": -1.5495760440826416, + "logps/rejected": -1.979056477546692, + "loss": 2.345, + "nll_loss": 0.5379441976547241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1549576073884964, + "rewards/margins": 0.042948052287101746, + "rewards/rejected": -0.19790564477443695, + "step": 300 + }, + { + "epoch": 0.7936717205009888, + "grad_norm": 11.148320198059082, + "learning_rate": 5.934338952972493e-06, + "log_odds_chosen": 0.3150947093963623, + "log_odds_ratio": -0.5589739084243774, + "logits/chosen": -1.0287885665893555, + "logits/rejected": -0.9546213746070862, + "logps/chosen": -1.4842816591262817, + "logps/rejected": -1.726853370666504, + "loss": 3.0095, + "nll_loss": 0.6964690685272217, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14842815697193146, + "rewards/margins": 0.02425718866288662, + "rewards/rejected": -0.17268535494804382, + "step": 301 + }, + { + "epoch": 0.7963085036255768, + "grad_norm": 9.838821411132812, + "learning_rate": 5.927240461401953e-06, + "log_odds_chosen": 0.5734976530075073, + "log_odds_ratio": -0.45255494117736816, + "logits/chosen": -1.057968020439148, + "logits/rejected": -0.9363830089569092, + "logps/chosen": -1.5468251705169678, + "logps/rejected": -2.021021604537964, + "loss": 2.7558, + "nll_loss": 0.6436825394630432, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15468251705169678, + "rewards/margins": 0.047419652342796326, + "rewards/rejected": -0.2021021544933319, + "step": 302 + }, + { + "epoch": 0.7989452867501649, + "grad_norm": 9.92994213104248, + "learning_rate": 5.920141969831411e-06, + "log_odds_chosen": 0.4116380512714386, + "log_odds_ratio": -0.5225210189819336, + "logits/chosen": -1.0409326553344727, + "logits/rejected": -0.9100532531738281, + "logps/chosen": -1.4202492237091064, + "logps/rejected": -1.7517368793487549, + "loss": 2.1704, + "nll_loss": 0.4903418719768524, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1420249193906784, + "rewards/margins": 0.03314877673983574, + "rewards/rejected": -0.17517369985580444, + "step": 303 + }, + { + "epoch": 0.8015820698747528, + "grad_norm": 10.500617980957031, + "learning_rate": 5.913043478260869e-06, + "log_odds_chosen": 0.3255411386489868, + "log_odds_ratio": -0.5523953437805176, + "logits/chosen": -0.9314634799957275, + "logits/rejected": -0.8714169263839722, + "logps/chosen": -1.528282880783081, + "logps/rejected": -1.787902593612671, + "loss": 2.8426, + "nll_loss": 0.6554076075553894, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15282830595970154, + "rewards/margins": 0.025961963459849358, + "rewards/rejected": -0.17879024147987366, + "step": 304 + }, + { + "epoch": 0.8042188529993408, + "grad_norm": 10.315369606018066, + "learning_rate": 5.9059449866903285e-06, + "log_odds_chosen": 0.4616833031177521, + "log_odds_ratio": -0.5284723043441772, + "logits/chosen": -1.0559440851211548, + "logits/rejected": -0.9789779186248779, + "logps/chosen": -1.544727087020874, + "logps/rejected": -1.9428236484527588, + "loss": 2.5563, + "nll_loss": 0.5862153768539429, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1544727236032486, + "rewards/margins": 0.03980964422225952, + "rewards/rejected": -0.19428236782550812, + "step": 305 + }, + { + "epoch": 0.8068556361239289, + "grad_norm": 10.912592887878418, + "learning_rate": 5.8988464951197865e-06, + "log_odds_chosen": 0.4379751682281494, + "log_odds_ratio": -0.5078282356262207, + "logits/chosen": -1.045093297958374, + "logits/rejected": -0.9739735722541809, + "logps/chosen": -1.608254313468933, + "logps/rejected": -1.9695265293121338, + "loss": 2.7347, + "nll_loss": 0.6328895092010498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1608254462480545, + "rewards/margins": 0.03612722083926201, + "rewards/rejected": -0.1969526708126068, + "step": 306 + }, + { + "epoch": 0.8094924192485168, + "grad_norm": 10.669661521911621, + "learning_rate": 5.891748003549245e-06, + "log_odds_chosen": 0.21525943279266357, + "log_odds_ratio": -0.5981292724609375, + "logits/chosen": -1.049196720123291, + "logits/rejected": -1.0034172534942627, + "logps/chosen": -1.5248042345046997, + "logps/rejected": -1.6964774131774902, + "loss": 2.9062, + "nll_loss": 0.6667332053184509, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15248043835163116, + "rewards/margins": 0.017167314887046814, + "rewards/rejected": -0.16964775323867798, + "step": 307 + }, + { + "epoch": 0.8121292023731048, + "grad_norm": 9.655624389648438, + "learning_rate": 5.884649511978704e-06, + "log_odds_chosen": 0.4819903075695038, + "log_odds_ratio": -0.4996598958969116, + "logits/chosen": -0.9081611633300781, + "logits/rejected": -0.8642649054527283, + "logps/chosen": -1.4164258241653442, + "logps/rejected": -1.7920730113983154, + "loss": 2.2922, + "nll_loss": 0.5230814218521118, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14164258539676666, + "rewards/margins": 0.0375647246837616, + "rewards/rejected": -0.17920729517936707, + "step": 308 + }, + { + "epoch": 0.8147659854976929, + "grad_norm": 9.362163543701172, + "learning_rate": 5.877551020408163e-06, + "log_odds_chosen": 0.2944612205028534, + "log_odds_ratio": -0.5692933797836304, + "logits/chosen": -1.01718008518219, + "logits/rejected": -0.9646862149238586, + "logps/chosen": -1.3186593055725098, + "logps/rejected": -1.5281615257263184, + "loss": 2.1738, + "nll_loss": 0.48651689291000366, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1318659484386444, + "rewards/margins": 0.02095022052526474, + "rewards/rejected": -0.15281614661216736, + "step": 309 + }, + { + "epoch": 0.8174027686222808, + "grad_norm": 9.855142593383789, + "learning_rate": 5.870452528837621e-06, + "log_odds_chosen": 0.3722895383834839, + "log_odds_ratio": -0.5283873081207275, + "logits/chosen": -1.020612120628357, + "logits/rejected": -0.9500452280044556, + "logps/chosen": -1.4737542867660522, + "logps/rejected": -1.772289752960205, + "loss": 2.3783, + "nll_loss": 0.5417414903640747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1473754346370697, + "rewards/margins": 0.029853537678718567, + "rewards/rejected": -0.17722897231578827, + "step": 310 + }, + { + "epoch": 0.8200395517468688, + "grad_norm": 10.67618465423584, + "learning_rate": 5.863354037267081e-06, + "log_odds_chosen": 0.2837017774581909, + "log_odds_ratio": -0.570830225944519, + "logits/chosen": -1.1645097732543945, + "logits/rejected": -1.0463659763336182, + "logps/chosen": -1.5850422382354736, + "logps/rejected": -1.8165497779846191, + "loss": 3.2837, + "nll_loss": 0.7638373970985413, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15850421786308289, + "rewards/margins": 0.02315075509250164, + "rewards/rejected": -0.18165498971939087, + "step": 311 + }, + { + "epoch": 0.8226763348714569, + "grad_norm": 9.986859321594238, + "learning_rate": 5.856255545696539e-06, + "log_odds_chosen": 0.4877340793609619, + "log_odds_ratio": -0.4837613105773926, + "logits/chosen": -1.0402659177780151, + "logits/rejected": -0.9313653707504272, + "logps/chosen": -1.4441670179367065, + "logps/rejected": -1.829864263534546, + "loss": 2.2893, + "nll_loss": 0.523948609828949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1444167047739029, + "rewards/margins": 0.03856972977519035, + "rewards/rejected": -0.18298642337322235, + "step": 312 + }, + { + "epoch": 0.8253131179960448, + "grad_norm": 10.904727935791016, + "learning_rate": 5.849157054125998e-06, + "log_odds_chosen": 0.39935219287872314, + "log_odds_ratio": -0.5432575345039368, + "logits/chosen": -1.1372846364974976, + "logits/rejected": -1.0501521825790405, + "logps/chosen": -1.5840697288513184, + "logps/rejected": -1.9227668046951294, + "loss": 3.1338, + "nll_loss": 0.7291156053543091, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15840697288513184, + "rewards/margins": 0.033869706094264984, + "rewards/rejected": -0.19227667152881622, + "step": 313 + }, + { + "epoch": 0.8279499011206328, + "grad_norm": 10.428577423095703, + "learning_rate": 5.842058562555457e-06, + "log_odds_chosen": 0.39743325114250183, + "log_odds_ratio": -0.5214748382568359, + "logits/chosen": -0.9990111589431763, + "logits/rejected": -0.9133065938949585, + "logps/chosen": -1.6697391271591187, + "logps/rejected": -1.9973664283752441, + "loss": 2.6977, + "nll_loss": 0.6222816109657288, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16697391867637634, + "rewards/margins": 0.032762713730335236, + "rewards/rejected": -0.19973662495613098, + "step": 314 + }, + { + "epoch": 0.8305866842452209, + "grad_norm": 10.464115142822266, + "learning_rate": 5.834960070984915e-06, + "log_odds_chosen": 0.3513261377811432, + "log_odds_ratio": -0.5399587750434875, + "logits/chosen": -1.063493013381958, + "logits/rejected": -0.9799022078514099, + "logps/chosen": -1.6098552942276, + "logps/rejected": -1.8995361328125, + "loss": 2.7644, + "nll_loss": 0.637109637260437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16098552942276, + "rewards/margins": 0.028968090191483498, + "rewards/rejected": -0.18995361030101776, + "step": 315 + }, + { + "epoch": 0.8332234673698088, + "grad_norm": 10.210542678833008, + "learning_rate": 5.827861579414375e-06, + "log_odds_chosen": 0.057153940200805664, + "log_odds_ratio": -0.6794668436050415, + "logits/chosen": -1.0624723434448242, + "logits/rejected": -0.9977174997329712, + "logps/chosen": -1.5775511264801025, + "logps/rejected": -1.6190568208694458, + "loss": 2.9337, + "nll_loss": 0.6654831767082214, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15775510668754578, + "rewards/margins": 0.004150571301579475, + "rewards/rejected": -0.1619056761264801, + "step": 316 + }, + { + "epoch": 0.8358602504943968, + "grad_norm": 10.235295295715332, + "learning_rate": 5.820763087843833e-06, + "log_odds_chosen": 0.12494079768657684, + "log_odds_ratio": -0.645836591720581, + "logits/chosen": -1.0902093648910522, + "logits/rejected": -1.016379952430725, + "logps/chosen": -1.5835883617401123, + "logps/rejected": -1.687776803970337, + "loss": 2.8211, + "nll_loss": 0.6406936645507812, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1583588421344757, + "rewards/margins": 0.010418838821351528, + "rewards/rejected": -0.1687776893377304, + "step": 317 + }, + { + "epoch": 0.8384970336189849, + "grad_norm": 10.28027629852295, + "learning_rate": 5.813664596273292e-06, + "log_odds_chosen": 0.343504935503006, + "log_odds_ratio": -0.5613068342208862, + "logits/chosen": -1.0396804809570312, + "logits/rejected": -0.9597609043121338, + "logps/chosen": -1.4261085987091064, + "logps/rejected": -1.692732572555542, + "loss": 2.5255, + "nll_loss": 0.5752564072608948, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1426108479499817, + "rewards/margins": 0.026662401854991913, + "rewards/rejected": -0.1692732572555542, + "step": 318 + }, + { + "epoch": 0.8411338167435728, + "grad_norm": 10.127836227416992, + "learning_rate": 5.8065661047027505e-06, + "log_odds_chosen": 0.34942787885665894, + "log_odds_ratio": -0.541016697883606, + "logits/chosen": -1.0738239288330078, + "logits/rejected": -0.9604414105415344, + "logps/chosen": -1.5903185606002808, + "logps/rejected": -1.8677293062210083, + "loss": 2.8788, + "nll_loss": 0.6655933260917664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15903185307979584, + "rewards/margins": 0.02774108201265335, + "rewards/rejected": -0.1867729276418686, + "step": 319 + }, + { + "epoch": 0.8437705998681608, + "grad_norm": 10.790935516357422, + "learning_rate": 5.799467613132209e-06, + "log_odds_chosen": 0.153792604804039, + "log_odds_ratio": -0.6239289045333862, + "logits/chosen": -1.0212633609771729, + "logits/rejected": -0.9398971199989319, + "logps/chosen": -1.6086022853851318, + "logps/rejected": -1.7359929084777832, + "loss": 2.607, + "nll_loss": 0.5893584489822388, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16086022555828094, + "rewards/margins": 0.012739075347781181, + "rewards/rejected": -0.17359930276870728, + "step": 320 + }, + { + "epoch": 0.8464073829927489, + "grad_norm": 9.60370922088623, + "learning_rate": 5.792369121561668e-06, + "log_odds_chosen": 0.9587531089782715, + "log_odds_ratio": -0.35142982006073, + "logits/chosen": -1.0414528846740723, + "logits/rejected": -0.9721252918243408, + "logps/chosen": -1.310626745223999, + "logps/rejected": -2.090373992919922, + "loss": 2.1543, + "nll_loss": 0.5034200549125671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13106267154216766, + "rewards/margins": 0.07797471433877945, + "rewards/rejected": -0.20903737843036652, + "step": 321 + }, + { + "epoch": 0.8490441661173368, + "grad_norm": 9.56372356414795, + "learning_rate": 5.785270629991126e-06, + "log_odds_chosen": 0.42057231068611145, + "log_odds_ratio": -0.5202645659446716, + "logits/chosen": -1.0739099979400635, + "logits/rejected": -0.9998117685317993, + "logps/chosen": -1.427781105041504, + "logps/rejected": -1.7579572200775146, + "loss": 2.5026, + "nll_loss": 0.5736272931098938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14277812838554382, + "rewards/margins": 0.033017609268426895, + "rewards/rejected": -0.17579573392868042, + "step": 322 + }, + { + "epoch": 0.8516809492419248, + "grad_norm": 10.817325592041016, + "learning_rate": 5.778172138420585e-06, + "log_odds_chosen": 0.2748401165008545, + "log_odds_ratio": -0.5693619251251221, + "logits/chosen": -1.0628693103790283, + "logits/rejected": -0.9903963804244995, + "logps/chosen": -1.7415344715118408, + "logps/rejected": -1.9746755361557007, + "loss": 2.8572, + "nll_loss": 0.6573653221130371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17415344715118408, + "rewards/margins": 0.023314107209444046, + "rewards/rejected": -0.19746755063533783, + "step": 323 + }, + { + "epoch": 0.8543177323665129, + "grad_norm": 10.59162712097168, + "learning_rate": 5.771073646850044e-06, + "log_odds_chosen": 0.2887657582759857, + "log_odds_ratio": -0.5717019438743591, + "logits/chosen": -1.0636672973632812, + "logits/rejected": -0.9916437864303589, + "logps/chosen": -1.5421836376190186, + "logps/rejected": -1.7747539281845093, + "loss": 2.8713, + "nll_loss": 0.6606504917144775, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15421836078166962, + "rewards/margins": 0.023257026448845863, + "rewards/rejected": -0.17747539281845093, + "step": 324 + }, + { + "epoch": 0.8569545154911009, + "grad_norm": 10.08297348022461, + "learning_rate": 5.763975155279503e-06, + "log_odds_chosen": 0.43007218837738037, + "log_odds_ratio": -0.5519193410873413, + "logits/chosen": -1.0398885011672974, + "logits/rejected": -0.9652789235115051, + "logps/chosen": -1.4443862438201904, + "logps/rejected": -1.7982938289642334, + "loss": 2.3122, + "nll_loss": 0.5228697061538696, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14443862438201904, + "rewards/margins": 0.03539075329899788, + "rewards/rejected": -0.17982937395572662, + "step": 325 + }, + { + "epoch": 0.8595912986156888, + "grad_norm": 10.488415718078613, + "learning_rate": 5.756876663708961e-06, + "log_odds_chosen": 0.3969072103500366, + "log_odds_ratio": -0.5206924676895142, + "logits/chosen": -1.0502766370773315, + "logits/rejected": -0.9669512510299683, + "logps/chosen": -1.6841930150985718, + "logps/rejected": -2.0151259899139404, + "loss": 2.9243, + "nll_loss": 0.6790080070495605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16841931641101837, + "rewards/margins": 0.033093273639678955, + "rewards/rejected": -0.20151257514953613, + "step": 326 + }, + { + "epoch": 0.8622280817402769, + "grad_norm": 10.095006942749023, + "learning_rate": 5.749778172138421e-06, + "log_odds_chosen": 0.456265926361084, + "log_odds_ratio": -0.5051561594009399, + "logits/chosen": -1.0466737747192383, + "logits/rejected": -0.947853684425354, + "logps/chosen": -1.474120020866394, + "logps/rejected": -1.8521654605865479, + "loss": 2.1415, + "nll_loss": 0.4848534166812897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1474120020866394, + "rewards/margins": 0.037804536521434784, + "rewards/rejected": -0.18521654605865479, + "step": 327 + }, + { + "epoch": 0.8648648648648649, + "grad_norm": 9.11341667175293, + "learning_rate": 5.742679680567879e-06, + "log_odds_chosen": 0.3190004229545593, + "log_odds_ratio": -0.5560978055000305, + "logits/chosen": -0.9748751521110535, + "logits/rejected": -0.8502944707870483, + "logps/chosen": -1.3002684116363525, + "logps/rejected": -1.544651746749878, + "loss": 1.8325, + "nll_loss": 0.4025198817253113, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13002684712409973, + "rewards/margins": 0.02443832904100418, + "rewards/rejected": -0.15446516871452332, + "step": 328 + }, + { + "epoch": 0.8675016479894528, + "grad_norm": 10.462133407592773, + "learning_rate": 5.735581188997338e-06, + "log_odds_chosen": 0.24863064289093018, + "log_odds_ratio": -0.5877097249031067, + "logits/chosen": -0.9955823421478271, + "logits/rejected": -0.9353163838386536, + "logps/chosen": -1.693455457687378, + "logps/rejected": -1.8955365419387817, + "loss": 3.0426, + "nll_loss": 0.7018847465515137, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16934554278850555, + "rewards/margins": 0.020208114758133888, + "rewards/rejected": -0.1895536482334137, + "step": 329 + }, + { + "epoch": 0.8701384311140409, + "grad_norm": 10.17776107788086, + "learning_rate": 5.728482697426797e-06, + "log_odds_chosen": 0.40002769231796265, + "log_odds_ratio": -0.5155510306358337, + "logits/chosen": -1.1227819919586182, + "logits/rejected": -0.9743717908859253, + "logps/chosen": -1.4541687965393066, + "logps/rejected": -1.7705214023590088, + "loss": 2.7577, + "nll_loss": 0.6378784775733948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14541688561439514, + "rewards/margins": 0.03163526952266693, + "rewards/rejected": -0.17705215513706207, + "step": 330 + }, + { + "epoch": 0.8727752142386289, + "grad_norm": 10.216160774230957, + "learning_rate": 5.721384205856255e-06, + "log_odds_chosen": 0.5271902084350586, + "log_odds_ratio": -0.4871942102909088, + "logits/chosen": -0.994499921798706, + "logits/rejected": -0.9453480243682861, + "logps/chosen": -1.3821659088134766, + "logps/rejected": -1.7877823114395142, + "loss": 2.3179, + "nll_loss": 0.5307646989822388, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13821661472320557, + "rewards/margins": 0.04056163504719734, + "rewards/rejected": -0.17877823114395142, + "step": 331 + }, + { + "epoch": 0.8754119973632168, + "grad_norm": 10.18932819366455, + "learning_rate": 5.7142857142857145e-06, + "log_odds_chosen": 0.5570278167724609, + "log_odds_ratio": -0.4699929654598236, + "logits/chosen": -1.0728272199630737, + "logits/rejected": -0.9810482263565063, + "logps/chosen": -1.4654444456100464, + "logps/rejected": -1.9071202278137207, + "loss": 2.6198, + "nll_loss": 0.6079585552215576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1465444564819336, + "rewards/margins": 0.044167570769786835, + "rewards/rejected": -0.19071203470230103, + "step": 332 + }, + { + "epoch": 0.8780487804878049, + "grad_norm": 10.48474407196045, + "learning_rate": 5.7071872227151726e-06, + "log_odds_chosen": 0.3994866907596588, + "log_odds_ratio": -0.5195596814155579, + "logits/chosen": -1.068738579750061, + "logits/rejected": -1.0077134370803833, + "logps/chosen": -1.590066909790039, + "logps/rejected": -1.9161949157714844, + "loss": 3.0394, + "nll_loss": 0.7078840732574463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15900669991970062, + "rewards/margins": 0.03261279687285423, + "rewards/rejected": -0.19161948561668396, + "step": 333 + }, + { + "epoch": 0.8806855636123929, + "grad_norm": 10.939852714538574, + "learning_rate": 5.7000887311446315e-06, + "log_odds_chosen": 0.4621232748031616, + "log_odds_ratio": -0.5040398240089417, + "logits/chosen": -1.11872398853302, + "logits/rejected": -0.9806749820709229, + "logps/chosen": -1.4889421463012695, + "logps/rejected": -1.8606245517730713, + "loss": 2.736, + "nll_loss": 0.6336002945899963, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14889422059059143, + "rewards/margins": 0.0371682345867157, + "rewards/rejected": -0.18606245517730713, + "step": 334 + }, + { + "epoch": 0.8833223467369808, + "grad_norm": 10.190740585327148, + "learning_rate": 5.69299023957409e-06, + "log_odds_chosen": 0.37728533148765564, + "log_odds_ratio": -0.5308787226676941, + "logits/chosen": -1.0650832653045654, + "logits/rejected": -0.9698933362960815, + "logps/chosen": -1.6103969812393188, + "logps/rejected": -1.924202561378479, + "loss": 2.8435, + "nll_loss": 0.6577939987182617, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16103971004486084, + "rewards/margins": 0.03138056397438049, + "rewards/rejected": -0.19242027401924133, + "step": 335 + }, + { + "epoch": 0.8859591298615689, + "grad_norm": 10.346920013427734, + "learning_rate": 5.685891748003549e-06, + "log_odds_chosen": 0.5905814170837402, + "log_odds_ratio": -0.4763306975364685, + "logits/chosen": -1.0905077457427979, + "logits/rejected": -0.9927341938018799, + "logps/chosen": -1.4622387886047363, + "logps/rejected": -1.9198663234710693, + "loss": 2.724, + "nll_loss": 0.6333789229393005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14622387290000916, + "rewards/margins": 0.04576274752616882, + "rewards/rejected": -0.19198662042617798, + "step": 336 + }, + { + "epoch": 0.8885959129861569, + "grad_norm": 9.778738021850586, + "learning_rate": 5.678793256433007e-06, + "log_odds_chosen": 0.3346019983291626, + "log_odds_ratio": -0.547289252281189, + "logits/chosen": -1.0419234037399292, + "logits/rejected": -0.9784678816795349, + "logps/chosen": -1.5617038011550903, + "logps/rejected": -1.835525393486023, + "loss": 2.445, + "nll_loss": 0.556515634059906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15617038309574127, + "rewards/margins": 0.027382152155041695, + "rewards/rejected": -0.18355253338813782, + "step": 337 + }, + { + "epoch": 0.8912326961107448, + "grad_norm": 10.788568496704102, + "learning_rate": 5.671694764862467e-06, + "log_odds_chosen": 0.4987928867340088, + "log_odds_ratio": -0.49477386474609375, + "logits/chosen": -1.0442382097244263, + "logits/rejected": -0.9384560585021973, + "logps/chosen": -1.481365442276001, + "logps/rejected": -1.8602831363677979, + "loss": 2.8371, + "nll_loss": 0.6598080992698669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14813652634620667, + "rewards/margins": 0.03789178282022476, + "rewards/rejected": -0.18602833151817322, + "step": 338 + }, + { + "epoch": 0.8938694792353329, + "grad_norm": 9.854400634765625, + "learning_rate": 5.664596273291925e-06, + "log_odds_chosen": 0.4089469909667969, + "log_odds_ratio": -0.514413058757782, + "logits/chosen": -1.0501763820648193, + "logits/rejected": -0.9793403148651123, + "logps/chosen": -1.3807724714279175, + "logps/rejected": -1.6960806846618652, + "loss": 2.5394, + "nll_loss": 0.583407461643219, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1380772590637207, + "rewards/margins": 0.03153081610798836, + "rewards/rejected": -0.16960807144641876, + "step": 339 + }, + { + "epoch": 0.8965062623599209, + "grad_norm": 10.947417259216309, + "learning_rate": 5.657497781721383e-06, + "log_odds_chosen": 0.44161561131477356, + "log_odds_ratio": -0.5027471780776978, + "logits/chosen": -1.0611671209335327, + "logits/rejected": -0.9183800220489502, + "logps/chosen": -1.4360787868499756, + "logps/rejected": -1.7909188270568848, + "loss": 2.8081, + "nll_loss": 0.6517510414123535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14360786974430084, + "rewards/margins": 0.03548401594161987, + "rewards/rejected": -0.17909188568592072, + "step": 340 + }, + { + "epoch": 0.8991430454845089, + "grad_norm": 10.07443618774414, + "learning_rate": 5.650399290150843e-06, + "log_odds_chosen": 0.2103627473115921, + "log_odds_ratio": -0.5991295576095581, + "logits/chosen": -1.0860145092010498, + "logits/rejected": -1.0008509159088135, + "logps/chosen": -1.425404667854309, + "logps/rejected": -1.5859260559082031, + "loss": 2.7957, + "nll_loss": 0.6390042304992676, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14254045486450195, + "rewards/margins": 0.016052136197686195, + "rewards/rejected": -0.1585926115512848, + "step": 341 + }, + { + "epoch": 0.9017798286090969, + "grad_norm": 10.20143985748291, + "learning_rate": 5.643300798580301e-06, + "log_odds_chosen": 0.4558635950088501, + "log_odds_ratio": -0.5051071643829346, + "logits/chosen": -1.1710106134414673, + "logits/rejected": -1.0035669803619385, + "logps/chosen": -1.4524896144866943, + "logps/rejected": -1.8239222764968872, + "loss": 3.073, + "nll_loss": 0.717727780342102, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14524896442890167, + "rewards/margins": 0.0371432825922966, + "rewards/rejected": -0.18239223957061768, + "step": 342 + }, + { + "epoch": 0.9044166117336849, + "grad_norm": 9.72130012512207, + "learning_rate": 5.636202307009761e-06, + "log_odds_chosen": 0.47912660241127014, + "log_odds_ratio": -0.48404398560523987, + "logits/chosen": -1.0630624294281006, + "logits/rejected": -0.9384533166885376, + "logps/chosen": -1.3418054580688477, + "logps/rejected": -1.7115063667297363, + "loss": 2.3891, + "nll_loss": 0.5488689541816711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13418054580688477, + "rewards/margins": 0.03697009012103081, + "rewards/rejected": -0.17115065455436707, + "step": 343 + }, + { + "epoch": 0.9070533948582729, + "grad_norm": 10.672527313232422, + "learning_rate": 5.629103815439219e-06, + "log_odds_chosen": 0.4753820300102234, + "log_odds_ratio": -0.4976285994052887, + "logits/chosen": -1.090612769126892, + "logits/rejected": -0.9278324842453003, + "logps/chosen": -1.5014450550079346, + "logps/rejected": -1.8953273296356201, + "loss": 2.7992, + "nll_loss": 0.6500457525253296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1501445174217224, + "rewards/margins": 0.03938821703195572, + "rewards/rejected": -0.18953272700309753, + "step": 344 + }, + { + "epoch": 0.9096901779828609, + "grad_norm": 10.501937866210938, + "learning_rate": 5.622005323868678e-06, + "log_odds_chosen": 0.3252415359020233, + "log_odds_ratio": -0.5536338686943054, + "logits/chosen": -1.1071209907531738, + "logits/rejected": -1.0367724895477295, + "logps/chosen": -1.341892123222351, + "logps/rejected": -1.5972182750701904, + "loss": 2.475, + "nll_loss": 0.5633936524391174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13418921828269958, + "rewards/margins": 0.02553262561559677, + "rewards/rejected": -0.15972182154655457, + "step": 345 + }, + { + "epoch": 0.9123269611074489, + "grad_norm": 10.16054630279541, + "learning_rate": 5.6149068322981366e-06, + "log_odds_chosen": 0.27317702770233154, + "log_odds_ratio": -0.5727741718292236, + "logits/chosen": -1.0333210229873657, + "logits/rejected": -0.957564651966095, + "logps/chosen": -1.5706524848937988, + "logps/rejected": -1.7979657649993896, + "loss": 3.005, + "nll_loss": 0.6939662098884583, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1570652574300766, + "rewards/margins": 0.022731315344572067, + "rewards/rejected": -0.17979657649993896, + "step": 346 + }, + { + "epoch": 0.914963744232037, + "grad_norm": 9.568496704101562, + "learning_rate": 5.6078083407275955e-06, + "log_odds_chosen": 0.40468311309814453, + "log_odds_ratio": -0.5176348090171814, + "logits/chosen": -1.1225817203521729, + "logits/rejected": -1.0085132122039795, + "logps/chosen": -1.4263901710510254, + "logps/rejected": -1.7484097480773926, + "loss": 2.6263, + "nll_loss": 0.6048138737678528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14263901114463806, + "rewards/margins": 0.03220197558403015, + "rewards/rejected": -0.1748409867286682, + "step": 347 + }, + { + "epoch": 0.9176005273566249, + "grad_norm": 9.960814476013184, + "learning_rate": 5.6007098491570535e-06, + "log_odds_chosen": 0.3930380344390869, + "log_odds_ratio": -0.5187526345252991, + "logits/chosen": -1.0023831129074097, + "logits/rejected": -0.8820410370826721, + "logps/chosen": -1.4457470178604126, + "logps/rejected": -1.7578613758087158, + "loss": 2.2526, + "nll_loss": 0.5112727284431458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14457471668720245, + "rewards/margins": 0.031211430206894875, + "rewards/rejected": -0.17578613758087158, + "step": 348 + }, + { + "epoch": 0.9202373104812129, + "grad_norm": 10.322500228881836, + "learning_rate": 5.593611357586512e-06, + "log_odds_chosen": 0.2274288386106491, + "log_odds_ratio": -0.5969381332397461, + "logits/chosen": -1.02559494972229, + "logits/rejected": -0.922670841217041, + "logps/chosen": -1.7443609237670898, + "logps/rejected": -1.9361728429794312, + "loss": 2.6544, + "nll_loss": 0.6039038300514221, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17443609237670898, + "rewards/margins": 0.019181186333298683, + "rewards/rejected": -0.19361728429794312, + "step": 349 + }, + { + "epoch": 0.922874093605801, + "grad_norm": 9.499584197998047, + "learning_rate": 5.586512866015971e-06, + "log_odds_chosen": 0.5290266275405884, + "log_odds_ratio": -0.48338058590888977, + "logits/chosen": -1.0399274826049805, + "logits/rejected": -0.9718201160430908, + "logps/chosen": -1.174059271812439, + "logps/rejected": -1.534996509552002, + "loss": 2.229, + "nll_loss": 0.5089079737663269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11740593612194061, + "rewards/margins": 0.03609371930360794, + "rewards/rejected": -0.15349964797496796, + "step": 350 + }, + { + "epoch": 0.9255108767303889, + "grad_norm": 10.729506492614746, + "learning_rate": 5.57941437444543e-06, + "log_odds_chosen": 0.26497402787208557, + "log_odds_ratio": -0.5773090720176697, + "logits/chosen": -1.126983880996704, + "logits/rejected": -1.0317203998565674, + "logps/chosen": -1.5925211906433105, + "logps/rejected": -1.8076058626174927, + "loss": 2.9425, + "nll_loss": 0.6779011487960815, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1592521369457245, + "rewards/margins": 0.021508460864424706, + "rewards/rejected": -0.18076059222221375, + "step": 351 + }, + { + "epoch": 0.9281476598549769, + "grad_norm": 10.258927345275879, + "learning_rate": 5.572315882874889e-06, + "log_odds_chosen": 0.2672571837902069, + "log_odds_ratio": -0.5744418501853943, + "logits/chosen": -1.1412999629974365, + "logits/rejected": -1.0702255964279175, + "logps/chosen": -1.3987594842910767, + "logps/rejected": -1.6080760955810547, + "loss": 2.8588, + "nll_loss": 0.6572588086128235, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13987594842910767, + "rewards/margins": 0.020931649953126907, + "rewards/rejected": -0.16080759465694427, + "step": 352 + }, + { + "epoch": 0.930784442979565, + "grad_norm": 10.075135231018066, + "learning_rate": 5.565217391304347e-06, + "log_odds_chosen": 0.28277862071990967, + "log_odds_ratio": -0.5662992000579834, + "logits/chosen": -1.0691959857940674, + "logits/rejected": -0.9810045957565308, + "logps/chosen": -1.5185537338256836, + "logps/rejected": -1.7473249435424805, + "loss": 2.8186, + "nll_loss": 0.6480088233947754, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15185536444187164, + "rewards/margins": 0.022877134382724762, + "rewards/rejected": -0.174732506275177, + "step": 353 + }, + { + "epoch": 0.9334212261041529, + "grad_norm": 10.633783340454102, + "learning_rate": 5.558118899733807e-06, + "log_odds_chosen": 0.672451913356781, + "log_odds_ratio": -0.44251322746276855, + "logits/chosen": -1.0403993129730225, + "logits/rejected": -0.9178810119628906, + "logps/chosen": -1.4975465536117554, + "logps/rejected": -2.0363898277282715, + "loss": 2.5513, + "nll_loss": 0.5935852527618408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14975465834140778, + "rewards/margins": 0.05388431251049042, + "rewards/rejected": -0.2036389708518982, + "step": 354 + }, + { + "epoch": 0.9360580092287409, + "grad_norm": 10.344921112060547, + "learning_rate": 5.551020408163265e-06, + "log_odds_chosen": 0.4677634537220001, + "log_odds_ratio": -0.4962654411792755, + "logits/chosen": -1.047995924949646, + "logits/rejected": -0.934207558631897, + "logps/chosen": -1.47959566116333, + "logps/rejected": -1.8597514629364014, + "loss": 2.4314, + "nll_loss": 0.5582197904586792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14795956015586853, + "rewards/margins": 0.03801558539271355, + "rewards/rejected": -0.18597514927387238, + "step": 355 + }, + { + "epoch": 0.938694792353329, + "grad_norm": 10.867266654968262, + "learning_rate": 5.543921916592724e-06, + "log_odds_chosen": 0.17453785240650177, + "log_odds_ratio": -0.6157727241516113, + "logits/chosen": -1.0531580448150635, + "logits/rejected": -1.0111100673675537, + "logps/chosen": -1.4414186477661133, + "logps/rejected": -1.5767408609390259, + "loss": 2.5583, + "nll_loss": 0.5779985189437866, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14414185285568237, + "rewards/margins": 0.013532244600355625, + "rewards/rejected": -0.15767410397529602, + "step": 356 + }, + { + "epoch": 0.9413315754779169, + "grad_norm": 9.612220764160156, + "learning_rate": 5.536823425022183e-06, + "log_odds_chosen": 0.49452072381973267, + "log_odds_ratio": -0.4899476170539856, + "logits/chosen": -1.0678932666778564, + "logits/rejected": -0.9185231924057007, + "logps/chosen": -1.3783776760101318, + "logps/rejected": -1.7587839365005493, + "loss": 2.3197, + "nll_loss": 0.5309328436851501, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13783776760101318, + "rewards/margins": 0.03804062306880951, + "rewards/rejected": -0.1758784055709839, + "step": 357 + }, + { + "epoch": 0.9439683586025049, + "grad_norm": 9.743525505065918, + "learning_rate": 5.529724933451641e-06, + "log_odds_chosen": 0.6043331027030945, + "log_odds_ratio": -0.45537304878234863, + "logits/chosen": -1.034125566482544, + "logits/rejected": -0.9460897445678711, + "logps/chosen": -1.2822744846343994, + "logps/rejected": -1.7196097373962402, + "loss": 2.1604, + "nll_loss": 0.49456602334976196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12822744250297546, + "rewards/margins": 0.043733518570661545, + "rewards/rejected": -0.1719609498977661, + "step": 358 + }, + { + "epoch": 0.946605141727093, + "grad_norm": 9.58714771270752, + "learning_rate": 5.5226264418811006e-06, + "log_odds_chosen": 0.41552817821502686, + "log_odds_ratio": -0.5585276484489441, + "logits/chosen": -1.0117213726043701, + "logits/rejected": -0.9441577792167664, + "logps/chosen": -1.4235024452209473, + "logps/rejected": -1.7747458219528198, + "loss": 2.1996, + "nll_loss": 0.49404376745224, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14235025644302368, + "rewards/margins": 0.035124339163303375, + "rewards/rejected": -0.17747458815574646, + "step": 359 + }, + { + "epoch": 0.9492419248516809, + "grad_norm": 10.443949699401855, + "learning_rate": 5.515527950310559e-06, + "log_odds_chosen": 0.26815563440322876, + "log_odds_ratio": -0.5817369222640991, + "logits/chosen": -1.004830002784729, + "logits/rejected": -0.9195725321769714, + "logps/chosen": -1.612457036972046, + "logps/rejected": -1.839476466178894, + "loss": 2.6319, + "nll_loss": 0.5997951030731201, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1612457036972046, + "rewards/margins": 0.022701943293213844, + "rewards/rejected": -0.18394765257835388, + "step": 360 + }, + { + "epoch": 0.951878707976269, + "grad_norm": 9.963574409484863, + "learning_rate": 5.5084294587400175e-06, + "log_odds_chosen": 0.2849772572517395, + "log_odds_ratio": -0.5643788576126099, + "logits/chosen": -1.0444022417068481, + "logits/rejected": -0.9414665699005127, + "logps/chosen": -1.5040690898895264, + "logps/rejected": -1.7351229190826416, + "loss": 2.6506, + "nll_loss": 0.6061998605728149, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15040689706802368, + "rewards/margins": 0.023105382919311523, + "rewards/rejected": -0.1735122799873352, + "step": 361 + }, + { + "epoch": 0.954515491100857, + "grad_norm": 10.363738059997559, + "learning_rate": 5.501330967169476e-06, + "log_odds_chosen": 0.322379469871521, + "log_odds_ratio": -0.5474145412445068, + "logits/chosen": -1.1142265796661377, + "logits/rejected": -1.0194593667984009, + "logps/chosen": -1.393839716911316, + "logps/rejected": -1.6361058950424194, + "loss": 2.849, + "nll_loss": 0.6575071811676025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1393839716911316, + "rewards/margins": 0.024226615205407143, + "rewards/rejected": -0.16361059248447418, + "step": 362 + }, + { + "epoch": 0.9571522742254449, + "grad_norm": 9.178751945495605, + "learning_rate": 5.494232475598935e-06, + "log_odds_chosen": 0.2973731458187103, + "log_odds_ratio": -0.5822466611862183, + "logits/chosen": -0.9972629547119141, + "logits/rejected": -0.9414466619491577, + "logps/chosen": -1.4518791437149048, + "logps/rejected": -1.6809438467025757, + "loss": 2.2369, + "nll_loss": 0.501007080078125, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14518792927265167, + "rewards/margins": 0.022906456142663956, + "rewards/rejected": -0.16809438169002533, + "step": 363 + }, + { + "epoch": 0.959789057350033, + "grad_norm": 10.308340072631836, + "learning_rate": 5.487133984028393e-06, + "log_odds_chosen": 0.19245854020118713, + "log_odds_ratio": -0.6071769595146179, + "logits/chosen": -1.0446027517318726, + "logits/rejected": -0.9913628697395325, + "logps/chosen": -1.4108364582061768, + "logps/rejected": -1.5582778453826904, + "loss": 2.4827, + "nll_loss": 0.5599597692489624, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14108365774154663, + "rewards/margins": 0.014744145795702934, + "rewards/rejected": -0.15582779049873352, + "step": 364 + }, + { + "epoch": 0.962425840474621, + "grad_norm": 9.615263938903809, + "learning_rate": 5.480035492457852e-06, + "log_odds_chosen": 0.4360993504524231, + "log_odds_ratio": -0.5149202942848206, + "logits/chosen": -1.065071940422058, + "logits/rejected": -0.9887789487838745, + "logps/chosen": -1.297107458114624, + "logps/rejected": -1.6414079666137695, + "loss": 2.3168, + "nll_loss": 0.5277169942855835, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12971073389053345, + "rewards/margins": 0.03443005681037903, + "rewards/rejected": -0.16414080560207367, + "step": 365 + }, + { + "epoch": 0.9650626235992089, + "grad_norm": 9.44418716430664, + "learning_rate": 5.472937000887311e-06, + "log_odds_chosen": 0.37738606333732605, + "log_odds_ratio": -0.5338386297225952, + "logits/chosen": -1.0057923793792725, + "logits/rejected": -0.8819607496261597, + "logps/chosen": -1.292341947555542, + "logps/rejected": -1.5872286558151245, + "loss": 2.5454, + "nll_loss": 0.5829612612724304, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1292341947555542, + "rewards/margins": 0.02948867343366146, + "rewards/rejected": -0.1587228775024414, + "step": 366 + }, + { + "epoch": 0.967699406723797, + "grad_norm": 10.713010787963867, + "learning_rate": 5.465838509316769e-06, + "log_odds_chosen": 0.3017590045928955, + "log_odds_ratio": -0.5661972165107727, + "logits/chosen": -0.9886905550956726, + "logits/rejected": -0.8983290791511536, + "logps/chosen": -1.1995577812194824, + "logps/rejected": -1.4248850345611572, + "loss": 2.631, + "nll_loss": 0.6011286973953247, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11995577812194824, + "rewards/margins": 0.02253272570669651, + "rewards/rejected": -0.142488494515419, + "step": 367 + }, + { + "epoch": 0.970336189848385, + "grad_norm": 9.215208053588867, + "learning_rate": 5.458740017746229e-06, + "log_odds_chosen": 0.2892542779445648, + "log_odds_ratio": -0.5717054009437561, + "logits/chosen": -1.0206345319747925, + "logits/rejected": -0.9631547927856445, + "logps/chosen": -1.4143803119659424, + "logps/rejected": -1.6542450189590454, + "loss": 2.5335, + "nll_loss": 0.576204776763916, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14143803715705872, + "rewards/margins": 0.02398647367954254, + "rewards/rejected": -0.16542451083660126, + "step": 368 + }, + { + "epoch": 0.972972972972973, + "grad_norm": 10.076217651367188, + "learning_rate": 5.451641526175687e-06, + "log_odds_chosen": 0.4455964267253876, + "log_odds_ratio": -0.5248406529426575, + "logits/chosen": -0.9466066360473633, + "logits/rejected": -0.8634353876113892, + "logps/chosen": -1.264702558517456, + "logps/rejected": -1.5656343698501587, + "loss": 2.0119, + "nll_loss": 0.450481116771698, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12647025287151337, + "rewards/margins": 0.030093185603618622, + "rewards/rejected": -0.15656344592571259, + "step": 369 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 10.204376220703125, + "learning_rate": 5.444543034605147e-06, + "log_odds_chosen": 0.6873888373374939, + "log_odds_ratio": -0.4200359582901001, + "logits/chosen": -1.0495414733886719, + "logits/rejected": -0.9574683904647827, + "logps/chosen": -1.3910161256790161, + "logps/rejected": -1.9476149082183838, + "loss": 2.2624, + "nll_loss": 0.5236042141914368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13910160958766937, + "rewards/margins": 0.05565987899899483, + "rewards/rejected": -0.1947614848613739, + "step": 370 + }, + { + "epoch": 0.978246539222149, + "grad_norm": 9.809714317321777, + "learning_rate": 5.437444543034605e-06, + "log_odds_chosen": 0.4018175005912781, + "log_odds_ratio": -0.5197112560272217, + "logits/chosen": -1.0419161319732666, + "logits/rejected": -0.9370607137680054, + "logps/chosen": -1.5038185119628906, + "logps/rejected": -1.8241524696350098, + "loss": 2.4621, + "nll_loss": 0.5635530352592468, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15038184821605682, + "rewards/margins": 0.03203340247273445, + "rewards/rejected": -0.18241524696350098, + "step": 371 + }, + { + "epoch": 0.980883322346737, + "grad_norm": 9.597217559814453, + "learning_rate": 5.430346051464064e-06, + "log_odds_chosen": 0.4155101180076599, + "log_odds_ratio": -0.5198583006858826, + "logits/chosen": -1.0249592065811157, + "logits/rejected": -0.9465623497962952, + "logps/chosen": -1.2694138288497925, + "logps/rejected": -1.595078945159912, + "loss": 2.3738, + "nll_loss": 0.5414611101150513, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12694138288497925, + "rewards/margins": 0.03256651386618614, + "rewards/rejected": -0.1595079004764557, + "step": 372 + }, + { + "epoch": 0.983520105471325, + "grad_norm": 10.037396430969238, + "learning_rate": 5.423247559893523e-06, + "log_odds_chosen": 0.39195820689201355, + "log_odds_ratio": -0.5266801118850708, + "logits/chosen": -1.0737645626068115, + "logits/rejected": -1.0174031257629395, + "logps/chosen": -1.4611886739730835, + "logps/rejected": -1.7686667442321777, + "loss": 2.699, + "nll_loss": 0.6220787763595581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1461188793182373, + "rewards/margins": 0.03074781224131584, + "rewards/rejected": -0.17686668038368225, + "step": 373 + }, + { + "epoch": 0.986156888595913, + "grad_norm": 10.649054527282715, + "learning_rate": 5.416149068322981e-06, + "log_odds_chosen": 0.4070664346218109, + "log_odds_ratio": -0.5168882608413696, + "logits/chosen": -1.1013745069503784, + "logits/rejected": -0.9411362409591675, + "logps/chosen": -1.5345265865325928, + "logps/rejected": -1.864203691482544, + "loss": 2.871, + "nll_loss": 0.6660618782043457, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15345266461372375, + "rewards/margins": 0.0329677015542984, + "rewards/rejected": -0.18642036616802216, + "step": 374 + }, + { + "epoch": 0.988793671720501, + "grad_norm": 10.262837409973145, + "learning_rate": 5.4090505767524396e-06, + "log_odds_chosen": 0.3801131546497345, + "log_odds_ratio": -0.5231006145477295, + "logits/chosen": -1.0540034770965576, + "logits/rejected": -0.9516990184783936, + "logps/chosen": -1.4297250509262085, + "logps/rejected": -1.727344036102295, + "loss": 2.4712, + "nll_loss": 0.5654802322387695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14297249913215637, + "rewards/margins": 0.02976190857589245, + "rewards/rejected": -0.17273442447185516, + "step": 375 + }, + { + "epoch": 0.991430454845089, + "grad_norm": 9.602299690246582, + "learning_rate": 5.4019520851818985e-06, + "log_odds_chosen": 0.41827696561813354, + "log_odds_ratio": -0.5190730094909668, + "logits/chosen": -1.040710210800171, + "logits/rejected": -0.9790891408920288, + "logps/chosen": -1.4436545372009277, + "logps/rejected": -1.7767916917800903, + "loss": 2.3436, + "nll_loss": 0.533988356590271, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14436544477939606, + "rewards/margins": 0.033313728868961334, + "rewards/rejected": -0.177679181098938, + "step": 376 + }, + { + "epoch": 0.994067237969677, + "grad_norm": 10.112712860107422, + "learning_rate": 5.394853593611357e-06, + "log_odds_chosen": 0.2984105050563812, + "log_odds_ratio": -0.5783449411392212, + "logits/chosen": -1.05629563331604, + "logits/rejected": -0.9793335795402527, + "logps/chosen": -1.4983892440795898, + "logps/rejected": -1.7358847856521606, + "loss": 2.6497, + "nll_loss": 0.6046023368835449, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14983892440795898, + "rewards/margins": 0.023749545216560364, + "rewards/rejected": -0.17358846962451935, + "step": 377 + }, + { + "epoch": 0.996704021094265, + "grad_norm": 10.661310195922852, + "learning_rate": 5.3877551020408154e-06, + "log_odds_chosen": 0.313271164894104, + "log_odds_ratio": -0.5534718632698059, + "logits/chosen": -1.0316417217254639, + "logits/rejected": -0.9664131999015808, + "logps/chosen": -1.574042797088623, + "logps/rejected": -1.8318827152252197, + "loss": 2.6505, + "nll_loss": 0.6072766184806824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15740427374839783, + "rewards/margins": 0.025783995166420937, + "rewards/rejected": -0.18318825960159302, + "step": 378 + }, + { + "epoch": 0.999340804218853, + "grad_norm": 10.49639892578125, + "learning_rate": 5.380656610470275e-06, + "log_odds_chosen": 0.4876595437526703, + "log_odds_ratio": -0.48396334052085876, + "logits/chosen": -1.0692003965377808, + "logits/rejected": -0.942592978477478, + "logps/chosen": -1.5492660999298096, + "logps/rejected": -1.9446545839309692, + "loss": 2.7173, + "nll_loss": 0.6309238076210022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.154926598072052, + "rewards/margins": 0.03953886032104492, + "rewards/rejected": -0.19446545839309692, + "step": 379 + }, + { + "epoch": 1.0019775873434411, + "grad_norm": 11.285173416137695, + "learning_rate": 5.373558118899733e-06, + "log_odds_chosen": 0.42649227380752563, + "log_odds_ratio": -0.51619553565979, + "logits/chosen": -1.1701511144638062, + "logits/rejected": -1.0790563821792603, + "logps/chosen": -1.349685788154602, + "logps/rejected": -1.6809329986572266, + "loss": 2.7709, + "nll_loss": 0.641103982925415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1349685788154602, + "rewards/margins": 0.03312472254037857, + "rewards/rejected": -0.16809332370758057, + "step": 380 + }, + { + "epoch": 1.004614370468029, + "grad_norm": 9.188010215759277, + "learning_rate": 5.366459627329193e-06, + "log_odds_chosen": 0.9289355874061584, + "log_odds_ratio": -0.3500681519508362, + "logits/chosen": -1.0177814960479736, + "logits/rejected": -0.8898583650588989, + "logps/chosen": -1.3462495803833008, + "logps/rejected": -2.111600399017334, + "loss": 2.2798, + "nll_loss": 0.5349517464637756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13462495803833008, + "rewards/margins": 0.07653509080410004, + "rewards/rejected": -0.21116004884243011, + "step": 381 + }, + { + "epoch": 1.007251153592617, + "grad_norm": 10.165288925170898, + "learning_rate": 5.359361135758651e-06, + "log_odds_chosen": 0.3176172375679016, + "log_odds_ratio": -0.5604002475738525, + "logits/chosen": -0.9797457456588745, + "logits/rejected": -0.9105274677276611, + "logps/chosen": -1.435107946395874, + "logps/rejected": -1.694528341293335, + "loss": 2.5792, + "nll_loss": 0.5887516140937805, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14351078867912292, + "rewards/margins": 0.025942042469978333, + "rewards/rejected": -0.16945281624794006, + "step": 382 + }, + { + "epoch": 1.0098879367172051, + "grad_norm": 10.067802429199219, + "learning_rate": 5.352262644188109e-06, + "log_odds_chosen": 0.3345809578895569, + "log_odds_ratio": -0.5521736741065979, + "logits/chosen": -1.121835708618164, + "logits/rejected": -1.015265703201294, + "logps/chosen": -1.5804250240325928, + "logps/rejected": -1.8588757514953613, + "loss": 2.8837, + "nll_loss": 0.6657131910324097, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15804249048233032, + "rewards/margins": 0.027845071628689766, + "rewards/rejected": -0.18588756024837494, + "step": 383 + }, + { + "epoch": 1.012524719841793, + "grad_norm": 9.84504508972168, + "learning_rate": 5.345164152617569e-06, + "log_odds_chosen": 0.6527172327041626, + "log_odds_ratio": -0.4251009225845337, + "logits/chosen": -1.0640441179275513, + "logits/rejected": -0.9334746599197388, + "logps/chosen": -1.3364887237548828, + "logps/rejected": -1.8548702001571655, + "loss": 2.3608, + "nll_loss": 0.5476840734481812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13364887237548828, + "rewards/margins": 0.05183815956115723, + "rewards/rejected": -0.1854870468378067, + "step": 384 + }, + { + "epoch": 1.015161502966381, + "grad_norm": 9.216593742370605, + "learning_rate": 5.338065661047027e-06, + "log_odds_chosen": 0.7035154104232788, + "log_odds_ratio": -0.43145692348480225, + "logits/chosen": -1.049159288406372, + "logits/rejected": -0.9552618861198425, + "logps/chosen": -1.4319472312927246, + "logps/rejected": -2.0070128440856934, + "loss": 2.2116, + "nll_loss": 0.5097614526748657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14319473505020142, + "rewards/margins": 0.05750656872987747, + "rewards/rejected": -0.2007012814283371, + "step": 385 + }, + { + "epoch": 1.0177982860909691, + "grad_norm": 10.160662651062012, + "learning_rate": 5.330967169476486e-06, + "log_odds_chosen": 0.41731148958206177, + "log_odds_ratio": -0.5152519345283508, + "logits/chosen": -1.0017313957214355, + "logits/rejected": -0.8921623826026917, + "logps/chosen": -1.5558286905288696, + "logps/rejected": -1.9011025428771973, + "loss": 2.7231, + "nll_loss": 0.629248321056366, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15558286011219025, + "rewards/margins": 0.03452739492058754, + "rewards/rejected": -0.19011026620864868, + "step": 386 + }, + { + "epoch": 1.020435069215557, + "grad_norm": 9.78851318359375, + "learning_rate": 5.323868677905945e-06, + "log_odds_chosen": 0.28756850957870483, + "log_odds_ratio": -0.5681681036949158, + "logits/chosen": -1.0233585834503174, + "logits/rejected": -0.9709876775741577, + "logps/chosen": -1.409879446029663, + "logps/rejected": -1.637386441230774, + "loss": 2.1324, + "nll_loss": 0.47629034519195557, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14098794758319855, + "rewards/margins": 0.022750694304704666, + "rewards/rejected": -0.16373863816261292, + "step": 387 + }, + { + "epoch": 1.023071852340145, + "grad_norm": 9.475854873657227, + "learning_rate": 5.3167701863354036e-06, + "log_odds_chosen": 0.43278807401657104, + "log_odds_ratio": -0.5204096436500549, + "logits/chosen": -1.0105952024459839, + "logits/rejected": -0.9548962712287903, + "logps/chosen": -1.4009045362472534, + "logps/rejected": -1.716213583946228, + "loss": 2.1709, + "nll_loss": 0.49069300293922424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1400904655456543, + "rewards/margins": 0.03153090178966522, + "rewards/rejected": -0.17162136733531952, + "step": 388 + }, + { + "epoch": 1.0257086354647331, + "grad_norm": 9.53403377532959, + "learning_rate": 5.3096716947648625e-06, + "log_odds_chosen": 0.5109670162200928, + "log_odds_ratio": -0.47555267810821533, + "logits/chosen": -1.055998682975769, + "logits/rejected": -0.8984580636024475, + "logps/chosen": -1.4810338020324707, + "logps/rejected": -1.8954992294311523, + "loss": 2.4187, + "nll_loss": 0.5571247935295105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14810338616371155, + "rewards/margins": 0.041446536779403687, + "rewards/rejected": -0.18954992294311523, + "step": 389 + }, + { + "epoch": 1.028345418589321, + "grad_norm": 10.100629806518555, + "learning_rate": 5.302573203194321e-06, + "log_odds_chosen": 0.3726976811885834, + "log_odds_ratio": -0.5383884906768799, + "logits/chosen": -1.140406847000122, + "logits/rejected": -1.0378773212432861, + "logps/chosen": -1.490544319152832, + "logps/rejected": -1.7971141338348389, + "loss": 2.7248, + "nll_loss": 0.6273695826530457, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14905443787574768, + "rewards/margins": 0.03065699152648449, + "rewards/rejected": -0.17971143126487732, + "step": 390 + }, + { + "epoch": 1.030982201713909, + "grad_norm": 10.351038932800293, + "learning_rate": 5.2954747116237794e-06, + "log_odds_chosen": 0.4708203375339508, + "log_odds_ratio": -0.49839675426483154, + "logits/chosen": -1.0792633295059204, + "logits/rejected": -0.9910233020782471, + "logps/chosen": -1.5079857110977173, + "logps/rejected": -1.898199200630188, + "loss": 2.6497, + "nll_loss": 0.6125774383544922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15079857409000397, + "rewards/margins": 0.03902135044336319, + "rewards/rejected": -0.18981991708278656, + "step": 391 + }, + { + "epoch": 1.0336189848384971, + "grad_norm": 10.037555694580078, + "learning_rate": 5.288376220053238e-06, + "log_odds_chosen": 0.3636520802974701, + "log_odds_ratio": -0.5394154787063599, + "logits/chosen": -1.0570249557495117, + "logits/rejected": -0.9358998537063599, + "logps/chosen": -1.3649983406066895, + "logps/rejected": -1.6455130577087402, + "loss": 2.3773, + "nll_loss": 0.5403822064399719, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13649982213974, + "rewards/margins": 0.028051460161805153, + "rewards/rejected": -0.1645512878894806, + "step": 392 + }, + { + "epoch": 1.036255767963085, + "grad_norm": 10.128984451293945, + "learning_rate": 5.281277728482697e-06, + "log_odds_chosen": 0.25981563329696655, + "log_odds_ratio": -0.578166127204895, + "logits/chosen": -1.1017396450042725, + "logits/rejected": -0.9830251336097717, + "logps/chosen": -1.5177595615386963, + "logps/rejected": -1.7209172248840332, + "loss": 2.8123, + "nll_loss": 0.645270824432373, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15177595615386963, + "rewards/margins": 0.02031576819717884, + "rewards/rejected": -0.17209172248840332, + "step": 393 + }, + { + "epoch": 1.038892551087673, + "grad_norm": 10.736696243286133, + "learning_rate": 5.274179236912155e-06, + "log_odds_chosen": 0.42231547832489014, + "log_odds_ratio": -0.5113197565078735, + "logits/chosen": -1.021410584449768, + "logits/rejected": -0.9038841128349304, + "logps/chosen": -1.6272623538970947, + "logps/rejected": -1.9719369411468506, + "loss": 2.9515, + "nll_loss": 0.6867390871047974, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1627262383699417, + "rewards/margins": 0.03446745127439499, + "rewards/rejected": -0.1971937119960785, + "step": 394 + }, + { + "epoch": 1.0415293342122611, + "grad_norm": 10.150442123413086, + "learning_rate": 5.267080745341615e-06, + "log_odds_chosen": 0.6440749168395996, + "log_odds_ratio": -0.43997421860694885, + "logits/chosen": -0.9821280837059021, + "logits/rejected": -0.9708842039108276, + "logps/chosen": -1.400512933731079, + "logps/rejected": -1.8895323276519775, + "loss": 2.4461, + "nll_loss": 0.5675250887870789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14005127549171448, + "rewards/margins": 0.04890194535255432, + "rewards/rejected": -0.18895323574543, + "step": 395 + }, + { + "epoch": 1.044166117336849, + "grad_norm": 9.373574256896973, + "learning_rate": 5.259982253771073e-06, + "log_odds_chosen": 0.5626580119132996, + "log_odds_ratio": -0.4574100077152252, + "logits/chosen": -1.0475770235061646, + "logits/rejected": -1.0139124393463135, + "logps/chosen": -1.2695597410202026, + "logps/rejected": -1.6981005668640137, + "loss": 2.0175, + "nll_loss": 0.45862582325935364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12695598602294922, + "rewards/margins": 0.04285407438874245, + "rewards/rejected": -0.16981005668640137, + "step": 396 + }, + { + "epoch": 1.046802900461437, + "grad_norm": 9.228339195251465, + "learning_rate": 5.252883762200533e-06, + "log_odds_chosen": 0.4349876046180725, + "log_odds_ratio": -0.5105225443840027, + "logits/chosen": -1.016300916671753, + "logits/rejected": -0.9554897546768188, + "logps/chosen": -1.288040280342102, + "logps/rejected": -1.6193768978118896, + "loss": 2.2641, + "nll_loss": 0.5149763822555542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1288040280342102, + "rewards/margins": 0.033133648335933685, + "rewards/rejected": -0.1619376838207245, + "step": 397 + }, + { + "epoch": 1.0494396835860251, + "grad_norm": 9.794448852539062, + "learning_rate": 5.245785270629991e-06, + "log_odds_chosen": 0.28050416707992554, + "log_odds_ratio": -0.5663173198699951, + "logits/chosen": -1.0508898496627808, + "logits/rejected": -0.9707998037338257, + "logps/chosen": -1.4081400632858276, + "logps/rejected": -1.6302982568740845, + "loss": 2.4217, + "nll_loss": 0.5487897396087646, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14081400632858276, + "rewards/margins": 0.0222158245742321, + "rewards/rejected": -0.16302983462810516, + "step": 398 + }, + { + "epoch": 1.052076466710613, + "grad_norm": 9.469202041625977, + "learning_rate": 5.23868677905945e-06, + "log_odds_chosen": 0.5130904912948608, + "log_odds_ratio": -0.5062092542648315, + "logits/chosen": -0.992918074131012, + "logits/rejected": -0.9598261117935181, + "logps/chosen": -1.3223826885223389, + "logps/rejected": -1.7184679508209229, + "loss": 2.0811, + "nll_loss": 0.46966129541397095, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1322382688522339, + "rewards/margins": 0.039608534425497055, + "rewards/rejected": -0.17184680700302124, + "step": 399 + }, + { + "epoch": 1.054713249835201, + "grad_norm": 9.868070602416992, + "learning_rate": 5.231588287488909e-06, + "log_odds_chosen": 0.3694670796394348, + "log_odds_ratio": -0.5339500904083252, + "logits/chosen": -1.092237949371338, + "logits/rejected": -0.9644882678985596, + "logps/chosen": -1.4973866939544678, + "logps/rejected": -1.7971856594085693, + "loss": 2.905, + "nll_loss": 0.6728647351264954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14973866939544678, + "rewards/margins": 0.029979918152093887, + "rewards/rejected": -0.17971858382225037, + "step": 400 + }, + { + "epoch": 1.0573500329597891, + "grad_norm": 10.309577941894531, + "learning_rate": 5.224489795918367e-06, + "log_odds_chosen": 0.32514360547065735, + "log_odds_ratio": -0.5523920059204102, + "logits/chosen": -1.0738779306411743, + "logits/rejected": -1.0128540992736816, + "logps/chosen": -1.4930551052093506, + "logps/rejected": -1.7555242776870728, + "loss": 2.9088, + "nll_loss": 0.6719701886177063, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.149305522441864, + "rewards/margins": 0.026246918365359306, + "rewards/rejected": -0.17555244266986847, + "step": 401 + }, + { + "epoch": 1.059986816084377, + "grad_norm": 10.44379711151123, + "learning_rate": 5.217391304347826e-06, + "log_odds_chosen": 0.33604544401168823, + "log_odds_ratio": -0.5441054105758667, + "logits/chosen": -0.9352221488952637, + "logits/rejected": -0.8872804641723633, + "logps/chosen": -1.3509907722473145, + "logps/rejected": -1.5996346473693848, + "loss": 2.6763, + "nll_loss": 0.6146520972251892, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13509908318519592, + "rewards/margins": 0.024864397943019867, + "rewards/rejected": -0.1599634736776352, + "step": 402 + }, + { + "epoch": 1.062623599208965, + "grad_norm": 10.469642639160156, + "learning_rate": 5.2102928127772845e-06, + "log_odds_chosen": 0.1823788732290268, + "log_odds_ratio": -0.6198056936264038, + "logits/chosen": -1.0478174686431885, + "logits/rejected": -0.9866634607315063, + "logps/chosen": -1.7168128490447998, + "logps/rejected": -1.8612220287322998, + "loss": 2.7923, + "nll_loss": 0.636086642742157, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17168128490447998, + "rewards/margins": 0.014440920203924179, + "rewards/rejected": -0.18612220883369446, + "step": 403 + }, + { + "epoch": 1.0652603823335531, + "grad_norm": 9.132431030273438, + "learning_rate": 5.2031943212067434e-06, + "log_odds_chosen": 0.4466651380062103, + "log_odds_ratio": -0.5053660273551941, + "logits/chosen": -1.0298335552215576, + "logits/rejected": -0.9548412561416626, + "logps/chosen": -1.1939764022827148, + "logps/rejected": -1.5169267654418945, + "loss": 1.7979, + "nll_loss": 0.3989323377609253, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11939764022827148, + "rewards/margins": 0.03229503333568573, + "rewards/rejected": -0.15169267356395721, + "step": 404 + }, + { + "epoch": 1.067897165458141, + "grad_norm": 10.261722564697266, + "learning_rate": 5.1960958296362015e-06, + "log_odds_chosen": 0.3869773745536804, + "log_odds_ratio": -0.5333517789840698, + "logits/chosen": -1.0374103784561157, + "logits/rejected": -0.973752498626709, + "logps/chosen": -1.4903892278671265, + "logps/rejected": -1.7808723449707031, + "loss": 2.5851, + "nll_loss": 0.5929421782493591, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14903892576694489, + "rewards/margins": 0.029048318043351173, + "rewards/rejected": -0.1780872642993927, + "step": 405 + }, + { + "epoch": 1.070533948582729, + "grad_norm": 10.340511322021484, + "learning_rate": 5.188997338065661e-06, + "log_odds_chosen": 0.44721418619155884, + "log_odds_ratio": -0.5180215835571289, + "logits/chosen": -1.0950462818145752, + "logits/rejected": -1.0476313829421997, + "logps/chosen": -1.402875542640686, + "logps/rejected": -1.748979926109314, + "loss": 2.78, + "nll_loss": 0.6431941390037537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14028754830360413, + "rewards/margins": 0.03461045026779175, + "rewards/rejected": -0.17489799857139587, + "step": 406 + }, + { + "epoch": 1.0731707317073171, + "grad_norm": 9.088520050048828, + "learning_rate": 5.181898846495119e-06, + "log_odds_chosen": 0.4929713010787964, + "log_odds_ratio": -0.48062002658843994, + "logits/chosen": -0.9891932606697083, + "logits/rejected": -0.9239880442619324, + "logps/chosen": -1.1858923435211182, + "logps/rejected": -1.5480952262878418, + "loss": 1.8229, + "nll_loss": 0.4076550602912903, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11858922988176346, + "rewards/margins": 0.03622030094265938, + "rewards/rejected": -0.15480953454971313, + "step": 407 + }, + { + "epoch": 1.075807514831905, + "grad_norm": 10.639555931091309, + "learning_rate": 5.174800354924579e-06, + "log_odds_chosen": 0.3192574977874756, + "log_odds_ratio": -0.5502256155014038, + "logits/chosen": -1.108659029006958, + "logits/rejected": -1.0146270990371704, + "logps/chosen": -1.526962161064148, + "logps/rejected": -1.7815569639205933, + "loss": 2.7723, + "nll_loss": 0.6380521655082703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15269622206687927, + "rewards/margins": 0.025459475815296173, + "rewards/rejected": -0.17815569043159485, + "step": 408 + }, + { + "epoch": 1.078444297956493, + "grad_norm": 9.81585693359375, + "learning_rate": 5.167701863354037e-06, + "log_odds_chosen": 0.5805134177207947, + "log_odds_ratio": -0.45596566796302795, + "logits/chosen": -1.0219948291778564, + "logits/rejected": -0.9056349396705627, + "logps/chosen": -1.3104722499847412, + "logps/rejected": -1.7477952241897583, + "loss": 2.2402, + "nll_loss": 0.5144590735435486, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13104721903800964, + "rewards/margins": 0.04373229295015335, + "rewards/rejected": -0.17477953433990479, + "step": 409 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 9.126127243041992, + "learning_rate": 5.160603371783495e-06, + "log_odds_chosen": 0.7718862891197205, + "log_odds_ratio": -0.4180232286453247, + "logits/chosen": -1.0102792978286743, + "logits/rejected": -0.9463008642196655, + "logps/chosen": -1.3646860122680664, + "logps/rejected": -2.0148112773895264, + "loss": 1.9482, + "nll_loss": 0.4452366232872009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13646861910820007, + "rewards/margins": 0.06501252204179764, + "rewards/rejected": -0.20148113369941711, + "step": 410 + }, + { + "epoch": 1.083717864205669, + "grad_norm": 9.693203926086426, + "learning_rate": 5.153504880212955e-06, + "log_odds_chosen": 0.4520252048969269, + "log_odds_ratio": -0.5075293779373169, + "logits/chosen": -1.0716478824615479, + "logits/rejected": -0.9970329999923706, + "logps/chosen": -1.3662054538726807, + "logps/rejected": -1.7278333902359009, + "loss": 2.3542, + "nll_loss": 0.5377950668334961, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13662053644657135, + "rewards/margins": 0.036162812262773514, + "rewards/rejected": -0.17278335988521576, + "step": 411 + }, + { + "epoch": 1.086354647330257, + "grad_norm": 9.425996780395508, + "learning_rate": 5.146406388642413e-06, + "log_odds_chosen": 0.4897967278957367, + "log_odds_ratio": -0.48537391424179077, + "logits/chosen": -1.063400149345398, + "logits/rejected": -0.960479736328125, + "logps/chosen": -1.388871669769287, + "logps/rejected": -1.7831190824508667, + "loss": 2.3116, + "nll_loss": 0.5293655395507812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1388871669769287, + "rewards/margins": 0.03942474350333214, + "rewards/rejected": -0.17831191420555115, + "step": 412 + }, + { + "epoch": 1.0889914304548451, + "grad_norm": 10.309266090393066, + "learning_rate": 5.139307897071872e-06, + "log_odds_chosen": 0.271070271730423, + "log_odds_ratio": -0.5740872025489807, + "logits/chosen": -1.0737278461456299, + "logits/rejected": -0.9297885298728943, + "logps/chosen": -1.3509784936904907, + "logps/rejected": -1.5514028072357178, + "loss": 2.6739, + "nll_loss": 0.6110659837722778, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13509784638881683, + "rewards/margins": 0.020042438060045242, + "rewards/rejected": -0.15514028072357178, + "step": 413 + }, + { + "epoch": 1.091628213579433, + "grad_norm": 9.698198318481445, + "learning_rate": 5.132209405501331e-06, + "log_odds_chosen": 0.40453335642814636, + "log_odds_ratio": -0.5244268774986267, + "logits/chosen": -1.020873785018921, + "logits/rejected": -0.9422011971473694, + "logps/chosen": -1.3256057500839233, + "logps/rejected": -1.645374059677124, + "loss": 2.1306, + "nll_loss": 0.48021090030670166, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1325605809688568, + "rewards/margins": 0.031976841390132904, + "rewards/rejected": -0.16453741490840912, + "step": 414 + }, + { + "epoch": 1.094264996704021, + "grad_norm": 9.672815322875977, + "learning_rate": 5.12511091393079e-06, + "log_odds_chosen": 0.5226827263832092, + "log_odds_ratio": -0.47890496253967285, + "logits/chosen": -1.0159393548965454, + "logits/rejected": -0.957647979259491, + "logps/chosen": -1.376617431640625, + "logps/rejected": -1.7834666967391968, + "loss": 2.3519, + "nll_loss": 0.5400886535644531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13766175508499146, + "rewards/margins": 0.04068492725491524, + "rewards/rejected": -0.1783466637134552, + "step": 415 + }, + { + "epoch": 1.0969017798286091, + "grad_norm": 10.251235961914062, + "learning_rate": 5.118012422360248e-06, + "log_odds_chosen": 0.5625154376029968, + "log_odds_ratio": -0.4570402503013611, + "logits/chosen": -1.0735247135162354, + "logits/rejected": -0.9383422136306763, + "logps/chosen": -1.4990665912628174, + "logps/rejected": -1.9590812921524048, + "loss": 2.7048, + "nll_loss": 0.6304997801780701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14990666508674622, + "rewards/margins": 0.04600147530436516, + "rewards/rejected": -0.19590812921524048, + "step": 416 + }, + { + "epoch": 1.099538562953197, + "grad_norm": 9.985235214233398, + "learning_rate": 5.1109139307897074e-06, + "log_odds_chosen": 0.4486045837402344, + "log_odds_ratio": -0.5067667365074158, + "logits/chosen": -1.0886329412460327, + "logits/rejected": -1.0065972805023193, + "logps/chosen": -1.4825245141983032, + "logps/rejected": -1.8411731719970703, + "loss": 2.6359, + "nll_loss": 0.6083040833473206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1482524573802948, + "rewards/margins": 0.03586485981941223, + "rewards/rejected": -0.18411731719970703, + "step": 417 + }, + { + "epoch": 1.102175346077785, + "grad_norm": 10.900262832641602, + "learning_rate": 5.1038154392191655e-06, + "log_odds_chosen": 0.35260820388793945, + "log_odds_ratio": -0.5387884974479675, + "logits/chosen": -1.152395248413086, + "logits/rejected": -1.0343936681747437, + "logps/chosen": -1.4348869323730469, + "logps/rejected": -1.7173516750335693, + "loss": 2.9281, + "nll_loss": 0.6781498789787292, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14348870515823364, + "rewards/margins": 0.028246475383639336, + "rewards/rejected": -0.17173516750335693, + "step": 418 + }, + { + "epoch": 1.1048121292023731, + "grad_norm": 9.87543773651123, + "learning_rate": 5.096716947648624e-06, + "log_odds_chosen": 0.5148847699165344, + "log_odds_ratio": -0.47807198762893677, + "logits/chosen": -1.1029891967773438, + "logits/rejected": -0.9355456233024597, + "logps/chosen": -1.3388253450393677, + "logps/rejected": -1.7494006156921387, + "loss": 2.4237, + "nll_loss": 0.5581195950508118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.133882537484169, + "rewards/margins": 0.0410575233399868, + "rewards/rejected": -0.1749400645494461, + "step": 419 + }, + { + "epoch": 1.107448912326961, + "grad_norm": 10.111366271972656, + "learning_rate": 5.089618456078083e-06, + "log_odds_chosen": 0.6326606273651123, + "log_odds_ratio": -0.4852484464645386, + "logits/chosen": -1.0890178680419922, + "logits/rejected": -0.988831639289856, + "logps/chosen": -1.4711456298828125, + "logps/rejected": -2.0076844692230225, + "loss": 2.8792, + "nll_loss": 0.6712836623191833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1471145749092102, + "rewards/margins": 0.05365390330553055, + "rewards/rejected": -0.20076845586299896, + "step": 420 + }, + { + "epoch": 1.110085695451549, + "grad_norm": 9.26616096496582, + "learning_rate": 5.082519964507541e-06, + "log_odds_chosen": 0.5602860450744629, + "log_odds_ratio": -0.46852120757102966, + "logits/chosen": -1.087837815284729, + "logits/rejected": -1.003281831741333, + "logps/chosen": -1.235466718673706, + "logps/rejected": -1.6632263660430908, + "loss": 2.1426, + "nll_loss": 0.48879703879356384, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12354665994644165, + "rewards/margins": 0.042775969952344894, + "rewards/rejected": -0.16632264852523804, + "step": 421 + }, + { + "epoch": 1.1127224785761372, + "grad_norm": 10.583718299865723, + "learning_rate": 5.075421472937001e-06, + "log_odds_chosen": 0.35559818148612976, + "log_odds_ratio": -0.5407001376152039, + "logits/chosen": -0.9944896697998047, + "logits/rejected": -0.9455467462539673, + "logps/chosen": -1.3642079830169678, + "logps/rejected": -1.643673062324524, + "loss": 2.3662, + "nll_loss": 0.5374903678894043, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1364208161830902, + "rewards/margins": 0.027946488931775093, + "rewards/rejected": -0.16436730325222015, + "step": 422 + }, + { + "epoch": 1.115359261700725, + "grad_norm": 9.83077335357666, + "learning_rate": 5.068322981366459e-06, + "log_odds_chosen": 0.37923258543014526, + "log_odds_ratio": -0.5372962951660156, + "logits/chosen": -1.0408728122711182, + "logits/rejected": -1.0057107210159302, + "logps/chosen": -1.2983362674713135, + "logps/rejected": -1.584801435470581, + "loss": 2.2054, + "nll_loss": 0.49762964248657227, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1298336386680603, + "rewards/margins": 0.028646504506468773, + "rewards/rejected": -0.15848013758659363, + "step": 423 + }, + { + "epoch": 1.117996044825313, + "grad_norm": 9.340259552001953, + "learning_rate": 5.061224489795918e-06, + "log_odds_chosen": 0.4945228397846222, + "log_odds_ratio": -0.4810750484466553, + "logits/chosen": -0.9996337294578552, + "logits/rejected": -0.9505899548530579, + "logps/chosen": -1.3723878860473633, + "logps/rejected": -1.766256332397461, + "loss": 2.0538, + "nll_loss": 0.46534520387649536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13723880052566528, + "rewards/margins": 0.03938683122396469, + "rewards/rejected": -0.17662563920021057, + "step": 424 + }, + { + "epoch": 1.1206328279499012, + "grad_norm": 9.9264554977417, + "learning_rate": 5.054125998225377e-06, + "log_odds_chosen": 0.23201516270637512, + "log_odds_ratio": -0.5995275378227234, + "logits/chosen": -1.0680444240570068, + "logits/rejected": -1.009355068206787, + "logps/chosen": -1.3498444557189941, + "logps/rejected": -1.5279866456985474, + "loss": 2.3752, + "nll_loss": 0.5338517427444458, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13498443365097046, + "rewards/margins": 0.017814233899116516, + "rewards/rejected": -0.15279868245124817, + "step": 425 + }, + { + "epoch": 1.123269611074489, + "grad_norm": 10.826032638549805, + "learning_rate": 5.047027506654836e-06, + "log_odds_chosen": 0.28324422240257263, + "log_odds_ratio": -0.5778370499610901, + "logits/chosen": -1.1000683307647705, + "logits/rejected": -0.9509692788124084, + "logps/chosen": -1.635487675666809, + "logps/rejected": -1.8743829727172852, + "loss": 3.1423, + "nll_loss": 0.7277827262878418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1635487675666809, + "rewards/margins": 0.023889539763331413, + "rewards/rejected": -0.18743830919265747, + "step": 426 + }, + { + "epoch": 1.125906394199077, + "grad_norm": 9.88503646850586, + "learning_rate": 5.039929015084295e-06, + "log_odds_chosen": 0.6358194351196289, + "log_odds_ratio": -0.4514765739440918, + "logits/chosen": -0.9731795191764832, + "logits/rejected": -0.9241248369216919, + "logps/chosen": -1.6356950998306274, + "logps/rejected": -2.1824707984924316, + "loss": 2.0791, + "nll_loss": 0.4746202826499939, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16356950998306274, + "rewards/margins": 0.05467755347490311, + "rewards/rejected": -0.21824705600738525, + "step": 427 + }, + { + "epoch": 1.1285431773236652, + "grad_norm": 10.21439266204834, + "learning_rate": 5.032830523513753e-06, + "log_odds_chosen": 0.4135708510875702, + "log_odds_ratio": -0.5264768600463867, + "logits/chosen": -1.0862725973129272, + "logits/rejected": -1.0438268184661865, + "logps/chosen": -1.42360520362854, + "logps/rejected": -1.7432069778442383, + "loss": 2.8331, + "nll_loss": 0.6556384563446045, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14236053824424744, + "rewards/margins": 0.031960174441337585, + "rewards/rejected": -0.17432071268558502, + "step": 428 + }, + { + "epoch": 1.131179960448253, + "grad_norm": 10.110530853271484, + "learning_rate": 5.025732031943212e-06, + "log_odds_chosen": 0.3847730755805969, + "log_odds_ratio": -0.5318898558616638, + "logits/chosen": -1.0231150388717651, + "logits/rejected": -0.8973355293273926, + "logps/chosen": -1.4105229377746582, + "logps/rejected": -1.7216198444366455, + "loss": 2.1062, + "nll_loss": 0.4733680486679077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14105229079723358, + "rewards/margins": 0.031109701842069626, + "rewards/rejected": -0.1721619963645935, + "step": 429 + }, + { + "epoch": 1.133816743572841, + "grad_norm": 9.565515518188477, + "learning_rate": 5.018633540372671e-06, + "log_odds_chosen": 0.507100522518158, + "log_odds_ratio": -0.5001344680786133, + "logits/chosen": -1.053093671798706, + "logits/rejected": -1.0269943475723267, + "logps/chosen": -1.353144884109497, + "logps/rejected": -1.7529642581939697, + "loss": 2.6367, + "nll_loss": 0.6091610193252563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13531449437141418, + "rewards/margins": 0.039981938898563385, + "rewards/rejected": -0.17529642581939697, + "step": 430 + }, + { + "epoch": 1.1364535266974292, + "grad_norm": 10.121060371398926, + "learning_rate": 5.0115350488021295e-06, + "log_odds_chosen": 0.4973146617412567, + "log_odds_ratio": -0.4870888590812683, + "logits/chosen": -1.0365742444992065, + "logits/rejected": -0.9439826607704163, + "logps/chosen": -1.552242398262024, + "logps/rejected": -1.9457755088806152, + "loss": 2.5115, + "nll_loss": 0.5791730284690857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1552242487668991, + "rewards/margins": 0.039353299885988235, + "rewards/rejected": -0.19457754492759705, + "step": 431 + }, + { + "epoch": 1.139090309822017, + "grad_norm": 10.338253021240234, + "learning_rate": 5.0044365572315875e-06, + "log_odds_chosen": 0.5865770578384399, + "log_odds_ratio": -0.5449586510658264, + "logits/chosen": -0.9849517941474915, + "logits/rejected": -0.8847633600234985, + "logps/chosen": -1.4991849660873413, + "logps/rejected": -2.0054585933685303, + "loss": 2.8563, + "nll_loss": 0.6595792770385742, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14991851150989532, + "rewards/margins": 0.05062737315893173, + "rewards/rejected": -0.20054587721824646, + "step": 432 + }, + { + "epoch": 1.1417270929466052, + "grad_norm": 9.96427059173584, + "learning_rate": 4.997338065661047e-06, + "log_odds_chosen": 0.4820500910282135, + "log_odds_ratio": -0.49063819646835327, + "logits/chosen": -1.0988550186157227, + "logits/rejected": -0.9819570183753967, + "logps/chosen": -1.3548251390457153, + "logps/rejected": -1.744004487991333, + "loss": 2.5723, + "nll_loss": 0.5940203666687012, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13548250496387482, + "rewards/margins": 0.03891792893409729, + "rewards/rejected": -0.1744004487991333, + "step": 433 + }, + { + "epoch": 1.1443638760711932, + "grad_norm": 10.090310096740723, + "learning_rate": 4.990239574090505e-06, + "log_odds_chosen": 0.4672353267669678, + "log_odds_ratio": -0.4982440173625946, + "logits/chosen": -1.1151236295700073, + "logits/rejected": -1.002767562866211, + "logps/chosen": -1.3489617109298706, + "logps/rejected": -1.7005971670150757, + "loss": 2.4581, + "nll_loss": 0.564690351486206, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1348961740732193, + "rewards/margins": 0.035163555294275284, + "rewards/rejected": -0.17005972564220428, + "step": 434 + }, + { + "epoch": 1.147000659195781, + "grad_norm": 10.10417652130127, + "learning_rate": 4.983141082519965e-06, + "log_odds_chosen": 0.4814032316207886, + "log_odds_ratio": -0.5029208064079285, + "logits/chosen": -1.0239670276641846, + "logits/rejected": -0.9829295873641968, + "logps/chosen": -1.3184508085250854, + "logps/rejected": -1.6729843616485596, + "loss": 2.0892, + "nll_loss": 0.4720097780227661, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13184508681297302, + "rewards/margins": 0.035453349351882935, + "rewards/rejected": -0.16729843616485596, + "step": 435 + }, + { + "epoch": 1.1496374423203692, + "grad_norm": 9.528637886047363, + "learning_rate": 4.976042590949423e-06, + "log_odds_chosen": 0.23049712181091309, + "log_odds_ratio": -0.5938129425048828, + "logits/chosen": -1.0522797107696533, + "logits/rejected": -0.9975440502166748, + "logps/chosen": -1.4030239582061768, + "logps/rejected": -1.583653450012207, + "loss": 2.2764, + "nll_loss": 0.5097287893295288, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1403023898601532, + "rewards/margins": 0.01806293986737728, + "rewards/rejected": -0.15836533904075623, + "step": 436 + }, + { + "epoch": 1.1522742254449572, + "grad_norm": 10.909761428833008, + "learning_rate": 4.968944099378881e-06, + "log_odds_chosen": 0.0930374264717102, + "log_odds_ratio": -0.6607744693756104, + "logits/chosen": -0.9617547392845154, + "logits/rejected": -0.9185218811035156, + "logps/chosen": -1.7329206466674805, + "logps/rejected": -1.8154338598251343, + "loss": 3.0056, + "nll_loss": 0.6853236556053162, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.17329205572605133, + "rewards/margins": 0.008251333609223366, + "rewards/rejected": -0.18154339492321014, + "step": 437 + }, + { + "epoch": 1.154911008569545, + "grad_norm": 9.53764820098877, + "learning_rate": 4.961845607808341e-06, + "log_odds_chosen": 0.6326797604560852, + "log_odds_ratio": -0.438091516494751, + "logits/chosen": -1.0544040203094482, + "logits/rejected": -0.9387926459312439, + "logps/chosen": -1.3823257684707642, + "logps/rejected": -1.8888732194900513, + "loss": 2.1051, + "nll_loss": 0.48247024416923523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13823257386684418, + "rewards/margins": 0.05065474659204483, + "rewards/rejected": -0.1888873279094696, + "step": 438 + }, + { + "epoch": 1.1575477916941332, + "grad_norm": 10.221098899841309, + "learning_rate": 4.954747116237799e-06, + "log_odds_chosen": 0.25028908252716064, + "log_odds_ratio": -0.5832465887069702, + "logits/chosen": -1.045633316040039, + "logits/rejected": -1.0008597373962402, + "logps/chosen": -1.2798808813095093, + "logps/rejected": -1.4623150825500488, + "loss": 2.5023, + "nll_loss": 0.5672391057014465, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12798810005187988, + "rewards/margins": 0.018243417143821716, + "rewards/rejected": -0.1462315171957016, + "step": 439 + }, + { + "epoch": 1.1601845748187212, + "grad_norm": 9.680135726928711, + "learning_rate": 4.947648624667258e-06, + "log_odds_chosen": 0.7590002417564392, + "log_odds_ratio": -0.40648603439331055, + "logits/chosen": -1.078658103942871, + "logits/rejected": -0.9909886121749878, + "logps/chosen": -1.2903246879577637, + "logps/rejected": -1.8791279792785645, + "loss": 2.0585, + "nll_loss": 0.47397562861442566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12903249263763428, + "rewards/margins": 0.05888032540678978, + "rewards/rejected": -0.18791279196739197, + "step": 440 + }, + { + "epoch": 1.162821357943309, + "grad_norm": 9.91891098022461, + "learning_rate": 4.940550133096717e-06, + "log_odds_chosen": 0.4134746193885803, + "log_odds_ratio": -0.5270575284957886, + "logits/chosen": -1.071434497833252, + "logits/rejected": -0.9813169240951538, + "logps/chosen": -1.4695409536361694, + "logps/rejected": -1.8117649555206299, + "loss": 2.4518, + "nll_loss": 0.5602436661720276, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14695410430431366, + "rewards/margins": 0.03422239422798157, + "rewards/rejected": -0.18117651343345642, + "step": 441 + }, + { + "epoch": 1.1654581410678972, + "grad_norm": 9.274177551269531, + "learning_rate": 4.933451641526176e-06, + "log_odds_chosen": 0.5152239203453064, + "log_odds_ratio": -0.4806838631629944, + "logits/chosen": -1.0709270238876343, + "logits/rejected": -1.0069520473480225, + "logps/chosen": -1.366707444190979, + "logps/rejected": -1.7815189361572266, + "loss": 2.2684, + "nll_loss": 0.5190252661705017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13667075335979462, + "rewards/margins": 0.0414811447262764, + "rewards/rejected": -0.17815189063549042, + "step": 442 + }, + { + "epoch": 1.1680949241924852, + "grad_norm": 9.643570899963379, + "learning_rate": 4.926353149955634e-06, + "log_odds_chosen": 0.3073192834854126, + "log_odds_ratio": -0.565639078617096, + "logits/chosen": -1.0718648433685303, + "logits/rejected": -1.0097239017486572, + "logps/chosen": -1.4407001733779907, + "logps/rejected": -1.6975566148757935, + "loss": 2.3838, + "nll_loss": 0.5393775701522827, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14407002925872803, + "rewards/margins": 0.02568562515079975, + "rewards/rejected": -0.16975563764572144, + "step": 443 + }, + { + "epoch": 1.170731707317073, + "grad_norm": 9.919586181640625, + "learning_rate": 4.9192546583850935e-06, + "log_odds_chosen": 0.2999556362628937, + "log_odds_ratio": -0.5668871998786926, + "logits/chosen": -1.0565747022628784, + "logits/rejected": -0.9812292456626892, + "logps/chosen": -1.3061429262161255, + "logps/rejected": -1.5474501848220825, + "loss": 2.1788, + "nll_loss": 0.4880080819129944, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1306142956018448, + "rewards/margins": 0.02413073554635048, + "rewards/rejected": -0.15474501252174377, + "step": 444 + }, + { + "epoch": 1.1733684904416612, + "grad_norm": 9.811331748962402, + "learning_rate": 4.9121561668145515e-06, + "log_odds_chosen": 0.3747047781944275, + "log_odds_ratio": -0.5285030603408813, + "logits/chosen": -1.0548806190490723, + "logits/rejected": -0.9971832036972046, + "logps/chosen": -1.227198600769043, + "logps/rejected": -1.4902523756027222, + "loss": 2.1844, + "nll_loss": 0.49325278401374817, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12271985411643982, + "rewards/margins": 0.02630537748336792, + "rewards/rejected": -0.14902523159980774, + "step": 445 + }, + { + "epoch": 1.1760052735662492, + "grad_norm": 9.10595703125, + "learning_rate": 4.9050576752440104e-06, + "log_odds_chosen": 0.619392991065979, + "log_odds_ratio": -0.454559326171875, + "logits/chosen": -1.030524492263794, + "logits/rejected": -0.9345372915267944, + "logps/chosen": -1.3098779916763306, + "logps/rejected": -1.8098673820495605, + "loss": 1.9186, + "nll_loss": 0.4341898560523987, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13098779320716858, + "rewards/margins": 0.049998946487903595, + "rewards/rejected": -0.18098673224449158, + "step": 446 + }, + { + "epoch": 1.178642056690837, + "grad_norm": 10.539815902709961, + "learning_rate": 4.897959183673469e-06, + "log_odds_chosen": 0.23752687871456146, + "log_odds_ratio": -0.5838688611984253, + "logits/chosen": -1.1413980722427368, + "logits/rejected": -1.022702932357788, + "logps/chosen": -1.532981038093567, + "logps/rejected": -1.7253105640411377, + "loss": 3.512, + "nll_loss": 0.8196170330047607, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15329810976982117, + "rewards/margins": 0.019232943654060364, + "rewards/rejected": -0.17253105342388153, + "step": 447 + }, + { + "epoch": 1.1812788398154253, + "grad_norm": 9.152738571166992, + "learning_rate": 4.890860692102927e-06, + "log_odds_chosen": 0.817240834236145, + "log_odds_ratio": -0.4035162329673767, + "logits/chosen": -0.9740374088287354, + "logits/rejected": -0.8783177733421326, + "logps/chosen": -1.3137390613555908, + "logps/rejected": -1.9894474744796753, + "loss": 1.8385, + "nll_loss": 0.4192817807197571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13137391209602356, + "rewards/margins": 0.06757082790136337, + "rewards/rejected": -0.19894473254680634, + "step": 448 + }, + { + "epoch": 1.1839156229400132, + "grad_norm": 8.939960479736328, + "learning_rate": 4.883762200532387e-06, + "log_odds_chosen": 0.3687882721424103, + "log_odds_ratio": -0.5365138053894043, + "logits/chosen": -1.0808924436569214, + "logits/rejected": -0.9705110788345337, + "logps/chosen": -1.3692409992218018, + "logps/rejected": -1.6519029140472412, + "loss": 2.1733, + "nll_loss": 0.4896632432937622, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1369241178035736, + "rewards/margins": 0.0282661821693182, + "rewards/rejected": -0.16519027948379517, + "step": 449 + }, + { + "epoch": 1.186552406064601, + "grad_norm": 10.072639465332031, + "learning_rate": 4.876663708961845e-06, + "log_odds_chosen": 0.6125161051750183, + "log_odds_ratio": -0.4399271011352539, + "logits/chosen": -1.109391689300537, + "logits/rejected": -1.0129214525222778, + "logps/chosen": -1.2192109823226929, + "logps/rejected": -1.678152322769165, + "loss": 2.1898, + "nll_loss": 0.5034641027450562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12192109227180481, + "rewards/margins": 0.045894138514995575, + "rewards/rejected": -0.16781523823738098, + "step": 450 + }, + { + "epoch": 1.1891891891891893, + "grad_norm": 10.272133827209473, + "learning_rate": 4.869565217391304e-06, + "log_odds_chosen": 0.35755234956741333, + "log_odds_ratio": -0.534394383430481, + "logits/chosen": -1.0693304538726807, + "logits/rejected": -0.9832642674446106, + "logps/chosen": -1.6391563415527344, + "logps/rejected": -1.9330048561096191, + "loss": 2.827, + "nll_loss": 0.6533028483390808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16391563415527344, + "rewards/margins": 0.029384873807430267, + "rewards/rejected": -0.1933005005121231, + "step": 451 + }, + { + "epoch": 1.1918259723137772, + "grad_norm": 10.0197172164917, + "learning_rate": 4.862466725820763e-06, + "log_odds_chosen": 0.36595743894577026, + "log_odds_ratio": -0.5484901070594788, + "logits/chosen": -1.0909594297409058, + "logits/rejected": -1.0414979457855225, + "logps/chosen": -1.3381670713424683, + "logps/rejected": -1.585001826286316, + "loss": 2.6636, + "nll_loss": 0.6110493540763855, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1338167041540146, + "rewards/margins": 0.024683479219675064, + "rewards/rejected": -0.15850019454956055, + "step": 452 + }, + { + "epoch": 1.194462755438365, + "grad_norm": 9.387800216674805, + "learning_rate": 4.855368234250222e-06, + "log_odds_chosen": 0.3087214231491089, + "log_odds_ratio": -0.5611314177513123, + "logits/chosen": -1.0786112546920776, + "logits/rejected": -0.9813193082809448, + "logps/chosen": -1.4780585765838623, + "logps/rejected": -1.7218488454818726, + "loss": 2.4138, + "nll_loss": 0.5473390817642212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14780586957931519, + "rewards/margins": 0.02437901869416237, + "rewards/rejected": -0.17218489944934845, + "step": 453 + }, + { + "epoch": 1.1970995385629533, + "grad_norm": 10.184649467468262, + "learning_rate": 4.84826974267968e-06, + "log_odds_chosen": 0.50560462474823, + "log_odds_ratio": -0.4792592525482178, + "logits/chosen": -1.097749948501587, + "logits/rejected": -0.9480170011520386, + "logps/chosen": -1.3219808340072632, + "logps/rejected": -1.7089626789093018, + "loss": 2.4091, + "nll_loss": 0.5543407797813416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13219808042049408, + "rewards/margins": 0.03869818150997162, + "rewards/rejected": -0.1708962619304657, + "step": 454 + }, + { + "epoch": 1.1997363216875412, + "grad_norm": 9.074294090270996, + "learning_rate": 4.841171251109139e-06, + "log_odds_chosen": 0.7959095239639282, + "log_odds_ratio": -0.40639543533325195, + "logits/chosen": -1.0378267765045166, + "logits/rejected": -0.9526659250259399, + "logps/chosen": -1.2382162809371948, + "logps/rejected": -1.829305648803711, + "loss": 2.0053, + "nll_loss": 0.46069228649139404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12382163852453232, + "rewards/margins": 0.059108927845954895, + "rewards/rejected": -0.18293055891990662, + "step": 455 + }, + { + "epoch": 1.2023731048121291, + "grad_norm": 9.499855041503906, + "learning_rate": 4.834072759538598e-06, + "log_odds_chosen": 0.29473090171813965, + "log_odds_ratio": -0.573003888130188, + "logits/chosen": -1.0707342624664307, + "logits/rejected": -0.9860851168632507, + "logps/chosen": -1.314915418624878, + "logps/rejected": -1.5479837656021118, + "loss": 2.3088, + "nll_loss": 0.5199118256568909, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1314915418624878, + "rewards/margins": 0.023306837305426598, + "rewards/rejected": -0.15479837357997894, + "step": 456 + }, + { + "epoch": 1.2050098879367173, + "grad_norm": 9.589157104492188, + "learning_rate": 4.826974267968057e-06, + "log_odds_chosen": 0.2921063303947449, + "log_odds_ratio": -0.5728156566619873, + "logits/chosen": -1.0033105611801147, + "logits/rejected": -0.9454600811004639, + "logps/chosen": -1.4705820083618164, + "logps/rejected": -1.7040565013885498, + "loss": 2.1781, + "nll_loss": 0.48725056648254395, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14705820381641388, + "rewards/margins": 0.023347454145550728, + "rewards/rejected": -0.17040565609931946, + "step": 457 + }, + { + "epoch": 1.2076466710613052, + "grad_norm": 9.195829391479492, + "learning_rate": 4.8198757763975155e-06, + "log_odds_chosen": 0.5113301277160645, + "log_odds_ratio": -0.4782109558582306, + "logits/chosen": -1.0404274463653564, + "logits/rejected": -0.9821817874908447, + "logps/chosen": -1.3258305788040161, + "logps/rejected": -1.7209516763687134, + "loss": 2.0337, + "nll_loss": 0.4606133997440338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13258305191993713, + "rewards/margins": 0.03951210528612137, + "rewards/rejected": -0.1720951795578003, + "step": 458 + }, + { + "epoch": 1.2102834541858931, + "grad_norm": 9.847295761108398, + "learning_rate": 4.812777284826974e-06, + "log_odds_chosen": 0.45984476804733276, + "log_odds_ratio": -0.49745601415634155, + "logits/chosen": -1.033888578414917, + "logits/rejected": -0.9601117372512817, + "logps/chosen": -1.411940574645996, + "logps/rejected": -1.7642412185668945, + "loss": 2.2796, + "nll_loss": 0.5201572775840759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14119404554367065, + "rewards/margins": 0.03523007407784462, + "rewards/rejected": -0.17642413079738617, + "step": 459 + }, + { + "epoch": 1.2129202373104813, + "grad_norm": 9.481552124023438, + "learning_rate": 4.805678793256433e-06, + "log_odds_chosen": 0.2638810873031616, + "log_odds_ratio": -0.5864376425743103, + "logits/chosen": -1.0747355222702026, + "logits/rejected": -1.0247514247894287, + "logps/chosen": -1.3305190801620483, + "logps/rejected": -1.5326287746429443, + "loss": 2.4889, + "nll_loss": 0.5635707974433899, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13305191695690155, + "rewards/margins": 0.02021096646785736, + "rewards/rejected": -0.1532628834247589, + "step": 460 + }, + { + "epoch": 1.2155570204350692, + "grad_norm": 10.324326515197754, + "learning_rate": 4.798580301685891e-06, + "log_odds_chosen": 0.4667387008666992, + "log_odds_ratio": -0.49474701285362244, + "logits/chosen": -1.1680094003677368, + "logits/rejected": -1.090071439743042, + "logps/chosen": -1.333450198173523, + "logps/rejected": -1.7026855945587158, + "loss": 2.6032, + "nll_loss": 0.6013253331184387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13334502279758453, + "rewards/margins": 0.03692355006933212, + "rewards/rejected": -0.17026856541633606, + "step": 461 + }, + { + "epoch": 1.2181938035596573, + "grad_norm": 9.674001693725586, + "learning_rate": 4.79148181011535e-06, + "log_odds_chosen": 0.6859418749809265, + "log_odds_ratio": -0.43002980947494507, + "logits/chosen": -1.090169906616211, + "logits/rejected": -0.9816117286682129, + "logps/chosen": -1.3842506408691406, + "logps/rejected": -1.94362211227417, + "loss": 2.3453, + "nll_loss": 0.5433187484741211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1384250670671463, + "rewards/margins": 0.05593716353178024, + "rewards/rejected": -0.19436222314834595, + "step": 462 + }, + { + "epoch": 1.2208305866842453, + "grad_norm": 10.021893501281738, + "learning_rate": 4.784383318544809e-06, + "log_odds_chosen": 0.34028083086013794, + "log_odds_ratio": -0.5499065518379211, + "logits/chosen": -1.116060733795166, + "logits/rejected": -1.0241994857788086, + "logps/chosen": -1.4120571613311768, + "logps/rejected": -1.6751539707183838, + "loss": 2.6105, + "nll_loss": 0.5976427793502808, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14120571315288544, + "rewards/margins": 0.02630968950688839, + "rewards/rejected": -0.16751541197299957, + "step": 463 + }, + { + "epoch": 1.2234673698088332, + "grad_norm": 10.2836332321167, + "learning_rate": 4.777284826974267e-06, + "log_odds_chosen": 0.5319832563400269, + "log_odds_ratio": -0.4687221348285675, + "logits/chosen": -1.099975824356079, + "logits/rejected": -0.9656794667243958, + "logps/chosen": -1.3129050731658936, + "logps/rejected": -1.7177257537841797, + "loss": 2.3185, + "nll_loss": 0.5327426195144653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1312904953956604, + "rewards/margins": 0.040482066571712494, + "rewards/rejected": -0.1717725694179535, + "step": 464 + }, + { + "epoch": 1.2261041529334213, + "grad_norm": 8.89089584350586, + "learning_rate": 4.770186335403727e-06, + "log_odds_chosen": 0.5987759828567505, + "log_odds_ratio": -0.4692104756832123, + "logits/chosen": -1.0071309804916382, + "logits/rejected": -0.9458831548690796, + "logps/chosen": -1.357689619064331, + "logps/rejected": -1.819558024406433, + "loss": 1.733, + "nll_loss": 0.38633984327316284, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13576896488666534, + "rewards/margins": 0.04618682339787483, + "rewards/rejected": -0.18195581436157227, + "step": 465 + }, + { + "epoch": 1.2287409360580093, + "grad_norm": 9.948925018310547, + "learning_rate": 4.763087843833185e-06, + "log_odds_chosen": 0.15255561470985413, + "log_odds_ratio": -0.6316505074501038, + "logits/chosen": -1.0351141691207886, + "logits/rejected": -1.004443645477295, + "logps/chosen": -1.345405101776123, + "logps/rejected": -1.4590325355529785, + "loss": 2.131, + "nll_loss": 0.46959295868873596, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13454051315784454, + "rewards/margins": 0.01136273704469204, + "rewards/rejected": -0.14590325951576233, + "step": 466 + }, + { + "epoch": 1.2313777191825972, + "grad_norm": 10.681402206420898, + "learning_rate": 4.755989352262644e-06, + "log_odds_chosen": 0.3940380811691284, + "log_odds_ratio": -0.5332112312316895, + "logits/chosen": -1.1033565998077393, + "logits/rejected": -1.0111594200134277, + "logps/chosen": -1.4389121532440186, + "logps/rejected": -1.7569591999053955, + "loss": 2.3208, + "nll_loss": 0.5268727540969849, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14389123022556305, + "rewards/margins": 0.031804703176021576, + "rewards/rejected": -0.17569592595100403, + "step": 467 + }, + { + "epoch": 1.2340145023071853, + "grad_norm": 9.829061508178711, + "learning_rate": 4.748890860692103e-06, + "log_odds_chosen": 0.5439039468765259, + "log_odds_ratio": -0.46788734197616577, + "logits/chosen": -1.027842402458191, + "logits/rejected": -0.9599955677986145, + "logps/chosen": -1.3897879123687744, + "logps/rejected": -1.8280632495880127, + "loss": 2.157, + "nll_loss": 0.4924602508544922, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13897879421710968, + "rewards/margins": 0.043827541172504425, + "rewards/rejected": -0.1828063428401947, + "step": 468 + }, + { + "epoch": 1.2366512854317733, + "grad_norm": 10.2400541305542, + "learning_rate": 4.741792369121562e-06, + "log_odds_chosen": 0.26251474022865295, + "log_odds_ratio": -0.5879825353622437, + "logits/chosen": -1.071041464805603, + "logits/rejected": -0.9921205043792725, + "logps/chosen": -1.557490587234497, + "logps/rejected": -1.7682377099990845, + "loss": 2.774, + "nll_loss": 0.6346943378448486, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15574905276298523, + "rewards/margins": 0.021074706688523293, + "rewards/rejected": -0.17682376503944397, + "step": 469 + }, + { + "epoch": 1.2392880685563612, + "grad_norm": 9.739836692810059, + "learning_rate": 4.73469387755102e-06, + "log_odds_chosen": 0.49304506182670593, + "log_odds_ratio": -0.4902867376804352, + "logits/chosen": -1.0478068590164185, + "logits/rejected": -0.988756537437439, + "logps/chosen": -1.3048160076141357, + "logps/rejected": -1.6764576435089111, + "loss": 2.2099, + "nll_loss": 0.5034420490264893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13048160076141357, + "rewards/margins": 0.03716415539383888, + "rewards/rejected": -0.16764578223228455, + "step": 470 + }, + { + "epoch": 1.2419248516809493, + "grad_norm": 10.401447296142578, + "learning_rate": 4.7275953859804795e-06, + "log_odds_chosen": 0.2648758292198181, + "log_odds_ratio": -0.5851804614067078, + "logits/chosen": -1.1387560367584229, + "logits/rejected": -1.0944470167160034, + "logps/chosen": -1.4404828548431396, + "logps/rejected": -1.6441142559051514, + "loss": 2.4325, + "nll_loss": 0.5495949983596802, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.144048273563385, + "rewards/margins": 0.02036314457654953, + "rewards/rejected": -0.16441142559051514, + "step": 471 + }, + { + "epoch": 1.2445616348055373, + "grad_norm": 10.700909614562988, + "learning_rate": 4.720496894409938e-06, + "log_odds_chosen": 0.24711650609970093, + "log_odds_ratio": -0.6107478737831116, + "logits/chosen": -1.0727221965789795, + "logits/rejected": -0.9578004479408264, + "logps/chosen": -1.564539909362793, + "logps/rejected": -1.7778160572052002, + "loss": 2.5579, + "nll_loss": 0.5783965587615967, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15645399689674377, + "rewards/margins": 0.021327603608369827, + "rewards/rejected": -0.1777816116809845, + "step": 472 + }, + { + "epoch": 1.2471984179301252, + "grad_norm": 10.717304229736328, + "learning_rate": 4.713398402839396e-06, + "log_odds_chosen": 0.10234677046537399, + "log_odds_ratio": -0.6500293016433716, + "logits/chosen": -1.0790221691131592, + "logits/rejected": -0.99981689453125, + "logps/chosen": -1.481553077697754, + "logps/rejected": -1.548191785812378, + "loss": 2.5536, + "nll_loss": 0.5734033584594727, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1481553167104721, + "rewards/margins": 0.006663870066404343, + "rewards/rejected": -0.15481917560100555, + "step": 473 + }, + { + "epoch": 1.2498352010547134, + "grad_norm": 10.185778617858887, + "learning_rate": 4.706299911268855e-06, + "log_odds_chosen": 0.4353746175765991, + "log_odds_ratio": -0.5056857466697693, + "logits/chosen": -1.0191655158996582, + "logits/rejected": -0.9557269811630249, + "logps/chosen": -1.4184210300445557, + "logps/rejected": -1.7543326616287231, + "loss": 2.1173, + "nll_loss": 0.4787678122520447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14184211194515228, + "rewards/margins": 0.03359116241335869, + "rewards/rejected": -0.17543327808380127, + "step": 474 + }, + { + "epoch": 1.2524719841793013, + "grad_norm": 8.66719913482666, + "learning_rate": 4.6992014196983134e-06, + "log_odds_chosen": 0.351694256067276, + "log_odds_ratio": -0.5478801727294922, + "logits/chosen": -1.0281472206115723, + "logits/rejected": -0.9631309509277344, + "logps/chosen": -1.3119217157363892, + "logps/rejected": -1.575613260269165, + "loss": 1.9327, + "nll_loss": 0.4283771514892578, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1311921775341034, + "rewards/margins": 0.026369158178567886, + "rewards/rejected": -0.1575613170862198, + "step": 475 + }, + { + "epoch": 1.2551087673038892, + "grad_norm": 9.768600463867188, + "learning_rate": 4.692102928127773e-06, + "log_odds_chosen": 0.6341227889060974, + "log_odds_ratio": -0.4367087483406067, + "logits/chosen": -1.022531509399414, + "logits/rejected": -0.9440472722053528, + "logps/chosen": -1.2810771465301514, + "logps/rejected": -1.7700272798538208, + "loss": 1.8679, + "nll_loss": 0.42331641912460327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1281077116727829, + "rewards/margins": 0.04889502748847008, + "rewards/rejected": -0.17700272798538208, + "step": 476 + }, + { + "epoch": 1.2577455504284774, + "grad_norm": 10.173186302185059, + "learning_rate": 4.685004436557231e-06, + "log_odds_chosen": 0.44422340393066406, + "log_odds_ratio": -0.5055138468742371, + "logits/chosen": -1.0871319770812988, + "logits/rejected": -0.9896256327629089, + "logps/chosen": -1.4739582538604736, + "logps/rejected": -1.84141206741333, + "loss": 2.4561, + "nll_loss": 0.5634859800338745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14739583432674408, + "rewards/margins": 0.036745380610227585, + "rewards/rejected": -0.18414120376110077, + "step": 477 + }, + { + "epoch": 1.2603823335530653, + "grad_norm": 9.409655570983887, + "learning_rate": 4.67790594498669e-06, + "log_odds_chosen": 0.8021876811981201, + "log_odds_ratio": -0.430938184261322, + "logits/chosen": -1.0572388172149658, + "logits/rejected": -0.9765225052833557, + "logps/chosen": -1.298008918762207, + "logps/rejected": -1.934680700302124, + "loss": 2.0465, + "nll_loss": 0.46853333711624146, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1298009157180786, + "rewards/margins": 0.0636671632528305, + "rewards/rejected": -0.19346806406974792, + "step": 478 + }, + { + "epoch": 1.2630191166776532, + "grad_norm": 9.278806686401367, + "learning_rate": 4.670807453416149e-06, + "log_odds_chosen": 0.5163518786430359, + "log_odds_ratio": -0.48643603920936584, + "logits/chosen": -1.025752067565918, + "logits/rejected": -0.9585089087486267, + "logps/chosen": -1.2525575160980225, + "logps/rejected": -1.6311428546905518, + "loss": 1.8546, + "nll_loss": 0.41501230001449585, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12525574862957, + "rewards/margins": 0.037858542054891586, + "rewards/rejected": -0.1631142795085907, + "step": 479 + }, + { + "epoch": 1.2656558998022414, + "grad_norm": 10.19020938873291, + "learning_rate": 4.663708961845608e-06, + "log_odds_chosen": 0.4915609061717987, + "log_odds_ratio": -0.49741148948669434, + "logits/chosen": -1.096588373184204, + "logits/rejected": -1.0395936965942383, + "logps/chosen": -1.332758903503418, + "logps/rejected": -1.6852349042892456, + "loss": 2.4374, + "nll_loss": 0.5596209168434143, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13327589631080627, + "rewards/margins": 0.035247594118118286, + "rewards/rejected": -0.16852349042892456, + "step": 480 + }, + { + "epoch": 1.2682926829268293, + "grad_norm": 10.530284881591797, + "learning_rate": 4.656610470275066e-06, + "log_odds_chosen": 0.3427813649177551, + "log_odds_ratio": -0.543682873249054, + "logits/chosen": -1.1346460580825806, + "logits/rejected": -1.0081522464752197, + "logps/chosen": -1.4239239692687988, + "logps/rejected": -1.6867188215255737, + "loss": 2.7272, + "nll_loss": 0.6274286508560181, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14239239692687988, + "rewards/margins": 0.026279501616954803, + "rewards/rejected": -0.1686718761920929, + "step": 481 + }, + { + "epoch": 1.2709294660514172, + "grad_norm": 9.774396896362305, + "learning_rate": 4.649511978704525e-06, + "log_odds_chosen": 0.5521863102912903, + "log_odds_ratio": -0.4603966176509857, + "logits/chosen": -1.0786266326904297, + "logits/rejected": -0.986792802810669, + "logps/chosen": -1.4126559495925903, + "logps/rejected": -1.8508191108703613, + "loss": 2.3706, + "nll_loss": 0.5466091632843018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1412656009197235, + "rewards/margins": 0.043816305696964264, + "rewards/rejected": -0.18508189916610718, + "step": 482 + }, + { + "epoch": 1.2735662491760054, + "grad_norm": 9.518534660339355, + "learning_rate": 4.642413487133984e-06, + "log_odds_chosen": 0.19123250246047974, + "log_odds_ratio": -0.6104004383087158, + "logits/chosen": -1.1052263975143433, + "logits/rejected": -1.0724090337753296, + "logps/chosen": -1.2705307006835938, + "logps/rejected": -1.4086744785308838, + "loss": 2.1503, + "nll_loss": 0.4765278100967407, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12705306708812714, + "rewards/margins": 0.013814376667141914, + "rewards/rejected": -0.1408674567937851, + "step": 483 + }, + { + "epoch": 1.2762030323005933, + "grad_norm": 10.300300598144531, + "learning_rate": 4.635314995563443e-06, + "log_odds_chosen": 1.0402419567108154, + "log_odds_ratio": -0.3782433569431305, + "logits/chosen": -1.122322916984558, + "logits/rejected": -0.9631179571151733, + "logps/chosen": -1.3168840408325195, + "logps/rejected": -2.1896350383758545, + "loss": 2.2423, + "nll_loss": 0.5227570533752441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1316884160041809, + "rewards/margins": 0.08727509528398514, + "rewards/rejected": -0.21896350383758545, + "step": 484 + }, + { + "epoch": 1.2788398154251812, + "grad_norm": 9.907837867736816, + "learning_rate": 4.628216503992902e-06, + "log_odds_chosen": 0.49135133624076843, + "log_odds_ratio": -0.5001718401908875, + "logits/chosen": -1.0758206844329834, + "logits/rejected": -1.0195170640945435, + "logps/chosen": -1.3113082647323608, + "logps/rejected": -1.6820894479751587, + "loss": 2.7119, + "nll_loss": 0.6279576420783997, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13113084435462952, + "rewards/margins": 0.03707811236381531, + "rewards/rejected": -0.16820895671844482, + "step": 485 + }, + { + "epoch": 1.2814765985497694, + "grad_norm": 10.635579109191895, + "learning_rate": 4.62111801242236e-06, + "log_odds_chosen": 0.26804792881011963, + "log_odds_ratio": -0.579971432685852, + "logits/chosen": -1.1183302402496338, + "logits/rejected": -1.033354640007019, + "logps/chosen": -1.4729652404785156, + "logps/rejected": -1.677699327468872, + "loss": 2.7361, + "nll_loss": 0.6260372996330261, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14729654788970947, + "rewards/margins": 0.020473407581448555, + "rewards/rejected": -0.16776993870735168, + "step": 486 + }, + { + "epoch": 1.2841133816743573, + "grad_norm": 10.664183616638184, + "learning_rate": 4.614019520851819e-06, + "log_odds_chosen": 0.4930553436279297, + "log_odds_ratio": -0.4874056875705719, + "logits/chosen": -1.1609282493591309, + "logits/rejected": -0.9723719358444214, + "logps/chosen": -1.4261642694473267, + "logps/rejected": -1.8289806842803955, + "loss": 2.7762, + "nll_loss": 0.6453151702880859, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14261643588542938, + "rewards/margins": 0.040281638503074646, + "rewards/rejected": -0.18289807438850403, + "step": 487 + }, + { + "epoch": 1.2867501647989452, + "grad_norm": 9.715775489807129, + "learning_rate": 4.6069210292812774e-06, + "log_odds_chosen": 0.5041050910949707, + "log_odds_ratio": -0.4901432991027832, + "logits/chosen": -1.1087746620178223, + "logits/rejected": -1.0324528217315674, + "logps/chosen": -1.2873916625976562, + "logps/rejected": -1.6561278104782104, + "loss": 2.1677, + "nll_loss": 0.4929226040840149, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12873917818069458, + "rewards/margins": 0.03687359765172005, + "rewards/rejected": -0.16561277210712433, + "step": 488 + }, + { + "epoch": 1.2893869479235334, + "grad_norm": 9.831827163696289, + "learning_rate": 4.599822537710736e-06, + "log_odds_chosen": 0.4582131505012512, + "log_odds_ratio": -0.49508172273635864, + "logits/chosen": -1.1099534034729004, + "logits/rejected": -1.0009325742721558, + "logps/chosen": -1.3387107849121094, + "logps/rejected": -1.7003153562545776, + "loss": 2.3767, + "nll_loss": 0.544657826423645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13387107849121094, + "rewards/margins": 0.03616045415401459, + "rewards/rejected": -0.17003153264522552, + "step": 489 + }, + { + "epoch": 1.2920237310481213, + "grad_norm": 10.236985206604004, + "learning_rate": 4.592724046140195e-06, + "log_odds_chosen": 0.8142597675323486, + "log_odds_ratio": -0.46944791078567505, + "logits/chosen": -1.032392978668213, + "logits/rejected": -0.9740979075431824, + "logps/chosen": -1.5206587314605713, + "logps/rejected": -2.215409755706787, + "loss": 2.3121, + "nll_loss": 0.531091034412384, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15206588804721832, + "rewards/margins": 0.06947509944438934, + "rewards/rejected": -0.22154098749160767, + "step": 490 + }, + { + "epoch": 1.2946605141727092, + "grad_norm": 10.013334274291992, + "learning_rate": 4.585625554569653e-06, + "log_odds_chosen": 0.37788182497024536, + "log_odds_ratio": -0.5361526012420654, + "logits/chosen": -1.0860071182250977, + "logits/rejected": -1.032589077949524, + "logps/chosen": -1.5222413539886475, + "logps/rejected": -1.8303142786026, + "loss": 2.8472, + "nll_loss": 0.6581736207008362, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1522241234779358, + "rewards/margins": 0.030807292088866234, + "rewards/rejected": -0.18303140997886658, + "step": 491 + }, + { + "epoch": 1.2972972972972974, + "grad_norm": 9.102086067199707, + "learning_rate": 4.578527062999112e-06, + "log_odds_chosen": 0.35535067319869995, + "log_odds_ratio": -0.5479142665863037, + "logits/chosen": -1.008329153060913, + "logits/rejected": -0.9556934833526611, + "logps/chosen": -1.277017593383789, + "logps/rejected": -1.5601518154144287, + "loss": 1.8573, + "nll_loss": 0.4095434248447418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1277017742395401, + "rewards/margins": 0.028313428163528442, + "rewards/rejected": -0.15601518750190735, + "step": 492 + }, + { + "epoch": 1.2999340804218853, + "grad_norm": 9.600390434265137, + "learning_rate": 4.571428571428571e-06, + "log_odds_chosen": 0.5586351752281189, + "log_odds_ratio": -0.4633020758628845, + "logits/chosen": -1.1387877464294434, + "logits/rejected": -1.0374525785446167, + "logps/chosen": -1.492626428604126, + "logps/rejected": -1.9490209817886353, + "loss": 2.5633, + "nll_loss": 0.5945045351982117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14926263689994812, + "rewards/margins": 0.04563945531845093, + "rewards/rejected": -0.19490210711956024, + "step": 493 + }, + { + "epoch": 1.3025708635464732, + "grad_norm": 9.457123756408691, + "learning_rate": 4.56433007985803e-06, + "log_odds_chosen": 0.7423944473266602, + "log_odds_ratio": -0.39905303716659546, + "logits/chosen": -1.1221680641174316, + "logits/rejected": -0.9649347066879272, + "logps/chosen": -1.3206372261047363, + "logps/rejected": -1.889852523803711, + "loss": 2.355, + "nll_loss": 0.5488362312316895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13206373155117035, + "rewards/margins": 0.05692153424024582, + "rewards/rejected": -0.18898525834083557, + "step": 494 + }, + { + "epoch": 1.3052076466710614, + "grad_norm": 10.164525985717773, + "learning_rate": 4.557231588287489e-06, + "log_odds_chosen": 0.4164513349533081, + "log_odds_ratio": -0.5168020725250244, + "logits/chosen": -1.0461890697479248, + "logits/rejected": -0.9742856025695801, + "logps/chosen": -1.4978477954864502, + "logps/rejected": -1.8457560539245605, + "loss": 2.4777, + "nll_loss": 0.5677485466003418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14978477358818054, + "rewards/margins": 0.03479084372520447, + "rewards/rejected": -0.184575617313385, + "step": 495 + }, + { + "epoch": 1.3078444297956493, + "grad_norm": 9.663432121276855, + "learning_rate": 4.550133096716948e-06, + "log_odds_chosen": 0.5467130541801453, + "log_odds_ratio": -0.4778140187263489, + "logits/chosen": -1.1307505369186401, + "logits/rejected": -1.0496280193328857, + "logps/chosen": -1.345214605331421, + "logps/rejected": -1.7747489213943481, + "loss": 2.3803, + "nll_loss": 0.5472964644432068, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1345214694738388, + "rewards/margins": 0.04295342043042183, + "rewards/rejected": -0.17747488617897034, + "step": 496 + }, + { + "epoch": 1.3104812129202372, + "grad_norm": 9.685505867004395, + "learning_rate": 4.543034605146406e-06, + "log_odds_chosen": 0.4372747540473938, + "log_odds_ratio": -0.5114445686340332, + "logits/chosen": -1.1019073724746704, + "logits/rejected": -1.0229694843292236, + "logps/chosen": -1.3307362794876099, + "logps/rejected": -1.665488600730896, + "loss": 2.4608, + "nll_loss": 0.564062774181366, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.133073627948761, + "rewards/margins": 0.033475231379270554, + "rewards/rejected": -0.16654884815216064, + "step": 497 + }, + { + "epoch": 1.3131179960448254, + "grad_norm": 9.91350269317627, + "learning_rate": 4.535936113575866e-06, + "log_odds_chosen": 0.4812414348125458, + "log_odds_ratio": -0.4848937392234802, + "logits/chosen": -1.0916714668273926, + "logits/rejected": -1.0008459091186523, + "logps/chosen": -1.315626859664917, + "logps/rejected": -1.6926066875457764, + "loss": 2.1481, + "nll_loss": 0.48853224515914917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13156269490718842, + "rewards/margins": 0.037697985768318176, + "rewards/rejected": -0.1692606806755066, + "step": 498 + }, + { + "epoch": 1.3157547791694133, + "grad_norm": 10.163434982299805, + "learning_rate": 4.528837622005324e-06, + "log_odds_chosen": 0.37900176644325256, + "log_odds_ratio": -0.5468506217002869, + "logits/chosen": -1.0818631649017334, + "logits/rejected": -0.9458174705505371, + "logps/chosen": -1.6487318277359009, + "logps/rejected": -1.9673023223876953, + "loss": 2.5613, + "nll_loss": 0.5856290459632874, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1648731827735901, + "rewards/margins": 0.03185705095529556, + "rewards/rejected": -0.19673022627830505, + "step": 499 + }, + { + "epoch": 1.3183915622940012, + "grad_norm": 9.899470329284668, + "learning_rate": 4.521739130434782e-06, + "log_odds_chosen": 0.5170084238052368, + "log_odds_ratio": -0.4788874387741089, + "logits/chosen": -1.0836416482925415, + "logits/rejected": -0.9832676649093628, + "logps/chosen": -1.4521634578704834, + "logps/rejected": -1.8645068407058716, + "loss": 2.3863, + "nll_loss": 0.5486769676208496, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14521636068820953, + "rewards/margins": 0.04123431444168091, + "rewards/rejected": -0.18645067512989044, + "step": 500 + }, + { + "epoch": 1.3210283454185894, + "grad_norm": 9.3609037399292, + "learning_rate": 4.5146406388642415e-06, + "log_odds_chosen": 0.529761791229248, + "log_odds_ratio": -0.4753470718860626, + "logits/chosen": -1.024951696395874, + "logits/rejected": -0.975610613822937, + "logps/chosen": -1.1999605894088745, + "logps/rejected": -1.6050573587417603, + "loss": 1.7346, + "nll_loss": 0.3861042857170105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11999605596065521, + "rewards/margins": 0.040509678423404694, + "rewards/rejected": -0.1605057418346405, + "step": 501 + }, + { + "epoch": 1.3236651285431773, + "grad_norm": 10.891241073608398, + "learning_rate": 4.5075421472936995e-06, + "log_odds_chosen": 0.1583474576473236, + "log_odds_ratio": -0.6193140149116516, + "logits/chosen": -1.0535261631011963, + "logits/rejected": -1.0259627103805542, + "logps/chosen": -1.618718147277832, + "logps/rejected": -1.746124029159546, + "loss": 2.7019, + "nll_loss": 0.613548755645752, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16187182068824768, + "rewards/margins": 0.012740583159029484, + "rewards/rejected": -0.1746124029159546, + "step": 502 + }, + { + "epoch": 1.3263019116677652, + "grad_norm": 10.03843879699707, + "learning_rate": 4.500443655723159e-06, + "log_odds_chosen": 0.458172470331192, + "log_odds_ratio": -0.5375922918319702, + "logits/chosen": -1.1176626682281494, + "logits/rejected": -1.050915241241455, + "logps/chosen": -1.3184903860092163, + "logps/rejected": -1.667731523513794, + "loss": 2.4683, + "nll_loss": 0.5633128881454468, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13184905052185059, + "rewards/margins": 0.034924112260341644, + "rewards/rejected": -0.16677314043045044, + "step": 503 + }, + { + "epoch": 1.3289386947923534, + "grad_norm": 9.57889461517334, + "learning_rate": 4.493345164152617e-06, + "log_odds_chosen": 0.4582204818725586, + "log_odds_ratio": -0.5044894218444824, + "logits/chosen": -1.0578508377075195, + "logits/rejected": -0.9902914762496948, + "logps/chosen": -1.4905551671981812, + "logps/rejected": -1.8522902727127075, + "loss": 2.4895, + "nll_loss": 0.5719265937805176, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14905551075935364, + "rewards/margins": 0.03617352247238159, + "rewards/rejected": -0.18522903323173523, + "step": 504 + }, + { + "epoch": 1.3315754779169413, + "grad_norm": 9.206018447875977, + "learning_rate": 4.486246672582076e-06, + "log_odds_chosen": 0.5450261235237122, + "log_odds_ratio": -0.47390875220298767, + "logits/chosen": -1.1009422540664673, + "logits/rejected": -1.0099388360977173, + "logps/chosen": -1.2043448686599731, + "logps/rejected": -1.6163784265518188, + "loss": 2.1674, + "nll_loss": 0.49445146322250366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1204344779253006, + "rewards/margins": 0.04120335727930069, + "rewards/rejected": -0.16163784265518188, + "step": 505 + }, + { + "epoch": 1.3342122610415292, + "grad_norm": 9.322002410888672, + "learning_rate": 4.479148181011535e-06, + "log_odds_chosen": 0.8400002121925354, + "log_odds_ratio": -0.40598559379577637, + "logits/chosen": -1.0018384456634521, + "logits/rejected": -0.8976902961730957, + "logps/chosen": -1.2162275314331055, + "logps/rejected": -1.9030863046646118, + "loss": 1.7841, + "nll_loss": 0.405415415763855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12162275612354279, + "rewards/margins": 0.0686858743429184, + "rewards/rejected": -0.1903086155653, + "step": 506 + }, + { + "epoch": 1.3368490441661174, + "grad_norm": 10.626153945922852, + "learning_rate": 4.472049689440993e-06, + "log_odds_chosen": 0.2626584768295288, + "log_odds_ratio": -0.5733634233474731, + "logits/chosen": -1.1341722011566162, + "logits/rejected": -1.0358843803405762, + "logps/chosen": -1.5009350776672363, + "logps/rejected": -1.711402416229248, + "loss": 2.6765, + "nll_loss": 0.6117795705795288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15009349584579468, + "rewards/margins": 0.021046744659543037, + "rewards/rejected": -0.17114025354385376, + "step": 507 + }, + { + "epoch": 1.3394858272907053, + "grad_norm": 9.762642860412598, + "learning_rate": 4.464951197870452e-06, + "log_odds_chosen": 0.34729743003845215, + "log_odds_ratio": -0.5531666278839111, + "logits/chosen": -1.0909682512283325, + "logits/rejected": -0.999322772026062, + "logps/chosen": -1.3530230522155762, + "logps/rejected": -1.6313226222991943, + "loss": 2.3527, + "nll_loss": 0.5328519344329834, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13530230522155762, + "rewards/margins": 0.027829963713884354, + "rewards/rejected": -0.16313226521015167, + "step": 508 + }, + { + "epoch": 1.3421226104152932, + "grad_norm": 9.921836853027344, + "learning_rate": 4.457852706299911e-06, + "log_odds_chosen": 0.3846244215965271, + "log_odds_ratio": -0.5229079127311707, + "logits/chosen": -1.1582725048065186, + "logits/rejected": -1.0689759254455566, + "logps/chosen": -1.4322659969329834, + "logps/rejected": -1.7391772270202637, + "loss": 2.7252, + "nll_loss": 0.6290191411972046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14322659373283386, + "rewards/margins": 0.0306911189109087, + "rewards/rejected": -0.1739177256822586, + "step": 509 + }, + { + "epoch": 1.3447593935398814, + "grad_norm": 9.485255241394043, + "learning_rate": 4.45075421472937e-06, + "log_odds_chosen": 0.5635192394256592, + "log_odds_ratio": -0.4641314148902893, + "logits/chosen": -1.1391786336898804, + "logits/rejected": -1.0365322828292847, + "logps/chosen": -1.392361044883728, + "logps/rejected": -1.841564416885376, + "loss": 2.4514, + "nll_loss": 0.5664429664611816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13923612236976624, + "rewards/margins": 0.04492032527923584, + "rewards/rejected": -0.1841564178466797, + "step": 510 + }, + { + "epoch": 1.3473961766644693, + "grad_norm": 9.78370189666748, + "learning_rate": 4.443655723158828e-06, + "log_odds_chosen": 0.6639434099197388, + "log_odds_ratio": -0.45992475748062134, + "logits/chosen": -1.0509096384048462, + "logits/rejected": -0.9572964310646057, + "logps/chosen": -1.3928582668304443, + "logps/rejected": -1.928267478942871, + "loss": 2.2199, + "nll_loss": 0.5089808702468872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1392858326435089, + "rewards/margins": 0.053540922701358795, + "rewards/rejected": -0.1928267627954483, + "step": 511 + }, + { + "epoch": 1.3500329597890572, + "grad_norm": 9.828688621520996, + "learning_rate": 4.436557231588288e-06, + "log_odds_chosen": 0.5940926671028137, + "log_odds_ratio": -0.5060535669326782, + "logits/chosen": -1.0177522897720337, + "logits/rejected": -0.9550021886825562, + "logps/chosen": -1.2975274324417114, + "logps/rejected": -1.7164931297302246, + "loss": 2.29, + "nll_loss": 0.5219029188156128, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1297527402639389, + "rewards/margins": 0.041896574199199677, + "rewards/rejected": -0.17164930701255798, + "step": 512 + }, + { + "epoch": 1.3526697429136454, + "grad_norm": 9.234590530395508, + "learning_rate": 4.429458740017746e-06, + "log_odds_chosen": 0.46330970525741577, + "log_odds_ratio": -0.5083400011062622, + "logits/chosen": -1.0437920093536377, + "logits/rejected": -0.9815811514854431, + "logps/chosen": -1.2863333225250244, + "logps/rejected": -1.6402312517166138, + "loss": 2.0998, + "nll_loss": 0.47411662340164185, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1286333203315735, + "rewards/margins": 0.03538981080055237, + "rewards/rejected": -0.16402314603328705, + "step": 513 + }, + { + "epoch": 1.3553065260382333, + "grad_norm": 9.480605125427246, + "learning_rate": 4.4223602484472055e-06, + "log_odds_chosen": 0.4388739764690399, + "log_odds_ratio": -0.5134546756744385, + "logits/chosen": -1.045832872390747, + "logits/rejected": -0.9835963249206543, + "logps/chosen": -1.331667184829712, + "logps/rejected": -1.685315728187561, + "loss": 2.0702, + "nll_loss": 0.46619364619255066, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13316671550273895, + "rewards/margins": 0.03536485508084297, + "rewards/rejected": -0.16853156685829163, + "step": 514 + }, + { + "epoch": 1.3579433091628212, + "grad_norm": 9.261907577514648, + "learning_rate": 4.4152617568766635e-06, + "log_odds_chosen": 0.5002005100250244, + "log_odds_ratio": -0.48809945583343506, + "logits/chosen": -1.0778452157974243, + "logits/rejected": -1.015285611152649, + "logps/chosen": -1.3214576244354248, + "logps/rejected": -1.7165311574935913, + "loss": 2.4748, + "nll_loss": 0.5698955059051514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13214576244354248, + "rewards/margins": 0.03950735554099083, + "rewards/rejected": -0.1716531217098236, + "step": 515 + }, + { + "epoch": 1.3605800922874094, + "grad_norm": 10.901313781738281, + "learning_rate": 4.4081632653061216e-06, + "log_odds_chosen": 0.4575445055961609, + "log_odds_ratio": -0.49447035789489746, + "logits/chosen": -1.136452078819275, + "logits/rejected": -1.0185503959655762, + "logps/chosen": -1.5689027309417725, + "logps/rejected": -1.9473178386688232, + "loss": 2.9467, + "nll_loss": 0.6872245669364929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15689027309417725, + "rewards/margins": 0.03784151002764702, + "rewards/rejected": -0.19473177194595337, + "step": 516 + }, + { + "epoch": 1.3632168754119973, + "grad_norm": 11.190934181213379, + "learning_rate": 4.401064773735581e-06, + "log_odds_chosen": 0.5212029218673706, + "log_odds_ratio": -0.47070425748825073, + "logits/chosen": -1.108872890472412, + "logits/rejected": -1.0137286186218262, + "logps/chosen": -1.5634641647338867, + "logps/rejected": -1.9933662414550781, + "loss": 2.7669, + "nll_loss": 0.6446561813354492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1563464105129242, + "rewards/margins": 0.042990200221538544, + "rewards/rejected": -0.19933661818504333, + "step": 517 + }, + { + "epoch": 1.3658536585365852, + "grad_norm": 9.640596389770508, + "learning_rate": 4.393966282165039e-06, + "log_odds_chosen": 0.3897729218006134, + "log_odds_ratio": -0.5306410193443298, + "logits/chosen": -1.0666706562042236, + "logits/rejected": -1.018998146057129, + "logps/chosen": -1.2988317012786865, + "logps/rejected": -1.5959352254867554, + "loss": 2.1505, + "nll_loss": 0.4845691919326782, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12988317012786865, + "rewards/margins": 0.029710350558161736, + "rewards/rejected": -0.15959352254867554, + "step": 518 + }, + { + "epoch": 1.3684904416611734, + "grad_norm": 9.896682739257812, + "learning_rate": 4.386867790594498e-06, + "log_odds_chosen": 0.422545850276947, + "log_odds_ratio": -0.5287402272224426, + "logits/chosen": -0.9611405730247498, + "logits/rejected": -0.926899790763855, + "logps/chosen": -1.270667552947998, + "logps/rejected": -1.5901453495025635, + "loss": 2.1228, + "nll_loss": 0.4778139591217041, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12706676125526428, + "rewards/margins": 0.031947776675224304, + "rewards/rejected": -0.15901455283164978, + "step": 519 + }, + { + "epoch": 1.3711272247857613, + "grad_norm": 9.610637664794922, + "learning_rate": 4.379769299023957e-06, + "log_odds_chosen": 0.5789295434951782, + "log_odds_ratio": -0.4628753364086151, + "logits/chosen": -1.0623466968536377, + "logits/rejected": -0.9665452837944031, + "logps/chosen": -1.2811853885650635, + "logps/rejected": -1.7341910600662231, + "loss": 2.0906, + "nll_loss": 0.47636866569519043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12811852991580963, + "rewards/margins": 0.04530058056116104, + "rewards/rejected": -0.17341911792755127, + "step": 520 + }, + { + "epoch": 1.3737640079103492, + "grad_norm": 9.621949195861816, + "learning_rate": 4.372670807453416e-06, + "log_odds_chosen": 0.2818030118942261, + "log_odds_ratio": -0.5760889053344727, + "logits/chosen": -1.117762804031372, + "logits/rejected": -1.0882344245910645, + "logps/chosen": -1.3426880836486816, + "logps/rejected": -1.5510759353637695, + "loss": 2.2966, + "nll_loss": 0.5165323615074158, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13426882028579712, + "rewards/margins": 0.020838771015405655, + "rewards/rejected": -0.15510760247707367, + "step": 521 + }, + { + "epoch": 1.3764007910349374, + "grad_norm": 11.08607292175293, + "learning_rate": 4.365572315882875e-06, + "log_odds_chosen": 0.4651279151439667, + "log_odds_ratio": -0.5054472088813782, + "logits/chosen": -1.1419415473937988, + "logits/rejected": -1.0225671529769897, + "logps/chosen": -1.720205545425415, + "logps/rejected": -2.105118989944458, + "loss": 3.356, + "nll_loss": 0.7884448766708374, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1720205545425415, + "rewards/margins": 0.03849135339260101, + "rewards/rejected": -0.2105119228363037, + "step": 522 + }, + { + "epoch": 1.3790375741595253, + "grad_norm": 9.302968978881836, + "learning_rate": 4.358473824312334e-06, + "log_odds_chosen": 0.49724501371383667, + "log_odds_ratio": -0.48611587285995483, + "logits/chosen": -1.0597364902496338, + "logits/rejected": -0.9594701528549194, + "logps/chosen": -1.234031319618225, + "logps/rejected": -1.5941500663757324, + "loss": 2.0146, + "nll_loss": 0.4550420045852661, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1234031394124031, + "rewards/margins": 0.036011867225170135, + "rewards/rejected": -0.15941500663757324, + "step": 523 + }, + { + "epoch": 1.3816743572841133, + "grad_norm": 9.23997688293457, + "learning_rate": 4.351375332741792e-06, + "log_odds_chosen": 0.5455493927001953, + "log_odds_ratio": -0.4641241133213043, + "logits/chosen": -1.0696187019348145, + "logits/rejected": -0.96488356590271, + "logps/chosen": -1.2787021398544312, + "logps/rejected": -1.6961209774017334, + "loss": 1.9613, + "nll_loss": 0.4439108073711395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12787020206451416, + "rewards/margins": 0.04174188897013664, + "rewards/rejected": -0.1696121096611023, + "step": 524 + }, + { + "epoch": 1.3843111404087014, + "grad_norm": 10.52160930633545, + "learning_rate": 4.344276841171251e-06, + "log_odds_chosen": 0.1157563179731369, + "log_odds_ratio": -0.6474640965461731, + "logits/chosen": -1.0222586393356323, + "logits/rejected": -0.9553705453872681, + "logps/chosen": -1.728295087814331, + "logps/rejected": -1.821520209312439, + "loss": 2.8794, + "nll_loss": 0.6551076769828796, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1728295087814331, + "rewards/margins": 0.009322520345449448, + "rewards/rejected": -0.18215203285217285, + "step": 525 + }, + { + "epoch": 1.3869479235332893, + "grad_norm": 8.591838836669922, + "learning_rate": 4.33717834960071e-06, + "log_odds_chosen": 0.7126994132995605, + "log_odds_ratio": -0.4387003481388092, + "logits/chosen": -1.038390874862671, + "logits/rejected": -1.0139986276626587, + "logps/chosen": -1.054718017578125, + "logps/rejected": -1.5185861587524414, + "loss": 1.7589, + "nll_loss": 0.395847350358963, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10547180473804474, + "rewards/margins": 0.04638680815696716, + "rewards/rejected": -0.1518586277961731, + "step": 526 + }, + { + "epoch": 1.3895847066578773, + "grad_norm": 9.613905906677246, + "learning_rate": 4.330079858030168e-06, + "log_odds_chosen": 0.5708151459693909, + "log_odds_ratio": -0.4756304621696472, + "logits/chosen": -1.0896623134613037, + "logits/rejected": -0.9856705665588379, + "logps/chosen": -1.3225669860839844, + "logps/rejected": -1.78826904296875, + "loss": 2.3778, + "nll_loss": 0.5468798279762268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13225671648979187, + "rewards/margins": 0.04657018184661865, + "rewards/rejected": -0.17882689833641052, + "step": 527 + }, + { + "epoch": 1.3922214897824654, + "grad_norm": 8.894762992858887, + "learning_rate": 4.3229813664596275e-06, + "log_odds_chosen": 0.5077429413795471, + "log_odds_ratio": -0.476772665977478, + "logits/chosen": -1.020542860031128, + "logits/rejected": -0.9697209596633911, + "logps/chosen": -1.1626381874084473, + "logps/rejected": -1.54520583152771, + "loss": 1.6572, + "nll_loss": 0.36661040782928467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11626380681991577, + "rewards/margins": 0.03825676441192627, + "rewards/rejected": -0.15452057123184204, + "step": 528 + }, + { + "epoch": 1.3948582729070533, + "grad_norm": 9.532736778259277, + "learning_rate": 4.3158828748890856e-06, + "log_odds_chosen": 0.5159969925880432, + "log_odds_ratio": -0.482917457818985, + "logits/chosen": -0.9628971815109253, + "logits/rejected": -0.9138011336326599, + "logps/chosen": -1.3670532703399658, + "logps/rejected": -1.7700910568237305, + "loss": 2.2592, + "nll_loss": 0.5165103077888489, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13670533895492554, + "rewards/margins": 0.04030377417802811, + "rewards/rejected": -0.17700910568237305, + "step": 529 + }, + { + "epoch": 1.3974950560316415, + "grad_norm": 10.507089614868164, + "learning_rate": 4.3087843833185445e-06, + "log_odds_chosen": 0.26568925380706787, + "log_odds_ratio": -0.5747084021568298, + "logits/chosen": -1.159207820892334, + "logits/rejected": -1.0467674732208252, + "logps/chosen": -1.3419283628463745, + "logps/rejected": -1.5456748008728027, + "loss": 2.7376, + "nll_loss": 0.6269404292106628, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1341928392648697, + "rewards/margins": 0.02037465199828148, + "rewards/rejected": -0.15456749498844147, + "step": 530 + }, + { + "epoch": 1.4001318391562294, + "grad_norm": 9.721789360046387, + "learning_rate": 4.301685891748003e-06, + "log_odds_chosen": 0.36365044116973877, + "log_odds_ratio": -0.548660397529602, + "logits/chosen": -1.0611029863357544, + "logits/rejected": -1.0090358257293701, + "logps/chosen": -1.3427720069885254, + "logps/rejected": -1.5878149271011353, + "loss": 2.2581, + "nll_loss": 0.5096607208251953, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13427719473838806, + "rewards/margins": 0.024504294618964195, + "rewards/rejected": -0.158781498670578, + "step": 531 + }, + { + "epoch": 1.4027686222808173, + "grad_norm": 9.990262985229492, + "learning_rate": 4.294587400177462e-06, + "log_odds_chosen": 0.2077985554933548, + "log_odds_ratio": -0.605369508266449, + "logits/chosen": -1.0180903673171997, + "logits/rejected": -0.9370006918907166, + "logps/chosen": -1.3981273174285889, + "logps/rejected": -1.5546506643295288, + "loss": 2.0418, + "nll_loss": 0.44991248846054077, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13981273770332336, + "rewards/margins": 0.015652332454919815, + "rewards/rejected": -0.15546506643295288, + "step": 532 + }, + { + "epoch": 1.4054054054054055, + "grad_norm": 9.432575225830078, + "learning_rate": 4.287488908606921e-06, + "log_odds_chosen": 0.6490946412086487, + "log_odds_ratio": -0.44073981046676636, + "logits/chosen": -1.0299392938613892, + "logits/rejected": -0.9620039463043213, + "logps/chosen": -1.2669686079025269, + "logps/rejected": -1.7586071491241455, + "loss": 2.1131, + "nll_loss": 0.48419713973999023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1266968548297882, + "rewards/margins": 0.04916385933756828, + "rewards/rejected": -0.1758607178926468, + "step": 533 + }, + { + "epoch": 1.4080421885299934, + "grad_norm": 9.760756492614746, + "learning_rate": 4.280390417036379e-06, + "log_odds_chosen": 0.5309910774230957, + "log_odds_ratio": -0.47286418080329895, + "logits/chosen": -1.0874316692352295, + "logits/rejected": -0.9884422421455383, + "logps/chosen": -1.1690242290496826, + "logps/rejected": -1.5218350887298584, + "loss": 2.1617, + "nll_loss": 0.4931284189224243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1169024258852005, + "rewards/margins": 0.035281069576740265, + "rewards/rejected": -0.15218350291252136, + "step": 534 + }, + { + "epoch": 1.4106789716545813, + "grad_norm": 11.032233238220215, + "learning_rate": 4.273291925465838e-06, + "log_odds_chosen": 0.32740920782089233, + "log_odds_ratio": -0.5480507016181946, + "logits/chosen": -1.1221764087677002, + "logits/rejected": -1.0313825607299805, + "logps/chosen": -1.4779345989227295, + "logps/rejected": -1.7428152561187744, + "loss": 3.2183, + "nll_loss": 0.7497601509094238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1477934718132019, + "rewards/margins": 0.026488065719604492, + "rewards/rejected": -0.1742815375328064, + "step": 535 + }, + { + "epoch": 1.4133157547791695, + "grad_norm": 10.23257827758789, + "learning_rate": 4.266193433895297e-06, + "log_odds_chosen": 0.5347425937652588, + "log_odds_ratio": -0.4779736399650574, + "logits/chosen": -1.0907363891601562, + "logits/rejected": -0.9816246032714844, + "logps/chosen": -1.4487974643707275, + "logps/rejected": -1.8768093585968018, + "loss": 2.5063, + "nll_loss": 0.5787882804870605, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14487972855567932, + "rewards/margins": 0.04280121624469757, + "rewards/rejected": -0.18768095970153809, + "step": 536 + }, + { + "epoch": 1.4159525379037574, + "grad_norm": 10.971465110778809, + "learning_rate": 4.259094942324756e-06, + "log_odds_chosen": 0.4730498492717743, + "log_odds_ratio": -0.49586889147758484, + "logits/chosen": -1.0929844379425049, + "logits/rejected": -0.9882125854492188, + "logps/chosen": -1.4657291173934937, + "logps/rejected": -1.8439651727676392, + "loss": 2.9872, + "nll_loss": 0.6972134113311768, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14657291769981384, + "rewards/margins": 0.037823598831892014, + "rewards/rejected": -0.18439652025699615, + "step": 537 + }, + { + "epoch": 1.4185893210283453, + "grad_norm": 9.797980308532715, + "learning_rate": 4.251996450754214e-06, + "log_odds_chosen": 0.24408963322639465, + "log_odds_ratio": -0.5894317626953125, + "logits/chosen": -1.027978777885437, + "logits/rejected": -0.9461379051208496, + "logps/chosen": -1.6278281211853027, + "logps/rejected": -1.8245741128921509, + "loss": 2.4087, + "nll_loss": 0.5432250499725342, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16278283298015594, + "rewards/margins": 0.01967458799481392, + "rewards/rejected": -0.18245741724967957, + "step": 538 + }, + { + "epoch": 1.4212261041529335, + "grad_norm": 10.824618339538574, + "learning_rate": 4.244897959183674e-06, + "log_odds_chosen": 0.3454328179359436, + "log_odds_ratio": -0.547885000705719, + "logits/chosen": -1.0612674951553345, + "logits/rejected": -1.0093002319335938, + "logps/chosen": -1.5636953115463257, + "logps/rejected": -1.837222695350647, + "loss": 2.5403, + "nll_loss": 0.580278217792511, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15636955201625824, + "rewards/margins": 0.027352729812264442, + "rewards/rejected": -0.18372228741645813, + "step": 539 + }, + { + "epoch": 1.4238628872775214, + "grad_norm": 8.917954444885254, + "learning_rate": 4.237799467613132e-06, + "log_odds_chosen": 0.5565085411071777, + "log_odds_ratio": -0.4850555658340454, + "logits/chosen": -1.04508638381958, + "logits/rejected": -0.9539642333984375, + "logps/chosen": -1.1825885772705078, + "logps/rejected": -1.594106912612915, + "loss": 1.8202, + "nll_loss": 0.4065341353416443, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11825884878635406, + "rewards/margins": 0.04115184396505356, + "rewards/rejected": -0.15941071510314941, + "step": 540 + }, + { + "epoch": 1.4264996704021096, + "grad_norm": 9.302490234375, + "learning_rate": 4.2307009760425915e-06, + "log_odds_chosen": 0.8080810904502869, + "log_odds_ratio": -0.4509783089160919, + "logits/chosen": -1.0430349111557007, + "logits/rejected": -0.9668057560920715, + "logps/chosen": -1.3211041688919067, + "logps/rejected": -1.9904488325119019, + "loss": 2.0318, + "nll_loss": 0.46285194158554077, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13211041688919067, + "rewards/margins": 0.0669344812631607, + "rewards/rejected": -0.19904489815235138, + "step": 541 + }, + { + "epoch": 1.4291364535266975, + "grad_norm": 10.374223709106445, + "learning_rate": 4.2236024844720496e-06, + "log_odds_chosen": 0.27454304695129395, + "log_odds_ratio": -0.5771620273590088, + "logits/chosen": -1.1492888927459717, + "logits/rejected": -1.0261468887329102, + "logps/chosen": -1.4748188257217407, + "logps/rejected": -1.6907557249069214, + "loss": 2.8768, + "nll_loss": 0.661479651927948, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14748188853263855, + "rewards/margins": 0.021593693643808365, + "rewards/rejected": -0.16907557845115662, + "step": 542 + }, + { + "epoch": 1.4317732366512854, + "grad_norm": 10.054798126220703, + "learning_rate": 4.216503992901508e-06, + "log_odds_chosen": 0.5260477066040039, + "log_odds_ratio": -0.47824597358703613, + "logits/chosen": -1.104248046875, + "logits/rejected": -1.0554959774017334, + "logps/chosen": -1.3430862426757812, + "logps/rejected": -1.7557330131530762, + "loss": 2.4337, + "nll_loss": 0.5605974197387695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13430863618850708, + "rewards/margins": 0.04126469045877457, + "rewards/rejected": -0.17557331919670105, + "step": 543 + }, + { + "epoch": 1.4344100197758736, + "grad_norm": 9.858833312988281, + "learning_rate": 4.209405501330967e-06, + "log_odds_chosen": 0.4538835883140564, + "log_odds_ratio": -0.49726781249046326, + "logits/chosen": -1.0619155168533325, + "logits/rejected": -0.9338789582252502, + "logps/chosen": -1.4854230880737305, + "logps/rejected": -1.848534345626831, + "loss": 2.51, + "nll_loss": 0.5777809023857117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14854231476783752, + "rewards/margins": 0.036311134696006775, + "rewards/rejected": -0.1848534345626831, + "step": 544 + }, + { + "epoch": 1.4370468029004615, + "grad_norm": 10.137702941894531, + "learning_rate": 4.202307009760425e-06, + "log_odds_chosen": 0.7637728452682495, + "log_odds_ratio": -0.44855618476867676, + "logits/chosen": -1.0617544651031494, + "logits/rejected": -0.960517406463623, + "logps/chosen": -1.394372820854187, + "logps/rejected": -2.043037176132202, + "loss": 2.3223, + "nll_loss": 0.5357198715209961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13943728804588318, + "rewards/margins": 0.06486643850803375, + "rewards/rejected": -0.20430371165275574, + "step": 545 + }, + { + "epoch": 1.4396835860250494, + "grad_norm": 9.720535278320312, + "learning_rate": 4.195208518189884e-06, + "log_odds_chosen": 0.7124000191688538, + "log_odds_ratio": -0.41061651706695557, + "logits/chosen": -1.0555087327957153, + "logits/rejected": -0.9663803577423096, + "logps/chosen": -1.3942837715148926, + "logps/rejected": -1.9674293994903564, + "loss": 2.0966, + "nll_loss": 0.4830893576145172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13942837715148926, + "rewards/margins": 0.057314563542604446, + "rewards/rejected": -0.1967429518699646, + "step": 546 + }, + { + "epoch": 1.4423203691496376, + "grad_norm": 9.361237525939941, + "learning_rate": 4.188110026619343e-06, + "log_odds_chosen": 0.46525129675865173, + "log_odds_ratio": -0.5079076290130615, + "logits/chosen": -1.0968852043151855, + "logits/rejected": -1.0257980823516846, + "logps/chosen": -1.176173448562622, + "logps/rejected": -1.51423978805542, + "loss": 2.3172, + "nll_loss": 0.5285149812698364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11761735379695892, + "rewards/margins": 0.033806636929512024, + "rewards/rejected": -0.15142399072647095, + "step": 547 + }, + { + "epoch": 1.4449571522742255, + "grad_norm": 10.04572868347168, + "learning_rate": 4.181011535048802e-06, + "log_odds_chosen": 0.4429911971092224, + "log_odds_ratio": -0.5006264448165894, + "logits/chosen": -1.0217193365097046, + "logits/rejected": -0.9494048357009888, + "logps/chosen": -1.330000638961792, + "logps/rejected": -1.6583576202392578, + "loss": 2.1335, + "nll_loss": 0.4833093583583832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13300006091594696, + "rewards/margins": 0.0328357107937336, + "rewards/rejected": -0.16583578288555145, + "step": 548 + }, + { + "epoch": 1.4475939353988134, + "grad_norm": 10.25400447845459, + "learning_rate": 4.17391304347826e-06, + "log_odds_chosen": 0.7248557806015015, + "log_odds_ratio": -0.4106558561325073, + "logits/chosen": -1.019441843032837, + "logits/rejected": -0.9044776558876038, + "logps/chosen": -1.3269296884536743, + "logps/rejected": -1.9024122953414917, + "loss": 2.1889, + "nll_loss": 0.5061638951301575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13269296288490295, + "rewards/margins": 0.057548269629478455, + "rewards/rejected": -0.1902412325143814, + "step": 549 + }, + { + "epoch": 1.4502307185234016, + "grad_norm": 10.040661811828613, + "learning_rate": 4.16681455190772e-06, + "log_odds_chosen": 0.36954087018966675, + "log_odds_ratio": -0.536496639251709, + "logits/chosen": -1.0806694030761719, + "logits/rejected": -0.9924178123474121, + "logps/chosen": -1.3371779918670654, + "logps/rejected": -1.626185417175293, + "loss": 2.1673, + "nll_loss": 0.4881811738014221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13371780514717102, + "rewards/margins": 0.028900746256113052, + "rewards/rejected": -0.16261856257915497, + "step": 550 + }, + { + "epoch": 1.4528675016479895, + "grad_norm": 10.27868938446045, + "learning_rate": 4.159716060337178e-06, + "log_odds_chosen": 0.6784330010414124, + "log_odds_ratio": -0.41786181926727295, + "logits/chosen": -1.0871484279632568, + "logits/rejected": -1.0069628953933716, + "logps/chosen": -1.372645378112793, + "logps/rejected": -1.9133880138397217, + "loss": 2.4847, + "nll_loss": 0.5793927907943726, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13726454973220825, + "rewards/margins": 0.05407426133751869, + "rewards/rejected": -0.19133880734443665, + "step": 551 + }, + { + "epoch": 1.4555042847725774, + "grad_norm": 9.83980655670166, + "learning_rate": 4.152617568766637e-06, + "log_odds_chosen": 0.744712233543396, + "log_odds_ratio": -0.4128277897834778, + "logits/chosen": -1.1573163270950317, + "logits/rejected": -0.9828042984008789, + "logps/chosen": -1.256611943244934, + "logps/rejected": -1.8465502262115479, + "loss": 2.3454, + "nll_loss": 0.5450708866119385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1256611943244934, + "rewards/margins": 0.058993831276893616, + "rewards/rejected": -0.18465502560138702, + "step": 552 + }, + { + "epoch": 1.4581410678971656, + "grad_norm": 9.363921165466309, + "learning_rate": 4.145519077196096e-06, + "log_odds_chosen": 0.5384882688522339, + "log_odds_ratio": -0.4847624897956848, + "logits/chosen": -1.004069447517395, + "logits/rejected": -0.9431107044219971, + "logps/chosen": -1.204410195350647, + "logps/rejected": -1.5975055694580078, + "loss": 1.843, + "nll_loss": 0.4122834801673889, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1204410195350647, + "rewards/margins": 0.039309531450271606, + "rewards/rejected": -0.1597505509853363, + "step": 553 + }, + { + "epoch": 1.4607778510217535, + "grad_norm": 10.366519927978516, + "learning_rate": 4.138420585625554e-06, + "log_odds_chosen": 0.45709162950515747, + "log_odds_ratio": -0.493826687335968, + "logits/chosen": -1.0873271226882935, + "logits/rejected": -0.9958299398422241, + "logps/chosen": -1.5129612684249878, + "logps/rejected": -1.873511791229248, + "loss": 2.9732, + "nll_loss": 0.6939066052436829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15129612386226654, + "rewards/margins": 0.0360550582408905, + "rewards/rejected": -0.18735116720199585, + "step": 554 + }, + { + "epoch": 1.4634146341463414, + "grad_norm": 10.013711929321289, + "learning_rate": 4.1313220940550136e-06, + "log_odds_chosen": 0.5878893136978149, + "log_odds_ratio": -0.44947680830955505, + "logits/chosen": -1.0896657705307007, + "logits/rejected": -1.0171089172363281, + "logps/chosen": -1.387203335762024, + "logps/rejected": -1.8584654331207275, + "loss": 2.221, + "nll_loss": 0.5102908611297607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1387203335762024, + "rewards/margins": 0.04712621867656708, + "rewards/rejected": -0.18584656715393066, + "step": 555 + }, + { + "epoch": 1.4660514172709296, + "grad_norm": 10.238524436950684, + "learning_rate": 4.124223602484472e-06, + "log_odds_chosen": 0.3882477879524231, + "log_odds_ratio": -0.5345123410224915, + "logits/chosen": -1.0624442100524902, + "logits/rejected": -1.014449119567871, + "logps/chosen": -1.318690299987793, + "logps/rejected": -1.590446949005127, + "loss": 2.6513, + "nll_loss": 0.609370231628418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13186903297901154, + "rewards/margins": 0.027175655588507652, + "rewards/rejected": -0.15904468297958374, + "step": 556 + }, + { + "epoch": 1.4686882003955175, + "grad_norm": 10.251365661621094, + "learning_rate": 4.1171251109139305e-06, + "log_odds_chosen": 0.6487367153167725, + "log_odds_ratio": -0.44119197130203247, + "logits/chosen": -0.9631329774856567, + "logits/rejected": -0.9411216974258423, + "logps/chosen": -1.4211435317993164, + "logps/rejected": -1.9619696140289307, + "loss": 1.8292, + "nll_loss": 0.4131847620010376, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14211437106132507, + "rewards/margins": 0.05408259108662605, + "rewards/rejected": -0.19619694352149963, + "step": 557 + }, + { + "epoch": 1.4713249835201054, + "grad_norm": 9.43930721282959, + "learning_rate": 4.110026619343389e-06, + "log_odds_chosen": 0.6696327924728394, + "log_odds_ratio": -0.4534551799297333, + "logits/chosen": -1.0974669456481934, + "logits/rejected": -0.9728105068206787, + "logps/chosen": -1.215304970741272, + "logps/rejected": -1.702378749847412, + "loss": 2.1915, + "nll_loss": 0.5025386214256287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12153048813343048, + "rewards/margins": 0.04870738089084625, + "rewards/rejected": -0.17023788392543793, + "step": 558 + }, + { + "epoch": 1.4739617666446936, + "grad_norm": 9.472445487976074, + "learning_rate": 4.102928127772848e-06, + "log_odds_chosen": 0.45684900879859924, + "log_odds_ratio": -0.5050070881843567, + "logits/chosen": -1.0497395992279053, + "logits/rejected": -0.9759555459022522, + "logps/chosen": -1.3387244939804077, + "logps/rejected": -1.6942470073699951, + "loss": 1.9954, + "nll_loss": 0.44833865761756897, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13387244939804077, + "rewards/margins": 0.0355522483587265, + "rewards/rejected": -0.16942471265792847, + "step": 559 + }, + { + "epoch": 1.4765985497692815, + "grad_norm": 10.136690139770508, + "learning_rate": 4.095829636202307e-06, + "log_odds_chosen": 0.5725353956222534, + "log_odds_ratio": -0.4533199667930603, + "logits/chosen": -1.0816022157669067, + "logits/rejected": -0.9956217408180237, + "logps/chosen": -1.3826810121536255, + "logps/rejected": -1.8414275646209717, + "loss": 2.3122, + "nll_loss": 0.5327115058898926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1382680982351303, + "rewards/margins": 0.04587465897202492, + "rewards/rejected": -0.18414275348186493, + "step": 560 + }, + { + "epoch": 1.4792353328938694, + "grad_norm": 10.132454872131348, + "learning_rate": 4.088731144631765e-06, + "log_odds_chosen": 0.4786339998245239, + "log_odds_ratio": -0.5015438199043274, + "logits/chosen": -1.1128901243209839, + "logits/rejected": -1.0538095235824585, + "logps/chosen": -1.345731496810913, + "logps/rejected": -1.7082313299179077, + "loss": 2.3008, + "nll_loss": 0.5250552296638489, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13457316160202026, + "rewards/margins": 0.03624997287988663, + "rewards/rejected": -0.1708231270313263, + "step": 561 + }, + { + "epoch": 1.4818721160184576, + "grad_norm": 8.780614852905273, + "learning_rate": 4.081632653061224e-06, + "log_odds_chosen": 0.8551339507102966, + "log_odds_ratio": -0.38810813426971436, + "logits/chosen": -0.9988934397697449, + "logits/rejected": -0.9784669876098633, + "logps/chosen": -1.1678969860076904, + "logps/rejected": -1.8136950731277466, + "loss": 1.6219, + "nll_loss": 0.36667054891586304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11678969860076904, + "rewards/margins": 0.0645797997713089, + "rewards/rejected": -0.18136951327323914, + "step": 562 + }, + { + "epoch": 1.4845088991430455, + "grad_norm": 9.880027770996094, + "learning_rate": 4.074534161490683e-06, + "log_odds_chosen": 0.42561155557632446, + "log_odds_ratio": -0.5181151032447815, + "logits/chosen": -1.0898230075836182, + "logits/rejected": -1.0091991424560547, + "logps/chosen": -1.339963436126709, + "logps/rejected": -1.6795657873153687, + "loss": 2.1117, + "nll_loss": 0.47612476348876953, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13399635255336761, + "rewards/margins": 0.03396022692322731, + "rewards/rejected": -0.16795656085014343, + "step": 563 + }, + { + "epoch": 1.4871456822676334, + "grad_norm": 9.44789981842041, + "learning_rate": 4.067435669920142e-06, + "log_odds_chosen": 0.31164228916168213, + "log_odds_ratio": -0.5770187973976135, + "logits/chosen": -1.0990424156188965, + "logits/rejected": -0.9864567518234253, + "logps/chosen": -1.3892393112182617, + "logps/rejected": -1.6200555562973022, + "loss": 2.4427, + "nll_loss": 0.5529693961143494, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13892394304275513, + "rewards/margins": 0.023081636056303978, + "rewards/rejected": -0.16200555860996246, + "step": 564 + }, + { + "epoch": 1.4897824653922216, + "grad_norm": 9.407962799072266, + "learning_rate": 4.0603371783496e-06, + "log_odds_chosen": 0.4963398277759552, + "log_odds_ratio": -0.4997844099998474, + "logits/chosen": -1.1170499324798584, + "logits/rejected": -1.025063157081604, + "logps/chosen": -1.3016924858093262, + "logps/rejected": -1.642601728439331, + "loss": 2.4696, + "nll_loss": 0.5674134492874146, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13016925752162933, + "rewards/margins": 0.034090910106897354, + "rewards/rejected": -0.1642601639032364, + "step": 565 + }, + { + "epoch": 1.4924192485168095, + "grad_norm": 9.735568046569824, + "learning_rate": 4.05323868677906e-06, + "log_odds_chosen": 0.46814143657684326, + "log_odds_ratio": -0.49939918518066406, + "logits/chosen": -1.1061391830444336, + "logits/rejected": -0.9936612844467163, + "logps/chosen": -1.3603556156158447, + "logps/rejected": -1.7409652471542358, + "loss": 2.2104, + "nll_loss": 0.5026587843894958, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13603554666042328, + "rewards/margins": 0.03806097060441971, + "rewards/rejected": -0.17409652471542358, + "step": 566 + }, + { + "epoch": 1.4950560316413974, + "grad_norm": 9.404352188110352, + "learning_rate": 4.046140195208518e-06, + "log_odds_chosen": 0.5777267813682556, + "log_odds_ratio": -0.45638012886047363, + "logits/chosen": -1.0536627769470215, + "logits/rejected": -0.9947332143783569, + "logps/chosen": -1.2639999389648438, + "logps/rejected": -1.7096264362335205, + "loss": 2.0162, + "nll_loss": 0.4584140181541443, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12639999389648438, + "rewards/margins": 0.044562652707099915, + "rewards/rejected": -0.1709626317024231, + "step": 567 + }, + { + "epoch": 1.4976928147659856, + "grad_norm": 9.94635009765625, + "learning_rate": 4.039041703637977e-06, + "log_odds_chosen": 0.3626604974269867, + "log_odds_ratio": -0.54862380027771, + "logits/chosen": -1.0114367008209229, + "logits/rejected": -0.8889374732971191, + "logps/chosen": -1.3621025085449219, + "logps/rejected": -1.6648523807525635, + "loss": 1.9989, + "nll_loss": 0.44487372040748596, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13621026277542114, + "rewards/margins": 0.03027498535811901, + "rewards/rejected": -0.1664852499961853, + "step": 568 + }, + { + "epoch": 1.5003295978905735, + "grad_norm": 10.303240776062012, + "learning_rate": 4.031943212067436e-06, + "log_odds_chosen": 0.5516713857650757, + "log_odds_ratio": -0.4821397066116333, + "logits/chosen": -1.1247400045394897, + "logits/rejected": -0.9605581760406494, + "logps/chosen": -1.6147916316986084, + "logps/rejected": -2.07356333732605, + "loss": 3.0803, + "nll_loss": 0.7218732237815857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1614791750907898, + "rewards/margins": 0.04587716609239578, + "rewards/rejected": -0.20735633373260498, + "step": 569 + }, + { + "epoch": 1.5003295978905735, + "eval_log_odds_chosen": 0.49622219800949097, + "eval_log_odds_ratio": -0.49882009625434875, + "eval_logits/chosen": -1.0913243293762207, + "eval_logits/rejected": -0.9999473690986633, + "eval_logps/chosen": -1.379929542541504, + "eval_logps/rejected": -1.7691881656646729, + "eval_loss": 0.5929180979728699, + "eval_nll_loss": 0.5430360436439514, + "eval_rewards/accuracies": 0.9053254723548889, + "eval_rewards/chosen": -0.1379929631948471, + "eval_rewards/margins": 0.03892587497830391, + "eval_rewards/rejected": -0.17691883444786072, + "eval_runtime": 129.4908, + "eval_samples_per_second": 2.61, + "eval_steps_per_second": 1.305, + "step": 569 + }, + { + "epoch": 1.5029663810151614, + "grad_norm": 9.44087028503418, + "learning_rate": 4.024844720496894e-06, + "log_odds_chosen": 0.5854195952415466, + "log_odds_ratio": -0.47436830401420593, + "logits/chosen": -1.1242841482162476, + "logits/rejected": -0.9996142387390137, + "logps/chosen": -1.2071850299835205, + "logps/rejected": -1.6707042455673218, + "loss": 2.0972, + "nll_loss": 0.4768558740615845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12071850150823593, + "rewards/margins": 0.046351924538612366, + "rewards/rejected": -0.1670704334974289, + "step": 570 + }, + { + "epoch": 1.5056031641397496, + "grad_norm": 11.15221881866455, + "learning_rate": 4.017746228926353e-06, + "log_odds_chosen": 0.1619393527507782, + "log_odds_ratio": -0.6348941326141357, + "logits/chosen": -1.0823087692260742, + "logits/rejected": -1.0478005409240723, + "logps/chosen": -1.3582721948623657, + "logps/rejected": -1.4940277338027954, + "loss": 2.7124, + "nll_loss": 0.6146115064620972, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1358272135257721, + "rewards/margins": 0.013575554825365543, + "rewards/rejected": -0.14940276741981506, + "step": 571 + }, + { + "epoch": 1.5082399472643375, + "grad_norm": 10.919987678527832, + "learning_rate": 4.0106477373558115e-06, + "log_odds_chosen": 0.37946444749832153, + "log_odds_ratio": -0.5383967757225037, + "logits/chosen": -1.1449512243270874, + "logits/rejected": -1.0578222274780273, + "logps/chosen": -1.4650253057479858, + "logps/rejected": -1.782175898551941, + "loss": 2.6081, + "nll_loss": 0.5981886386871338, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1465025246143341, + "rewards/margins": 0.03171507269144058, + "rewards/rejected": -0.17821760475635529, + "step": 572 + }, + { + "epoch": 1.5108767303889254, + "grad_norm": 9.33333969116211, + "learning_rate": 4.00354924578527e-06, + "log_odds_chosen": 0.3692682981491089, + "log_odds_ratio": -0.5407110452651978, + "logits/chosen": -0.9620389342308044, + "logits/rejected": -0.9305254817008972, + "logps/chosen": -1.3830729722976685, + "logps/rejected": -1.66476571559906, + "loss": 2.3393, + "nll_loss": 0.5307450890541077, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13830730319023132, + "rewards/margins": 0.028169266879558563, + "rewards/rejected": -0.1664765626192093, + "step": 573 + }, + { + "epoch": 1.5135135135135136, + "grad_norm": 10.451208114624023, + "learning_rate": 3.996450754214729e-06, + "log_odds_chosen": 0.23383724689483643, + "log_odds_ratio": -0.5925602912902832, + "logits/chosen": -1.1164716482162476, + "logits/rejected": -1.0261996984481812, + "logps/chosen": -1.4167859554290771, + "logps/rejected": -1.5901645421981812, + "loss": 2.6467, + "nll_loss": 0.6024162173271179, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1416786015033722, + "rewards/margins": 0.0173378624022007, + "rewards/rejected": -0.1590164601802826, + "step": 574 + }, + { + "epoch": 1.5161502966381015, + "grad_norm": 9.987966537475586, + "learning_rate": 3.989352262644188e-06, + "log_odds_chosen": 0.3409838080406189, + "log_odds_ratio": -0.5625724792480469, + "logits/chosen": -1.1221113204956055, + "logits/rejected": -0.9838091731071472, + "logps/chosen": -1.432151198387146, + "logps/rejected": -1.715986728668213, + "loss": 2.5528, + "nll_loss": 0.5819397568702698, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1432151198387146, + "rewards/margins": 0.028383558616042137, + "rewards/rejected": -0.1715986728668213, + "step": 575 + }, + { + "epoch": 1.5187870797626895, + "grad_norm": 10.189329147338867, + "learning_rate": 3.982253771073646e-06, + "log_odds_chosen": 0.294436514377594, + "log_odds_ratio": -0.5655215978622437, + "logits/chosen": -1.0958954095840454, + "logits/rejected": -1.0803096294403076, + "logps/chosen": -1.3242357969284058, + "logps/rejected": -1.5406522750854492, + "loss": 2.6916, + "nll_loss": 0.6163361072540283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13242359459400177, + "rewards/margins": 0.021641647443175316, + "rewards/rejected": -0.15406523644924164, + "step": 576 + }, + { + "epoch": 1.5214238628872776, + "grad_norm": 10.759533882141113, + "learning_rate": 3.975155279503105e-06, + "log_odds_chosen": 0.3902096748352051, + "log_odds_ratio": -0.5328852534294128, + "logits/chosen": -1.082053303718567, + "logits/rejected": -1.0068429708480835, + "logps/chosen": -1.6502498388290405, + "logps/rejected": -1.9628188610076904, + "loss": 2.8628, + "nll_loss": 0.6624106168746948, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.165024995803833, + "rewards/margins": 0.03125689923763275, + "rewards/rejected": -0.19628189504146576, + "step": 577 + }, + { + "epoch": 1.5240606460118655, + "grad_norm": 9.272303581237793, + "learning_rate": 3.968056787932564e-06, + "log_odds_chosen": 0.433498740196228, + "log_odds_ratio": -0.526934027671814, + "logits/chosen": -1.1131141185760498, + "logits/rejected": -0.9975295066833496, + "logps/chosen": -1.2005765438079834, + "logps/rejected": -1.4930449724197388, + "loss": 2.2626, + "nll_loss": 0.5129634737968445, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12005767226219177, + "rewards/margins": 0.029246840626001358, + "rewards/rejected": -0.14930450916290283, + "step": 578 + }, + { + "epoch": 1.5266974291364535, + "grad_norm": 9.557279586791992, + "learning_rate": 3.960958296362023e-06, + "log_odds_chosen": 0.5534155964851379, + "log_odds_ratio": -0.47711971402168274, + "logits/chosen": -1.162174940109253, + "logits/rejected": -1.0576491355895996, + "logps/chosen": -1.363930583000183, + "logps/rejected": -1.8052783012390137, + "loss": 2.5042, + "nll_loss": 0.5783388614654541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13639307022094727, + "rewards/margins": 0.04413476586341858, + "rewards/rejected": -0.18052780628204346, + "step": 579 + }, + { + "epoch": 1.5293342122610416, + "grad_norm": 10.226247787475586, + "learning_rate": 3.953859804791482e-06, + "log_odds_chosen": 0.6164693832397461, + "log_odds_ratio": -0.477706640958786, + "logits/chosen": -1.073448896408081, + "logits/rejected": -0.9424384236335754, + "logps/chosen": -1.4734885692596436, + "logps/rejected": -2.000279426574707, + "loss": 2.2613, + "nll_loss": 0.5175544619560242, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14734885096549988, + "rewards/margins": 0.05267908796668053, + "rewards/rejected": -0.2000279426574707, + "step": 580 + }, + { + "epoch": 1.5319709953856295, + "grad_norm": 10.530021667480469, + "learning_rate": 3.946761313220941e-06, + "log_odds_chosen": 0.30705833435058594, + "log_odds_ratio": -0.562725841999054, + "logits/chosen": -1.0765693187713623, + "logits/rejected": -1.0258369445800781, + "logps/chosen": -1.5888984203338623, + "logps/rejected": -1.8333849906921387, + "loss": 2.9972, + "nll_loss": 0.6930161714553833, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15888984501361847, + "rewards/margins": 0.024448659271001816, + "rewards/rejected": -0.1833384931087494, + "step": 581 + }, + { + "epoch": 1.5346077785102175, + "grad_norm": 9.57894515991211, + "learning_rate": 3.9396628216504e-06, + "log_odds_chosen": 0.6514328122138977, + "log_odds_ratio": -0.4632219672203064, + "logits/chosen": -1.1260734796524048, + "logits/rejected": -1.0454940795898438, + "logps/chosen": -1.3779319524765015, + "logps/rejected": -1.9247746467590332, + "loss": 2.3697, + "nll_loss": 0.5461094379425049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1377931833267212, + "rewards/margins": 0.05468427389860153, + "rewards/rejected": -0.19247746467590332, + "step": 582 + }, + { + "epoch": 1.5372445616348056, + "grad_norm": 9.892532348632812, + "learning_rate": 3.932564330079858e-06, + "log_odds_chosen": 0.5860216617584229, + "log_odds_ratio": -0.44781792163848877, + "logits/chosen": -1.1517478227615356, + "logits/rejected": -1.0297623872756958, + "logps/chosen": -1.3620892763137817, + "logps/rejected": -1.8288545608520508, + "loss": 2.4398, + "nll_loss": 0.5651760101318359, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1362089365720749, + "rewards/margins": 0.046676523983478546, + "rewards/rejected": -0.18288546800613403, + "step": 583 + }, + { + "epoch": 1.5398813447593935, + "grad_norm": 9.396767616271973, + "learning_rate": 3.9254658385093166e-06, + "log_odds_chosen": 0.46145254373550415, + "log_odds_ratio": -0.4998391270637512, + "logits/chosen": -1.09555983543396, + "logits/rejected": -1.0095515251159668, + "logps/chosen": -1.219951868057251, + "logps/rejected": -1.5669710636138916, + "loss": 2.1258, + "nll_loss": 0.4814777970314026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12199518829584122, + "rewards/margins": 0.03470191732048988, + "rewards/rejected": -0.1566971093416214, + "step": 584 + }, + { + "epoch": 1.5425181278839815, + "grad_norm": 9.792346954345703, + "learning_rate": 3.9183673469387755e-06, + "log_odds_chosen": 0.4733620285987854, + "log_odds_ratio": -0.5020909309387207, + "logits/chosen": -1.044708013534546, + "logits/rejected": -0.9730866551399231, + "logps/chosen": -1.3304319381713867, + "logps/rejected": -1.70212721824646, + "loss": 2.0212, + "nll_loss": 0.45508724451065063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13304319977760315, + "rewards/margins": 0.03716951608657837, + "rewards/rejected": -0.1702127307653427, + "step": 585 + }, + { + "epoch": 1.5451549110085696, + "grad_norm": 10.44377326965332, + "learning_rate": 3.911268855368234e-06, + "log_odds_chosen": 0.40270447731018066, + "log_odds_ratio": -0.5341402292251587, + "logits/chosen": -1.0913063287734985, + "logits/rejected": -1.0041637420654297, + "logps/chosen": -1.365404486656189, + "logps/rejected": -1.6835623979568481, + "loss": 2.2716, + "nll_loss": 0.5144976377487183, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13654044270515442, + "rewards/margins": 0.031815797090530396, + "rewards/rejected": -0.16835623979568481, + "step": 586 + }, + { + "epoch": 1.5477916941331575, + "grad_norm": 8.901201248168945, + "learning_rate": 3.904170363797692e-06, + "log_odds_chosen": 0.7658116221427917, + "log_odds_ratio": -0.40815699100494385, + "logits/chosen": -1.0586168766021729, + "logits/rejected": -0.9915466904640198, + "logps/chosen": -0.9599915742874146, + "logps/rejected": -1.4701061248779297, + "loss": 1.775, + "nll_loss": 0.4029373228549957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09599915891885757, + "rewards/margins": 0.051011450588703156, + "rewards/rejected": -0.14701060950756073, + "step": 587 + }, + { + "epoch": 1.5504284772577455, + "grad_norm": 10.19491195678711, + "learning_rate": 3.897071872227151e-06, + "log_odds_chosen": 0.4995730221271515, + "log_odds_ratio": -0.48944902420043945, + "logits/chosen": -1.0612411499023438, + "logits/rejected": -1.0142158269882202, + "logps/chosen": -1.394554853439331, + "logps/rejected": -1.7762850522994995, + "loss": 2.2572, + "nll_loss": 0.5153642892837524, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13945548236370087, + "rewards/margins": 0.03817303106188774, + "rewards/rejected": -0.1776285171508789, + "step": 588 + }, + { + "epoch": 1.5530652603823336, + "grad_norm": 10.221670150756836, + "learning_rate": 3.88997338065661e-06, + "log_odds_chosen": 0.41200292110443115, + "log_odds_ratio": -0.5345177054405212, + "logits/chosen": -1.0595688819885254, + "logits/rejected": -0.9596129655838013, + "logps/chosen": -1.4647784233093262, + "logps/rejected": -1.8111674785614014, + "loss": 2.3963, + "nll_loss": 0.5456159710884094, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1464778333902359, + "rewards/margins": 0.0346389040350914, + "rewards/rejected": -0.1811167299747467, + "step": 589 + }, + { + "epoch": 1.5557020435069215, + "grad_norm": 10.246232032775879, + "learning_rate": 3.882874889086069e-06, + "log_odds_chosen": 0.4314562678337097, + "log_odds_ratio": -0.5253216028213501, + "logits/chosen": -1.1093804836273193, + "logits/rejected": -1.0065696239471436, + "logps/chosen": -1.4911247491836548, + "logps/rejected": -1.8563570976257324, + "loss": 2.6862, + "nll_loss": 0.6190296411514282, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1491124927997589, + "rewards/margins": 0.03652321547269821, + "rewards/rejected": -0.18563568592071533, + "step": 590 + }, + { + "epoch": 1.5583388266315095, + "grad_norm": 10.102618217468262, + "learning_rate": 3.875776397515528e-06, + "log_odds_chosen": 0.533229410648346, + "log_odds_ratio": -0.4733944535255432, + "logits/chosen": -1.0562225580215454, + "logits/rejected": -0.9820581078529358, + "logps/chosen": -1.501032829284668, + "logps/rejected": -1.9365155696868896, + "loss": 2.2097, + "nll_loss": 0.5050868391990662, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15010327100753784, + "rewards/margins": 0.04354827478528023, + "rewards/rejected": -0.19365155696868896, + "step": 591 + }, + { + "epoch": 1.5609756097560976, + "grad_norm": 9.822199821472168, + "learning_rate": 3.868677905944986e-06, + "log_odds_chosen": 0.15929587185382843, + "log_odds_ratio": -0.6341037750244141, + "logits/chosen": -1.176448941230774, + "logits/rejected": -1.0918364524841309, + "logps/chosen": -1.3583776950836182, + "logps/rejected": -1.4901213645935059, + "loss": 2.5741, + "nll_loss": 0.5801116228103638, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13583777844905853, + "rewards/margins": 0.013174369931221008, + "rewards/rejected": -0.14901213347911835, + "step": 592 + }, + { + "epoch": 1.5636123928806855, + "grad_norm": 9.36978530883789, + "learning_rate": 3.861579414374445e-06, + "log_odds_chosen": 0.6743534803390503, + "log_odds_ratio": -0.42430543899536133, + "logits/chosen": -1.0748366117477417, + "logits/rejected": -0.9873178005218506, + "logps/chosen": -1.2358392477035522, + "logps/rejected": -1.743302345275879, + "loss": 1.9504, + "nll_loss": 0.4451683759689331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12358392775058746, + "rewards/margins": 0.05074630305171013, + "rewards/rejected": -0.1743302345275879, + "step": 593 + }, + { + "epoch": 1.5662491760052735, + "grad_norm": 10.640802383422852, + "learning_rate": 3.854480922803904e-06, + "log_odds_chosen": 0.29617777466773987, + "log_odds_ratio": -0.5645712614059448, + "logits/chosen": -1.1450952291488647, + "logits/rejected": -1.1125893592834473, + "logps/chosen": -1.419756293296814, + "logps/rejected": -1.653576374053955, + "loss": 2.7612, + "nll_loss": 0.6338387131690979, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14197564125061035, + "rewards/margins": 0.023382004350423813, + "rewards/rejected": -0.16535764932632446, + "step": 594 + }, + { + "epoch": 1.5688859591298616, + "grad_norm": 10.288469314575195, + "learning_rate": 3.847382431233363e-06, + "log_odds_chosen": 0.47418349981307983, + "log_odds_ratio": -0.48888635635375977, + "logits/chosen": -1.091731309890747, + "logits/rejected": -1.0084561109542847, + "logps/chosen": -1.4172899723052979, + "logps/rejected": -1.795617699623108, + "loss": 2.3343, + "nll_loss": 0.5346803069114685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14172901213169098, + "rewards/margins": 0.037832751870155334, + "rewards/rejected": -0.1795617640018463, + "step": 595 + }, + { + "epoch": 1.5715227422544495, + "grad_norm": 10.708169937133789, + "learning_rate": 3.840283939662822e-06, + "log_odds_chosen": 0.373151034116745, + "log_odds_ratio": -0.5381395816802979, + "logits/chosen": -1.1400319337844849, + "logits/rejected": -0.9808732867240906, + "logps/chosen": -1.4813158512115479, + "logps/rejected": -1.7744696140289307, + "loss": 2.7531, + "nll_loss": 0.634470522403717, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1481315791606903, + "rewards/margins": 0.02931538037955761, + "rewards/rejected": -0.17744696140289307, + "step": 596 + }, + { + "epoch": 1.5741595253790375, + "grad_norm": 10.199514389038086, + "learning_rate": 3.8331854480922806e-06, + "log_odds_chosen": 0.514219343662262, + "log_odds_ratio": -0.4816381335258484, + "logits/chosen": -1.1541380882263184, + "logits/rejected": -1.0266348123550415, + "logps/chosen": -1.3262834548950195, + "logps/rejected": -1.7207541465759277, + "loss": 2.3532, + "nll_loss": 0.5401394367218018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13262835144996643, + "rewards/margins": 0.03944707661867142, + "rewards/rejected": -0.17207542061805725, + "step": 597 + }, + { + "epoch": 1.5767963085036256, + "grad_norm": 9.82446575164795, + "learning_rate": 3.8260869565217395e-06, + "log_odds_chosen": 0.33211588859558105, + "log_odds_ratio": -0.5527073740959167, + "logits/chosen": -1.0964913368225098, + "logits/rejected": -1.0414520502090454, + "logps/chosen": -1.3519885540008545, + "logps/rejected": -1.598536729812622, + "loss": 2.4916, + "nll_loss": 0.5676273107528687, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13519886136054993, + "rewards/margins": 0.024654816836118698, + "rewards/rejected": -0.15985366702079773, + "step": 598 + }, + { + "epoch": 1.5794330916282135, + "grad_norm": 10.105400085449219, + "learning_rate": 3.8189884649511975e-06, + "log_odds_chosen": 0.36954590678215027, + "log_odds_ratio": -0.5429477691650391, + "logits/chosen": -1.1735734939575195, + "logits/rejected": -1.0776454210281372, + "logps/chosen": -1.401150107383728, + "logps/rejected": -1.6895008087158203, + "loss": 2.8135, + "nll_loss": 0.64908766746521, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14011500775814056, + "rewards/margins": 0.028835097327828407, + "rewards/rejected": -0.16895011067390442, + "step": 599 + }, + { + "epoch": 1.5820698747528015, + "grad_norm": 8.94379997253418, + "learning_rate": 3.811889973380657e-06, + "log_odds_chosen": 0.38840794563293457, + "log_odds_ratio": -0.5276702642440796, + "logits/chosen": -1.0334696769714355, + "logits/rejected": -0.9777562618255615, + "logps/chosen": -1.1394922733306885, + "logps/rejected": -1.4258663654327393, + "loss": 1.7502, + "nll_loss": 0.38477957248687744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1139492467045784, + "rewards/margins": 0.028637398034334183, + "rewards/rejected": -0.14258664846420288, + "step": 600 + }, + { + "epoch": 1.5847066578773896, + "grad_norm": 10.189424514770508, + "learning_rate": 3.804791481810115e-06, + "log_odds_chosen": 0.5923186540603638, + "log_odds_ratio": -0.4600526690483093, + "logits/chosen": -1.0624918937683105, + "logits/rejected": -0.9810973405838013, + "logps/chosen": -1.3630549907684326, + "logps/rejected": -1.8275552988052368, + "loss": 2.5046, + "nll_loss": 0.5801517367362976, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13630549609661102, + "rewards/margins": 0.04645005241036415, + "rewards/rejected": -0.18275552988052368, + "step": 601 + }, + { + "epoch": 1.5873434410019776, + "grad_norm": 8.882495880126953, + "learning_rate": 3.797692990239574e-06, + "log_odds_chosen": 0.5100771188735962, + "log_odds_ratio": -0.4908636808395386, + "logits/chosen": -1.0376583337783813, + "logits/rejected": -0.9982375502586365, + "logps/chosen": -1.1316465139389038, + "logps/rejected": -1.525282621383667, + "loss": 1.6229, + "nll_loss": 0.35662680864334106, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11316464841365814, + "rewards/margins": 0.03936360031366348, + "rewards/rejected": -0.15252825617790222, + "step": 602 + }, + { + "epoch": 1.5899802241265655, + "grad_norm": 9.369660377502441, + "learning_rate": 3.7905944986690327e-06, + "log_odds_chosen": 0.918658435344696, + "log_odds_ratio": -0.3525279760360718, + "logits/chosen": -1.0274196863174438, + "logits/rejected": -0.9446275234222412, + "logps/chosen": -1.2396726608276367, + "logps/rejected": -1.971005916595459, + "loss": 1.6603, + "nll_loss": 0.37981170415878296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12396727502346039, + "rewards/margins": 0.07313331961631775, + "rewards/rejected": -0.19710060954093933, + "step": 603 + }, + { + "epoch": 1.5926170072511536, + "grad_norm": 10.286942481994629, + "learning_rate": 3.783496007098491e-06, + "log_odds_chosen": 0.413002073764801, + "log_odds_ratio": -0.5234980583190918, + "logits/chosen": -1.0057356357574463, + "logits/rejected": -1.0041232109069824, + "logps/chosen": -1.2989201545715332, + "logps/rejected": -1.6251273155212402, + "loss": 1.8832, + "nll_loss": 0.4184497594833374, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1298920214176178, + "rewards/margins": 0.03262072056531906, + "rewards/rejected": -0.16251273453235626, + "step": 604 + }, + { + "epoch": 1.5952537903757416, + "grad_norm": 9.780966758728027, + "learning_rate": 3.77639751552795e-06, + "log_odds_chosen": 0.5441007018089294, + "log_odds_ratio": -0.4700562357902527, + "logits/chosen": -1.1455645561218262, + "logits/rejected": -1.0333596467971802, + "logps/chosen": -1.3926197290420532, + "logps/rejected": -1.8256686925888062, + "loss": 2.4827, + "nll_loss": 0.5736803412437439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13926197588443756, + "rewards/margins": 0.04330489784479141, + "rewards/rejected": -0.18256688117980957, + "step": 605 + }, + { + "epoch": 1.5978905735003295, + "grad_norm": 9.922493934631348, + "learning_rate": 3.769299023957409e-06, + "log_odds_chosen": 0.3742057979106903, + "log_odds_ratio": -0.5399380922317505, + "logits/chosen": -1.1849782466888428, + "logits/rejected": -0.9935852885246277, + "logps/chosen": -1.4529054164886475, + "logps/rejected": -1.7377817630767822, + "loss": 2.6458, + "nll_loss": 0.6074674725532532, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1452905535697937, + "rewards/margins": 0.02848763018846512, + "rewards/rejected": -0.17377817630767822, + "step": 606 + }, + { + "epoch": 1.6005273566249176, + "grad_norm": 9.23854923248291, + "learning_rate": 3.762200532386868e-06, + "log_odds_chosen": 0.41105201840400696, + "log_odds_ratio": -0.5460788607597351, + "logits/chosen": -1.0429350137710571, + "logits/rejected": -0.9836790561676025, + "logps/chosen": -1.264663577079773, + "logps/rejected": -1.5987305641174316, + "loss": 2.1045, + "nll_loss": 0.47150543332099915, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12646636366844177, + "rewards/margins": 0.03340669721364975, + "rewards/rejected": -0.15987306833267212, + "step": 607 + }, + { + "epoch": 1.6031641397495056, + "grad_norm": 10.25296688079834, + "learning_rate": 3.7551020408163264e-06, + "log_odds_chosen": 0.4063701331615448, + "log_odds_ratio": -0.5314669013023376, + "logits/chosen": -1.085684061050415, + "logits/rejected": -0.9549438953399658, + "logps/chosen": -1.543460726737976, + "logps/rejected": -1.8915095329284668, + "loss": 2.8871, + "nll_loss": 0.6686206459999084, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15434607863426208, + "rewards/margins": 0.03480488061904907, + "rewards/rejected": -0.18915095925331116, + "step": 608 + }, + { + "epoch": 1.6058009228740935, + "grad_norm": 10.205924987792969, + "learning_rate": 3.748003549245785e-06, + "log_odds_chosen": 0.4665653109550476, + "log_odds_ratio": -0.515164852142334, + "logits/chosen": -1.05836021900177, + "logits/rejected": -1.031651258468628, + "logps/chosen": -1.393672227859497, + "logps/rejected": -1.7077929973602295, + "loss": 2.4482, + "nll_loss": 0.5605322122573853, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1393672227859497, + "rewards/margins": 0.03141207620501518, + "rewards/rejected": -0.170779287815094, + "step": 609 + }, + { + "epoch": 1.6084377059986816, + "grad_norm": 9.898493766784668, + "learning_rate": 3.7409050576752437e-06, + "log_odds_chosen": 0.41122180223464966, + "log_odds_ratio": -0.514970600605011, + "logits/chosen": -1.1111037731170654, + "logits/rejected": -1.0361148118972778, + "logps/chosen": -1.3642151355743408, + "logps/rejected": -1.6770347356796265, + "loss": 2.3191, + "nll_loss": 0.528271496295929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13642151653766632, + "rewards/margins": 0.031281955540180206, + "rewards/rejected": -0.16770347952842712, + "step": 610 + }, + { + "epoch": 1.6110744891232696, + "grad_norm": 9.282876968383789, + "learning_rate": 3.7338065661047026e-06, + "log_odds_chosen": 0.6254175901412964, + "log_odds_ratio": -0.43851619958877563, + "logits/chosen": -1.1438246965408325, + "logits/rejected": -1.0386680364608765, + "logps/chosen": -1.3083348274230957, + "logps/rejected": -1.8028080463409424, + "loss": 2.2901, + "nll_loss": 0.5286656618118286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1308334767818451, + "rewards/margins": 0.04944733530282974, + "rewards/rejected": -0.18028083443641663, + "step": 611 + }, + { + "epoch": 1.6137112722478575, + "grad_norm": 10.03630542755127, + "learning_rate": 3.726708074534161e-06, + "log_odds_chosen": 0.7499800324440002, + "log_odds_ratio": -0.44167301058769226, + "logits/chosen": -1.148216724395752, + "logits/rejected": -1.0289642810821533, + "logps/chosen": -1.3351669311523438, + "logps/rejected": -1.9622267484664917, + "loss": 2.2255, + "nll_loss": 0.5122069120407104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13351669907569885, + "rewards/margins": 0.06270598620176315, + "rewards/rejected": -0.1962226778268814, + "step": 612 + }, + { + "epoch": 1.6163480553724456, + "grad_norm": 10.207561492919922, + "learning_rate": 3.71960958296362e-06, + "log_odds_chosen": 0.5388544797897339, + "log_odds_ratio": -0.46620678901672363, + "logits/chosen": -1.0826255083084106, + "logits/rejected": -1.0181787014007568, + "logps/chosen": -1.3552072048187256, + "logps/rejected": -1.7654448747634888, + "loss": 2.3471, + "nll_loss": 0.5401521325111389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13552072644233704, + "rewards/margins": 0.041023775935173035, + "rewards/rejected": -0.17654448747634888, + "step": 613 + }, + { + "epoch": 1.6189848384970338, + "grad_norm": 9.864588737487793, + "learning_rate": 3.712511091393079e-06, + "log_odds_chosen": 0.5364480018615723, + "log_odds_ratio": -0.4759971797466278, + "logits/chosen": -1.0452721118927002, + "logits/rejected": -0.9847467541694641, + "logps/chosen": -1.3223601579666138, + "logps/rejected": -1.7249159812927246, + "loss": 2.3345, + "nll_loss": 0.5360279083251953, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13223600387573242, + "rewards/margins": 0.0402555987238884, + "rewards/rejected": -0.17249161005020142, + "step": 614 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 10.773433685302734, + "learning_rate": 3.705412599822538e-06, + "log_odds_chosen": 0.2956734895706177, + "log_odds_ratio": -0.5695884823799133, + "logits/chosen": -1.180643081665039, + "logits/rejected": -1.0363719463348389, + "logps/chosen": -1.4985196590423584, + "logps/rejected": -1.7395797967910767, + "loss": 3.0056, + "nll_loss": 0.6944411396980286, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1498519778251648, + "rewards/margins": 0.024106010794639587, + "rewards/rejected": -0.1739579737186432, + "step": 615 + }, + { + "epoch": 1.6242584047462096, + "grad_norm": 9.767648696899414, + "learning_rate": 3.6983141082519963e-06, + "log_odds_chosen": 0.5799336433410645, + "log_odds_ratio": -0.5387483835220337, + "logits/chosen": -1.058786153793335, + "logits/rejected": -0.9933582544326782, + "logps/chosen": -1.226000428199768, + "logps/rejected": -1.7379118204116821, + "loss": 1.9045, + "nll_loss": 0.42225104570388794, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12260004132986069, + "rewards/margins": 0.05119115114212036, + "rewards/rejected": -0.17379119992256165, + "step": 616 + }, + { + "epoch": 1.6268951878707978, + "grad_norm": 9.380396842956543, + "learning_rate": 3.691215616681455e-06, + "log_odds_chosen": 0.7051491737365723, + "log_odds_ratio": -0.43674933910369873, + "logits/chosen": -1.0409693717956543, + "logits/rejected": -0.9513552188873291, + "logps/chosen": -1.3766884803771973, + "logps/rejected": -1.9633386135101318, + "loss": 1.7689, + "nll_loss": 0.3985414505004883, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13766884803771973, + "rewards/margins": 0.058665018528699875, + "rewards/rejected": -0.1963338702917099, + "step": 617 + }, + { + "epoch": 1.6295319709953855, + "grad_norm": 10.475168228149414, + "learning_rate": 3.6841171251109137e-06, + "log_odds_chosen": 0.5606287717819214, + "log_odds_ratio": -0.4687998294830322, + "logits/chosen": -1.099635124206543, + "logits/rejected": -1.0176622867584229, + "logps/chosen": -1.3984782695770264, + "logps/rejected": -1.842691421508789, + "loss": 2.3243, + "nll_loss": 0.5342031121253967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13984781503677368, + "rewards/margins": 0.044421326369047165, + "rewards/rejected": -0.18426913022994995, + "step": 618 + }, + { + "epoch": 1.6321687541199736, + "grad_norm": 11.261204719543457, + "learning_rate": 3.677018633540372e-06, + "log_odds_chosen": 0.3890281617641449, + "log_odds_ratio": -0.5289594531059265, + "logits/chosen": -1.129044771194458, + "logits/rejected": -1.0024116039276123, + "logps/chosen": -1.541863203048706, + "logps/rejected": -1.8613746166229248, + "loss": 2.7641, + "nll_loss": 0.638132631778717, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15418632328510284, + "rewards/margins": 0.03195112943649292, + "rewards/rejected": -0.18613745272159576, + "step": 619 + }, + { + "epoch": 1.6348055372445618, + "grad_norm": 10.402582168579102, + "learning_rate": 3.669920141969831e-06, + "log_odds_chosen": 0.18877491354942322, + "log_odds_ratio": -0.604672908782959, + "logits/chosen": -1.2087739706039429, + "logits/rejected": -1.108641505241394, + "logps/chosen": -1.398322582244873, + "logps/rejected": -1.5473822355270386, + "loss": 2.986, + "nll_loss": 0.6860237717628479, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1398322582244873, + "rewards/margins": 0.01490597240626812, + "rewards/rejected": -0.15473823249340057, + "step": 620 + }, + { + "epoch": 1.6374423203691495, + "grad_norm": 11.109515190124512, + "learning_rate": 3.66282165039929e-06, + "log_odds_chosen": 0.43903154134750366, + "log_odds_ratio": -0.5070865750312805, + "logits/chosen": -1.1722849607467651, + "logits/rejected": -1.0782361030578613, + "logps/chosen": -1.3067762851715088, + "logps/rejected": -1.6321287155151367, + "loss": 2.6851, + "nll_loss": 0.620557427406311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13067764043807983, + "rewards/margins": 0.03253525123000145, + "rewards/rejected": -0.16321289539337158, + "step": 621 + }, + { + "epoch": 1.6400791034937376, + "grad_norm": 11.161482810974121, + "learning_rate": 3.655723158828749e-06, + "log_odds_chosen": 0.3513215184211731, + "log_odds_ratio": -0.5436182022094727, + "logits/chosen": -1.08698308467865, + "logits/rejected": -1.0150874853134155, + "logps/chosen": -1.435055136680603, + "logps/rejected": -1.711066722869873, + "loss": 3.0174, + "nll_loss": 0.699986457824707, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1435055136680603, + "rewards/margins": 0.027601167559623718, + "rewards/rejected": -0.17110668122768402, + "step": 622 + }, + { + "epoch": 1.6427158866183258, + "grad_norm": 10.726851463317871, + "learning_rate": 3.6486246672582073e-06, + "log_odds_chosen": 0.6032743453979492, + "log_odds_ratio": -0.4820668697357178, + "logits/chosen": -1.1473723649978638, + "logits/rejected": -0.9986626505851746, + "logps/chosen": -1.5315998792648315, + "logps/rejected": -2.037564754486084, + "loss": 2.7735, + "nll_loss": 0.6451709270477295, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1531599760055542, + "rewards/margins": 0.050596512854099274, + "rewards/rejected": -0.20375649631023407, + "step": 623 + }, + { + "epoch": 1.6453526697429135, + "grad_norm": 10.222211837768555, + "learning_rate": 3.641526175687666e-06, + "log_odds_chosen": 0.6348525881767273, + "log_odds_ratio": -0.4367481768131256, + "logits/chosen": -1.122359037399292, + "logits/rejected": -1.0492609739303589, + "logps/chosen": -1.326279878616333, + "logps/rejected": -1.822200059890747, + "loss": 2.4124, + "nll_loss": 0.559432864189148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13262799382209778, + "rewards/margins": 0.04959201067686081, + "rewards/rejected": -0.182219997048378, + "step": 624 + }, + { + "epoch": 1.6479894528675016, + "grad_norm": 9.407581329345703, + "learning_rate": 3.634427684117125e-06, + "log_odds_chosen": 0.495173841714859, + "log_odds_ratio": -0.4865230321884155, + "logits/chosen": -1.130155324935913, + "logits/rejected": -1.0504111051559448, + "logps/chosen": -1.3680860996246338, + "logps/rejected": -1.7636840343475342, + "loss": 2.2616, + "nll_loss": 0.5167529582977295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1368086189031601, + "rewards/margins": 0.03955978527665138, + "rewards/rejected": -0.17636841535568237, + "step": 625 + }, + { + "epoch": 1.6506262359920898, + "grad_norm": 10.777320861816406, + "learning_rate": 3.627329192546584e-06, + "log_odds_chosen": 0.5994431376457214, + "log_odds_ratio": -0.45564326643943787, + "logits/chosen": -1.0558720827102661, + "logits/rejected": -0.9743368029594421, + "logps/chosen": -1.4363338947296143, + "logps/rejected": -1.9244080781936646, + "loss": 2.1948, + "nll_loss": 0.5031373500823975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1436333954334259, + "rewards/margins": 0.04880741238594055, + "rewards/rejected": -0.19244080781936646, + "step": 626 + }, + { + "epoch": 1.6532630191166775, + "grad_norm": 9.53264331817627, + "learning_rate": 3.620230700976042e-06, + "log_odds_chosen": 0.46930187940597534, + "log_odds_ratio": -0.4994354844093323, + "logits/chosen": -1.1431329250335693, + "logits/rejected": -1.043256402015686, + "logps/chosen": -1.3216546773910522, + "logps/rejected": -1.6822595596313477, + "loss": 2.3107, + "nll_loss": 0.5277235507965088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13216547667980194, + "rewards/margins": 0.03606047108769417, + "rewards/rejected": -0.1682259440422058, + "step": 627 + }, + { + "epoch": 1.6558998022412657, + "grad_norm": 10.413905143737793, + "learning_rate": 3.613132209405501e-06, + "log_odds_chosen": 0.37333759665489197, + "log_odds_ratio": -0.5379760265350342, + "logits/chosen": -1.1401424407958984, + "logits/rejected": -1.060665249824524, + "logps/chosen": -1.4060555696487427, + "logps/rejected": -1.7031787633895874, + "loss": 2.4641, + "nll_loss": 0.5622209310531616, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14060556888580322, + "rewards/margins": 0.02971232309937477, + "rewards/rejected": -0.1703178882598877, + "step": 628 + }, + { + "epoch": 1.6585365853658538, + "grad_norm": 9.83200454711914, + "learning_rate": 3.60603371783496e-06, + "log_odds_chosen": 0.6011961698532104, + "log_odds_ratio": -0.449532151222229, + "logits/chosen": -1.0809606313705444, + "logits/rejected": -0.9807281494140625, + "logps/chosen": -1.0397064685821533, + "logps/rejected": -1.4554616212844849, + "loss": 1.9358, + "nll_loss": 0.43898892402648926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10397064685821533, + "rewards/margins": 0.04157552123069763, + "rewards/rejected": -0.14554616808891296, + "step": 629 + }, + { + "epoch": 1.6611733684904415, + "grad_norm": 10.28779125213623, + "learning_rate": 3.5989352262644188e-06, + "log_odds_chosen": 0.6239845156669617, + "log_odds_ratio": -0.47742146253585815, + "logits/chosen": -1.1649227142333984, + "logits/rejected": -1.0624198913574219, + "logps/chosen": -1.4135284423828125, + "logps/rejected": -1.8926094770431519, + "loss": 2.7276, + "nll_loss": 0.634151816368103, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14135286211967468, + "rewards/margins": 0.04790811240673065, + "rewards/rejected": -0.18926095962524414, + "step": 630 + }, + { + "epoch": 1.6638101516150297, + "grad_norm": 9.883668899536133, + "learning_rate": 3.5918367346938772e-06, + "log_odds_chosen": 0.6382372379302979, + "log_odds_ratio": -0.4400421679019928, + "logits/chosen": -1.172858715057373, + "logits/rejected": -1.0197105407714844, + "logps/chosen": -1.3074352741241455, + "logps/rejected": -1.8067971467971802, + "loss": 2.3467, + "nll_loss": 0.5426616668701172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13074351847171783, + "rewards/margins": 0.0499361976981163, + "rewards/rejected": -0.18067970871925354, + "step": 631 + }, + { + "epoch": 1.6664469347396178, + "grad_norm": 9.418743133544922, + "learning_rate": 3.584738243123336e-06, + "log_odds_chosen": 0.840705156326294, + "log_odds_ratio": -0.3766203820705414, + "logits/chosen": -1.1018195152282715, + "logits/rejected": -0.9619787335395813, + "logps/chosen": -1.2872685194015503, + "logps/rejected": -1.9614143371582031, + "loss": 2.0948, + "nll_loss": 0.4860471487045288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12872685492038727, + "rewards/margins": 0.06741458177566528, + "rewards/rejected": -0.19614143669605255, + "step": 632 + }, + { + "epoch": 1.6690837178642055, + "grad_norm": 10.141168594360352, + "learning_rate": 3.577639751552795e-06, + "log_odds_chosen": 0.45387399196624756, + "log_odds_ratio": -0.507139265537262, + "logits/chosen": -1.111092448234558, + "logits/rejected": -1.0183961391448975, + "logps/chosen": -1.3733915090560913, + "logps/rejected": -1.7340645790100098, + "loss": 2.2225, + "nll_loss": 0.5049020051956177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13733914494514465, + "rewards/margins": 0.03606731444597244, + "rewards/rejected": -0.1734064519405365, + "step": 633 + }, + { + "epoch": 1.6717205009887937, + "grad_norm": 9.883380889892578, + "learning_rate": 3.570541259982254e-06, + "log_odds_chosen": 0.5376417636871338, + "log_odds_ratio": -0.4772071838378906, + "logits/chosen": -1.1129980087280273, + "logits/rejected": -1.0229228734970093, + "logps/chosen": -1.2614349126815796, + "logps/rejected": -1.6619253158569336, + "loss": 2.5366, + "nll_loss": 0.5864204168319702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12614348530769348, + "rewards/margins": 0.04004904255270958, + "rewards/rejected": -0.16619253158569336, + "step": 634 + }, + { + "epoch": 1.6743572841133818, + "grad_norm": 10.11703872680664, + "learning_rate": 3.5634427684117124e-06, + "log_odds_chosen": 0.414914071559906, + "log_odds_ratio": -0.5368670225143433, + "logits/chosen": -1.1180551052093506, + "logits/rejected": -1.0814414024353027, + "logps/chosen": -1.423435926437378, + "logps/rejected": -1.7301952838897705, + "loss": 2.6061, + "nll_loss": 0.5978296399116516, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14234361052513123, + "rewards/margins": 0.03067592903971672, + "rewards/rejected": -0.17301952838897705, + "step": 635 + }, + { + "epoch": 1.6769940672379697, + "grad_norm": 10.515578269958496, + "learning_rate": 3.556344276841171e-06, + "log_odds_chosen": 0.5427382588386536, + "log_odds_ratio": -0.4689965546131134, + "logits/chosen": -1.1191020011901855, + "logits/rejected": -0.9840205907821655, + "logps/chosen": -1.515913724899292, + "logps/rejected": -1.9586641788482666, + "loss": 2.5874, + "nll_loss": 0.5999395251274109, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15159136056900024, + "rewards/margins": 0.044275060296058655, + "rewards/rejected": -0.1958664357662201, + "step": 636 + }, + { + "epoch": 1.6796308503625577, + "grad_norm": 9.867798805236816, + "learning_rate": 3.5492457852706298e-06, + "log_odds_chosen": 0.37667518854141235, + "log_odds_ratio": -0.5419520735740662, + "logits/chosen": -1.0526944398880005, + "logits/rejected": -1.015201210975647, + "logps/chosen": -1.3632912635803223, + "logps/rejected": -1.6562535762786865, + "loss": 2.1737, + "nll_loss": 0.48923933506011963, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13632912933826447, + "rewards/margins": 0.02929622307419777, + "rewards/rejected": -0.16562534868717194, + "step": 637 + }, + { + "epoch": 1.6822676334871458, + "grad_norm": 9.446643829345703, + "learning_rate": 3.5421472937000883e-06, + "log_odds_chosen": 0.6284515857696533, + "log_odds_ratio": -0.45431938767433167, + "logits/chosen": -1.0685917139053345, + "logits/rejected": -1.0092133283615112, + "logps/chosen": -1.367841124534607, + "logps/rejected": -1.8739535808563232, + "loss": 2.0596, + "nll_loss": 0.4694611132144928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1367841213941574, + "rewards/margins": 0.050611257553100586, + "rewards/rejected": -0.187395378947258, + "step": 638 + }, + { + "epoch": 1.6849044166117337, + "grad_norm": 10.240177154541016, + "learning_rate": 3.535048802129547e-06, + "log_odds_chosen": 0.539044976234436, + "log_odds_ratio": -0.5708103775978088, + "logits/chosen": -1.1326308250427246, + "logits/rejected": -1.0996580123901367, + "logps/chosen": -1.348641037940979, + "logps/rejected": -1.8032280206680298, + "loss": 2.7263, + "nll_loss": 0.6244887113571167, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13486409187316895, + "rewards/margins": 0.04545869678258896, + "rewards/rejected": -0.1803227961063385, + "step": 639 + }, + { + "epoch": 1.6875411997363217, + "grad_norm": 10.046351432800293, + "learning_rate": 3.527950310559006e-06, + "log_odds_chosen": 0.47341519594192505, + "log_odds_ratio": -0.5111666321754456, + "logits/chosen": -1.1026053428649902, + "logits/rejected": -1.0265198945999146, + "logps/chosen": -1.374668002128601, + "logps/rejected": -1.7360124588012695, + "loss": 2.3947, + "nll_loss": 0.5475689172744751, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13746680319309235, + "rewards/margins": 0.036134447902441025, + "rewards/rejected": -0.17360125482082367, + "step": 640 + }, + { + "epoch": 1.6901779828609098, + "grad_norm": 10.694351196289062, + "learning_rate": 3.520851818988465e-06, + "log_odds_chosen": 0.41393715143203735, + "log_odds_ratio": -0.5214405059814453, + "logits/chosen": -1.1589573621749878, + "logits/rejected": -1.0569067001342773, + "logps/chosen": -1.2749884128570557, + "logps/rejected": -1.5959004163742065, + "loss": 2.509, + "nll_loss": 0.575117826461792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12749885022640228, + "rewards/margins": 0.03209120035171509, + "rewards/rejected": -0.15959003567695618, + "step": 641 + }, + { + "epoch": 1.6928147659854977, + "grad_norm": 10.566948890686035, + "learning_rate": 3.5137533274179234e-06, + "log_odds_chosen": 0.6312991380691528, + "log_odds_ratio": -0.45921069383621216, + "logits/chosen": -1.1022566556930542, + "logits/rejected": -1.0211251974105835, + "logps/chosen": -1.4295891523361206, + "logps/rejected": -1.9132863283157349, + "loss": 2.6099, + "nll_loss": 0.6065584421157837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14295890927314758, + "rewards/margins": 0.04836973175406456, + "rewards/rejected": -0.19132864475250244, + "step": 642 + }, + { + "epoch": 1.6954515491100857, + "grad_norm": 9.972731590270996, + "learning_rate": 3.5066548358473823e-06, + "log_odds_chosen": 0.48382002115249634, + "log_odds_ratio": -0.49584776163101196, + "logits/chosen": -1.0859644412994385, + "logits/rejected": -0.9947874546051025, + "logps/chosen": -1.409611701965332, + "logps/rejected": -1.791120171546936, + "loss": 2.2081, + "nll_loss": 0.502432644367218, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1409611701965332, + "rewards/margins": 0.038150854408741, + "rewards/rejected": -0.1791120171546936, + "step": 643 + }, + { + "epoch": 1.6980883322346738, + "grad_norm": 9.329258918762207, + "learning_rate": 3.4995563442768412e-06, + "log_odds_chosen": 0.4627387821674347, + "log_odds_ratio": -0.4984897971153259, + "logits/chosen": -1.1354320049285889, + "logits/rejected": -1.0707372426986694, + "logps/chosen": -1.1951978206634521, + "logps/rejected": -1.5313777923583984, + "loss": 2.1396, + "nll_loss": 0.4850457012653351, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11951977759599686, + "rewards/margins": 0.03361799567937851, + "rewards/rejected": -0.15313777327537537, + "step": 644 + }, + { + "epoch": 1.7007251153592617, + "grad_norm": 10.280769348144531, + "learning_rate": 3.4924578527062997e-06, + "log_odds_chosen": 0.6402459144592285, + "log_odds_ratio": -0.4437893331050873, + "logits/chosen": -1.133689522743225, + "logits/rejected": -1.0183706283569336, + "logps/chosen": -1.5126464366912842, + "logps/rejected": -2.0405571460723877, + "loss": 2.5914, + "nll_loss": 0.6034782528877258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15126465260982513, + "rewards/margins": 0.05279106646776199, + "rewards/rejected": -0.20405571162700653, + "step": 645 + }, + { + "epoch": 1.7033618984838497, + "grad_norm": 10.134493827819824, + "learning_rate": 3.485359361135758e-06, + "log_odds_chosen": 0.3840335011482239, + "log_odds_ratio": -0.5229801535606384, + "logits/chosen": -1.0866787433624268, + "logits/rejected": -1.0402109622955322, + "logps/chosen": -1.4302546977996826, + "logps/rejected": -1.7201024293899536, + "loss": 2.6237, + "nll_loss": 0.6036279201507568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1430254727602005, + "rewards/margins": 0.02898477017879486, + "rewards/rejected": -0.17201025784015656, + "step": 646 + }, + { + "epoch": 1.7059986816084378, + "grad_norm": 10.103415489196777, + "learning_rate": 3.478260869565217e-06, + "log_odds_chosen": 0.4003719091415405, + "log_odds_ratio": -0.5274068713188171, + "logits/chosen": -1.156123161315918, + "logits/rejected": -1.0898571014404297, + "logps/chosen": -1.3229293823242188, + "logps/rejected": -1.638416051864624, + "loss": 2.4792, + "nll_loss": 0.5670531988143921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1322929412126541, + "rewards/margins": 0.031548675149679184, + "rewards/rejected": -0.1638416200876236, + "step": 647 + }, + { + "epoch": 1.7086354647330257, + "grad_norm": 10.304222106933594, + "learning_rate": 3.471162377994676e-06, + "log_odds_chosen": 0.3370283842086792, + "log_odds_ratio": -0.5651724338531494, + "logits/chosen": -1.086463212966919, + "logits/rejected": -1.0409562587738037, + "logps/chosen": -1.2855072021484375, + "logps/rejected": -1.5236806869506836, + "loss": 2.0769, + "nll_loss": 0.46271374821662903, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1285507082939148, + "rewards/margins": 0.023817352950572968, + "rewards/rejected": -0.15236806869506836, + "step": 648 + }, + { + "epoch": 1.7112722478576137, + "grad_norm": 9.503119468688965, + "learning_rate": 3.464063886424135e-06, + "log_odds_chosen": 0.6528380513191223, + "log_odds_ratio": -0.44192853569984436, + "logits/chosen": -1.104736328125, + "logits/rejected": -1.0059311389923096, + "logps/chosen": -1.231282114982605, + "logps/rejected": -1.757354497909546, + "loss": 2.0768, + "nll_loss": 0.4750056862831116, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12312820553779602, + "rewards/margins": 0.05260723829269409, + "rewards/rejected": -0.1757354587316513, + "step": 649 + }, + { + "epoch": 1.7139090309822018, + "grad_norm": 10.083404541015625, + "learning_rate": 3.4569653948535934e-06, + "log_odds_chosen": 0.5733547806739807, + "log_odds_ratio": -0.46470165252685547, + "logits/chosen": -1.1071587800979614, + "logits/rejected": -1.0430916547775269, + "logps/chosen": -1.440490484237671, + "logps/rejected": -1.9038130044937134, + "loss": 2.3848, + "nll_loss": 0.5497271418571472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1440490484237671, + "rewards/margins": 0.04633225500583649, + "rewards/rejected": -0.19038130342960358, + "step": 650 + }, + { + "epoch": 1.7165458141067897, + "grad_norm": 10.582185745239258, + "learning_rate": 3.4498669032830523e-06, + "log_odds_chosen": 0.26057717204093933, + "log_odds_ratio": -0.5819696187973022, + "logits/chosen": -1.1635925769805908, + "logits/rejected": -1.1211692094802856, + "logps/chosen": -1.4908151626586914, + "logps/rejected": -1.7052472829818726, + "loss": 3.023, + "nll_loss": 0.6975415945053101, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1490815281867981, + "rewards/margins": 0.021443195641040802, + "rewards/rejected": -0.1705247163772583, + "step": 651 + }, + { + "epoch": 1.7191825972313777, + "grad_norm": 10.465653419494629, + "learning_rate": 3.442768411712511e-06, + "log_odds_chosen": 0.50111985206604, + "log_odds_ratio": -0.4852719306945801, + "logits/chosen": -1.1733665466308594, + "logits/rejected": -1.0618579387664795, + "logps/chosen": -1.5112289190292358, + "logps/rejected": -1.9103684425354004, + "loss": 2.8812, + "nll_loss": 0.6717795133590698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15112288296222687, + "rewards/margins": 0.03991395980119705, + "rewards/rejected": -0.19103685021400452, + "step": 652 + }, + { + "epoch": 1.7218193803559658, + "grad_norm": 9.639845848083496, + "learning_rate": 3.43566992014197e-06, + "log_odds_chosen": 0.46708834171295166, + "log_odds_ratio": -0.508224606513977, + "logits/chosen": -1.155930519104004, + "logits/rejected": -1.038315773010254, + "logps/chosen": -1.2967246770858765, + "logps/rejected": -1.6591012477874756, + "loss": 2.3088, + "nll_loss": 0.5263881683349609, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12967246770858765, + "rewards/margins": 0.036237671971321106, + "rewards/rejected": -0.16591013967990875, + "step": 653 + }, + { + "epoch": 1.7244561634805537, + "grad_norm": 10.27575969696045, + "learning_rate": 3.428571428571428e-06, + "log_odds_chosen": 0.40247106552124023, + "log_odds_ratio": -0.5232669115066528, + "logits/chosen": -1.174773931503296, + "logits/rejected": -1.116478681564331, + "logps/chosen": -1.394790530204773, + "logps/rejected": -1.696317195892334, + "loss": 2.5231, + "nll_loss": 0.5784451961517334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13947907090187073, + "rewards/margins": 0.030152656137943268, + "rewards/rejected": -0.1696317195892334, + "step": 654 + }, + { + "epoch": 1.7270929466051417, + "grad_norm": 8.808902740478516, + "learning_rate": 3.421472937000887e-06, + "log_odds_chosen": 0.7139609456062317, + "log_odds_ratio": -0.4108840823173523, + "logits/chosen": -1.1678225994110107, + "logits/rejected": -1.0610462427139282, + "logps/chosen": -1.0931427478790283, + "logps/rejected": -1.6255385875701904, + "loss": 1.903, + "nll_loss": 0.43466371297836304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10931427776813507, + "rewards/margins": 0.05323958396911621, + "rewards/rejected": -0.16255386173725128, + "step": 655 + }, + { + "epoch": 1.7297297297297298, + "grad_norm": 9.192036628723145, + "learning_rate": 3.414374445430346e-06, + "log_odds_chosen": 0.4236711859703064, + "log_odds_ratio": -0.5087260603904724, + "logits/chosen": -1.1298270225524902, + "logits/rejected": -1.0678911209106445, + "logps/chosen": -1.243565559387207, + "logps/rejected": -1.5648874044418335, + "loss": 1.9972, + "nll_loss": 0.448428213596344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12435655295848846, + "rewards/margins": 0.032132189720869064, + "rewards/rejected": -0.15648874640464783, + "step": 656 + }, + { + "epoch": 1.7323665128543178, + "grad_norm": 10.163776397705078, + "learning_rate": 3.4072759538598044e-06, + "log_odds_chosen": 0.44237345457077026, + "log_odds_ratio": -0.5055872201919556, + "logits/chosen": -1.174792766571045, + "logits/rejected": -0.9949660301208496, + "logps/chosen": -1.4597645998001099, + "logps/rejected": -1.805436134338379, + "loss": 2.5862, + "nll_loss": 0.5959893465042114, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1459764540195465, + "rewards/margins": 0.034567151218652725, + "rewards/rejected": -0.18054361641407013, + "step": 657 + }, + { + "epoch": 1.7350032959789057, + "grad_norm": 10.466496467590332, + "learning_rate": 3.4001774622892633e-06, + "log_odds_chosen": 0.3946349024772644, + "log_odds_ratio": -0.5299810767173767, + "logits/chosen": -1.0455631017684937, + "logits/rejected": -1.0237305164337158, + "logps/chosen": -1.3399708271026611, + "logps/rejected": -1.635288953781128, + "loss": 2.9692, + "nll_loss": 0.6893136501312256, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13399706780910492, + "rewards/margins": 0.029531806707382202, + "rewards/rejected": -0.16352888941764832, + "step": 658 + }, + { + "epoch": 1.7376400791034938, + "grad_norm": 10.198365211486816, + "learning_rate": 3.393078970718722e-06, + "log_odds_chosen": 0.13144664466381073, + "log_odds_ratio": -0.6374571323394775, + "logits/chosen": -1.0861074924468994, + "logits/rejected": -1.044995665550232, + "logps/chosen": -1.3439302444458008, + "logps/rejected": -1.4365148544311523, + "loss": 2.4502, + "nll_loss": 0.5487998723983765, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13439303636550903, + "rewards/margins": 0.009258460253477097, + "rewards/rejected": -0.14365148544311523, + "step": 659 + }, + { + "epoch": 1.7402768622280818, + "grad_norm": 10.744078636169434, + "learning_rate": 3.385980479148181e-06, + "log_odds_chosen": 0.38164791464805603, + "log_odds_ratio": -0.5427396893501282, + "logits/chosen": -1.1259647607803345, + "logits/rejected": -1.0375760793685913, + "logps/chosen": -1.4524257183074951, + "logps/rejected": -1.7625154256820679, + "loss": 2.9398, + "nll_loss": 0.6806696653366089, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1452425718307495, + "rewards/margins": 0.031008977442979813, + "rewards/rejected": -0.17625154554843903, + "step": 660 + }, + { + "epoch": 1.7429136453526697, + "grad_norm": 10.451824188232422, + "learning_rate": 3.3788819875776396e-06, + "log_odds_chosen": 0.4451256990432739, + "log_odds_ratio": -0.5080350637435913, + "logits/chosen": -1.1427664756774902, + "logits/rejected": -1.0376893281936646, + "logps/chosen": -1.5254104137420654, + "logps/rejected": -1.8743579387664795, + "loss": 2.5769, + "nll_loss": 0.5934144258499146, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15254104137420654, + "rewards/margins": 0.03489474207162857, + "rewards/rejected": -0.1874357908964157, + "step": 661 + }, + { + "epoch": 1.7455504284772578, + "grad_norm": 10.607963562011719, + "learning_rate": 3.3717834960070985e-06, + "log_odds_chosen": 0.363365113735199, + "log_odds_ratio": -0.5402311682701111, + "logits/chosen": -1.1337553262710571, + "logits/rejected": -1.0186294317245483, + "logps/chosen": -1.41310453414917, + "logps/rejected": -1.7071406841278076, + "loss": 2.5018, + "nll_loss": 0.5714316964149475, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.141310453414917, + "rewards/margins": 0.029403626918792725, + "rewards/rejected": -0.17071406543254852, + "step": 662 + }, + { + "epoch": 1.7481872116018458, + "grad_norm": 9.786578178405762, + "learning_rate": 3.364685004436557e-06, + "log_odds_chosen": 0.6023552417755127, + "log_odds_ratio": -0.4504116177558899, + "logits/chosen": -1.1151041984558105, + "logits/rejected": -1.0372217893600464, + "logps/chosen": -1.2382469177246094, + "logps/rejected": -1.697322964668274, + "loss": 2.2844, + "nll_loss": 0.5260693430900574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12382469326257706, + "rewards/margins": 0.04590759426355362, + "rewards/rejected": -0.16973230242729187, + "step": 663 + }, + { + "epoch": 1.7508239947264337, + "grad_norm": 9.943042755126953, + "learning_rate": 3.357586512866016e-06, + "log_odds_chosen": 0.5671270489692688, + "log_odds_ratio": -0.46319738030433655, + "logits/chosen": -1.1743613481521606, + "logits/rejected": -1.0710899829864502, + "logps/chosen": -1.3550426959991455, + "logps/rejected": -1.797658085823059, + "loss": 2.4681, + "nll_loss": 0.5707059502601624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13550427556037903, + "rewards/margins": 0.04426154866814613, + "rewards/rejected": -0.17976582050323486, + "step": 664 + }, + { + "epoch": 1.7534607778510218, + "grad_norm": 9.573928833007812, + "learning_rate": 3.3504880212954743e-06, + "log_odds_chosen": 0.34388411045074463, + "log_odds_ratio": -0.5426332950592041, + "logits/chosen": -1.1098031997680664, + "logits/rejected": -1.0429176092147827, + "logps/chosen": -1.3713771104812622, + "logps/rejected": -1.6443697214126587, + "loss": 2.3063, + "nll_loss": 0.5223134160041809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13713771104812622, + "rewards/margins": 0.02729925885796547, + "rewards/rejected": -0.1644369661808014, + "step": 665 + }, + { + "epoch": 1.7560975609756098, + "grad_norm": 10.591869354248047, + "learning_rate": 3.3433895297249332e-06, + "log_odds_chosen": 0.2349964678287506, + "log_odds_ratio": -0.5956152677536011, + "logits/chosen": -1.1156516075134277, + "logits/rejected": -1.0448639392852783, + "logps/chosen": -1.4059268236160278, + "logps/rejected": -1.5864968299865723, + "loss": 2.4255, + "nll_loss": 0.5468214154243469, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14059269428253174, + "rewards/margins": 0.018056996166706085, + "rewards/rejected": -0.15864968299865723, + "step": 666 + }, + { + "epoch": 1.7587343441001977, + "grad_norm": 9.876556396484375, + "learning_rate": 3.336291038154392e-06, + "log_odds_chosen": 0.42307838797569275, + "log_odds_ratio": -0.524172306060791, + "logits/chosen": -1.1576995849609375, + "logits/rejected": -1.0660154819488525, + "logps/chosen": -1.4507625102996826, + "logps/rejected": -1.7858595848083496, + "loss": 2.8506, + "nll_loss": 0.6602423191070557, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14507625997066498, + "rewards/margins": 0.03350970149040222, + "rewards/rejected": -0.178585946559906, + "step": 667 + }, + { + "epoch": 1.7613711272247858, + "grad_norm": 10.308706283569336, + "learning_rate": 3.329192546583851e-06, + "log_odds_chosen": 0.4553511142730713, + "log_odds_ratio": -0.49892446398735046, + "logits/chosen": -1.0694628953933716, + "logits/rejected": -0.959567666053772, + "logps/chosen": -1.423482060432434, + "logps/rejected": -1.7801345586776733, + "loss": 2.2942, + "nll_loss": 0.5236632227897644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14234821498394012, + "rewards/margins": 0.03566524386405945, + "rewards/rejected": -0.17801345884799957, + "step": 668 + }, + { + "epoch": 1.7640079103493738, + "grad_norm": 9.855831146240234, + "learning_rate": 3.3220940550133095e-06, + "log_odds_chosen": 0.5339704751968384, + "log_odds_ratio": -0.4648168087005615, + "logits/chosen": -1.113582730293274, + "logits/rejected": -0.9964568614959717, + "logps/chosen": -1.4431257247924805, + "logps/rejected": -1.8742222785949707, + "loss": 2.4579, + "nll_loss": 0.567988932132721, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1443125605583191, + "rewards/margins": 0.04310966283082962, + "rewards/rejected": -0.1874222308397293, + "step": 669 + }, + { + "epoch": 1.7666446934739617, + "grad_norm": 9.270071983337402, + "learning_rate": 3.3149955634427684e-06, + "log_odds_chosen": 0.7656106352806091, + "log_odds_ratio": -0.39729052782058716, + "logits/chosen": -1.0145714282989502, + "logits/rejected": -0.9721503257751465, + "logps/chosen": -1.1446309089660645, + "logps/rejected": -1.6687047481536865, + "loss": 1.6998, + "nll_loss": 0.3852230906486511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11446309089660645, + "rewards/margins": 0.0524073988199234, + "rewards/rejected": -0.16687047481536865, + "step": 670 + }, + { + "epoch": 1.7692814765985498, + "grad_norm": 9.544934272766113, + "learning_rate": 3.3078970718722273e-06, + "log_odds_chosen": 0.3408544063568115, + "log_odds_ratio": -0.5713138580322266, + "logits/chosen": -1.1565693616867065, + "logits/rejected": -1.080590009689331, + "logps/chosen": -1.2859318256378174, + "logps/rejected": -1.5526658296585083, + "loss": 2.1821, + "nll_loss": 0.48839300870895386, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12859319150447845, + "rewards/margins": 0.026673391461372375, + "rewards/rejected": -0.15526658296585083, + "step": 671 + }, + { + "epoch": 1.7719182597231378, + "grad_norm": 10.113846778869629, + "learning_rate": 3.3007985803016858e-06, + "log_odds_chosen": 0.46357882022857666, + "log_odds_ratio": -0.5100479125976562, + "logits/chosen": -1.1293085813522339, + "logits/rejected": -1.0153019428253174, + "logps/chosen": -1.4023265838623047, + "logps/rejected": -1.7749102115631104, + "loss": 2.3904, + "nll_loss": 0.546592652797699, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14023266732692719, + "rewards/margins": 0.03725834935903549, + "rewards/rejected": -0.17749102413654327, + "step": 672 + }, + { + "epoch": 1.7745550428477257, + "grad_norm": 10.42104434967041, + "learning_rate": 3.2937000887311442e-06, + "log_odds_chosen": 0.2633205056190491, + "log_odds_ratio": -0.5774643421173096, + "logits/chosen": -1.1776223182678223, + "logits/rejected": -1.0979485511779785, + "logps/chosen": -1.4247124195098877, + "logps/rejected": -1.639265537261963, + "loss": 3.2279, + "nll_loss": 0.7492352724075317, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14247125387191772, + "rewards/margins": 0.02145530842244625, + "rewards/rejected": -0.16392655670642853, + "step": 673 + }, + { + "epoch": 1.7771918259723138, + "grad_norm": 10.222952842712402, + "learning_rate": 3.286601597160603e-06, + "log_odds_chosen": 0.6649592518806458, + "log_odds_ratio": -0.429875910282135, + "logits/chosen": -1.1035183668136597, + "logits/rejected": -0.9902955889701843, + "logps/chosen": -1.2553927898406982, + "logps/rejected": -1.788110375404358, + "loss": 2.124, + "nll_loss": 0.4880125820636749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12553928792476654, + "rewards/margins": 0.05327175557613373, + "rewards/rejected": -0.17881104350090027, + "step": 674 + }, + { + "epoch": 1.7798286090969018, + "grad_norm": 9.873235702514648, + "learning_rate": 3.279503105590062e-06, + "log_odds_chosen": 0.9217997193336487, + "log_odds_ratio": -0.4081748425960541, + "logits/chosen": -1.1465270519256592, + "logits/rejected": -0.9990952014923096, + "logps/chosen": -1.3967903852462769, + "logps/rejected": -2.183521270751953, + "loss": 2.67, + "nll_loss": 0.6266850233078003, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13967904448509216, + "rewards/margins": 0.07867306470870972, + "rewards/rejected": -0.21835210919380188, + "step": 675 + }, + { + "epoch": 1.7824653922214897, + "grad_norm": 10.12005615234375, + "learning_rate": 3.2724046140195205e-06, + "log_odds_chosen": 0.5252061486244202, + "log_odds_ratio": -0.5032826662063599, + "logits/chosen": -1.140293002128601, + "logits/rejected": -1.0709006786346436, + "logps/chosen": -1.2078694105148315, + "logps/rejected": -1.5759236812591553, + "loss": 2.4712, + "nll_loss": 0.5674687623977661, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12078694254159927, + "rewards/margins": 0.03680543974041939, + "rewards/rejected": -0.15759238600730896, + "step": 676 + }, + { + "epoch": 1.7851021753460778, + "grad_norm": 10.111419677734375, + "learning_rate": 3.2653061224489794e-06, + "log_odds_chosen": 0.7717751264572144, + "log_odds_ratio": -0.3943026661872864, + "logits/chosen": -1.1302120685577393, + "logits/rejected": -1.0362975597381592, + "logps/chosen": -1.3001060485839844, + "logps/rejected": -1.917264699935913, + "loss": 2.2049, + "nll_loss": 0.5117934942245483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13001060485839844, + "rewards/margins": 0.06171587109565735, + "rewards/rejected": -0.1917264759540558, + "step": 677 + }, + { + "epoch": 1.7877389584706658, + "grad_norm": 10.019546508789062, + "learning_rate": 3.2582076308784383e-06, + "log_odds_chosen": 0.4805706739425659, + "log_odds_ratio": -0.5003248453140259, + "logits/chosen": -1.0562057495117188, + "logits/rejected": -0.979279637336731, + "logps/chosen": -1.4084614515304565, + "logps/rejected": -1.7890034914016724, + "loss": 2.2081, + "nll_loss": 0.5019981861114502, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1408461481332779, + "rewards/margins": 0.038054209202528, + "rewards/rejected": -0.1789003610610962, + "step": 678 + }, + { + "epoch": 1.7903757415952537, + "grad_norm": 9.820355415344238, + "learning_rate": 3.2511091393078972e-06, + "log_odds_chosen": 0.7213483452796936, + "log_odds_ratio": -0.4192531704902649, + "logits/chosen": -1.0981171131134033, + "logits/rejected": -0.9715542793273926, + "logps/chosen": -1.3471983671188354, + "logps/rejected": -1.9289729595184326, + "loss": 2.6386, + "nll_loss": 0.6177271008491516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1347198337316513, + "rewards/margins": 0.05817745625972748, + "rewards/rejected": -0.19289728999137878, + "step": 679 + }, + { + "epoch": 1.7930125247198418, + "grad_norm": 10.302452087402344, + "learning_rate": 3.2440106477373553e-06, + "log_odds_chosen": 0.503058910369873, + "log_odds_ratio": -0.4829460382461548, + "logits/chosen": -1.03379225730896, + "logits/rejected": -0.9386270046234131, + "logps/chosen": -1.3021060228347778, + "logps/rejected": -1.692925214767456, + "loss": 2.4338, + "nll_loss": 0.5601641535758972, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13021060824394226, + "rewards/margins": 0.039081912487745285, + "rewards/rejected": -0.16929250955581665, + "step": 680 + }, + { + "epoch": 1.7956493078444298, + "grad_norm": 10.225312232971191, + "learning_rate": 3.236912156166814e-06, + "log_odds_chosen": 0.6208136677742004, + "log_odds_ratio": -0.45568156242370605, + "logits/chosen": -1.1496500968933105, + "logits/rejected": -1.0104174613952637, + "logps/chosen": -1.4223476648330688, + "logps/rejected": -1.9421371221542358, + "loss": 2.38, + "nll_loss": 0.5494294762611389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14223477244377136, + "rewards/margins": 0.051978956907987595, + "rewards/rejected": -0.19421373307704926, + "step": 681 + }, + { + "epoch": 1.7982860909690177, + "grad_norm": 10.216060638427734, + "learning_rate": 3.229813664596273e-06, + "log_odds_chosen": 0.4453953504562378, + "log_odds_ratio": -0.5002199411392212, + "logits/chosen": -1.0342788696289062, + "logits/rejected": -0.9758845567703247, + "logps/chosen": -1.4600276947021484, + "logps/rejected": -1.8262362480163574, + "loss": 2.1552, + "nll_loss": 0.4887891411781311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14600275456905365, + "rewards/margins": 0.03662087768316269, + "rewards/rejected": -0.18262363970279694, + "step": 682 + }, + { + "epoch": 1.8009228740936059, + "grad_norm": 9.598742485046387, + "learning_rate": 3.222715173025732e-06, + "log_odds_chosen": 0.7848517894744873, + "log_odds_ratio": -0.3915405571460724, + "logits/chosen": -1.111115574836731, + "logits/rejected": -1.0194597244262695, + "logps/chosen": -1.210547685623169, + "logps/rejected": -1.796340823173523, + "loss": 1.9571, + "nll_loss": 0.450122594833374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1210547685623169, + "rewards/margins": 0.05857931450009346, + "rewards/rejected": -0.17963406443595886, + "step": 683 + }, + { + "epoch": 1.8035596572181938, + "grad_norm": 10.945537567138672, + "learning_rate": 3.2156166814551904e-06, + "log_odds_chosen": 0.5689574480056763, + "log_odds_ratio": -0.4607033133506775, + "logits/chosen": -1.1433016061782837, + "logits/rejected": -1.083170771598816, + "logps/chosen": -1.33827543258667, + "logps/rejected": -1.7556018829345703, + "loss": 2.7518, + "nll_loss": 0.64188551902771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13382753729820251, + "rewards/margins": 0.04173264652490616, + "rewards/rejected": -0.17556019127368927, + "step": 684 + }, + { + "epoch": 1.8061964403427817, + "grad_norm": 11.310013771057129, + "learning_rate": 3.2085181898846493e-06, + "log_odds_chosen": 0.7163268327713013, + "log_odds_ratio": -0.4253354072570801, + "logits/chosen": -1.195902705192566, + "logits/rejected": -1.0543447732925415, + "logps/chosen": -1.698023796081543, + "logps/rejected": -2.255995750427246, + "loss": 3.9093, + "nll_loss": 0.9347800016403198, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16980236768722534, + "rewards/margins": 0.05579721927642822, + "rewards/rejected": -0.22559958696365356, + "step": 685 + }, + { + "epoch": 1.8088332234673699, + "grad_norm": 9.637194633483887, + "learning_rate": 3.2014196983141082e-06, + "log_odds_chosen": 0.6438184380531311, + "log_odds_ratio": -0.4424562454223633, + "logits/chosen": -1.0405304431915283, + "logits/rejected": -0.9892370104789734, + "logps/chosen": -1.1752492189407349, + "logps/rejected": -1.665137767791748, + "loss": 1.9299, + "nll_loss": 0.43822696805000305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11752492189407349, + "rewards/margins": 0.04898886755108833, + "rewards/rejected": -0.16651378571987152, + "step": 686 + }, + { + "epoch": 1.8114700065919578, + "grad_norm": 9.509078979492188, + "learning_rate": 3.194321206743567e-06, + "log_odds_chosen": 0.8525099754333496, + "log_odds_ratio": -0.41228288412094116, + "logits/chosen": -1.0589873790740967, + "logits/rejected": -0.970712423324585, + "logps/chosen": -1.2163913249969482, + "logps/rejected": -1.9108552932739258, + "loss": 1.818, + "nll_loss": 0.4132769703865051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12163914740085602, + "rewards/margins": 0.0694463849067688, + "rewards/rejected": -0.19108553230762482, + "step": 687 + }, + { + "epoch": 1.8141067897165457, + "grad_norm": 10.51211166381836, + "learning_rate": 3.1872227151730256e-06, + "log_odds_chosen": 0.5122712850570679, + "log_odds_ratio": -0.49355193972587585, + "logits/chosen": -1.0940035581588745, + "logits/rejected": -1.0186409950256348, + "logps/chosen": -1.4527509212493896, + "logps/rejected": -1.883984923362732, + "loss": 2.2886, + "nll_loss": 0.522806704044342, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14527510106563568, + "rewards/margins": 0.04312339052557945, + "rewards/rejected": -0.18839848041534424, + "step": 688 + }, + { + "epoch": 1.8167435728411339, + "grad_norm": 10.046796798706055, + "learning_rate": 3.180124223602484e-06, + "log_odds_chosen": 0.5925272703170776, + "log_odds_ratio": -0.4454019069671631, + "logits/chosen": -1.0761380195617676, + "logits/rejected": -0.9809951782226562, + "logps/chosen": -1.2984421253204346, + "logps/rejected": -1.7660629749298096, + "loss": 2.0259, + "nll_loss": 0.46194547414779663, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12984421849250793, + "rewards/margins": 0.04676208272576332, + "rewards/rejected": -0.17660629749298096, + "step": 689 + }, + { + "epoch": 1.8193803559657218, + "grad_norm": 9.937018394470215, + "learning_rate": 3.173025732031943e-06, + "log_odds_chosen": 0.5163881778717041, + "log_odds_ratio": -0.49798330664634705, + "logits/chosen": -1.1294372081756592, + "logits/rejected": -1.052258014678955, + "logps/chosen": -1.21603262424469, + "logps/rejected": -1.6259233951568604, + "loss": 2.3316, + "nll_loss": 0.5330907106399536, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12160325795412064, + "rewards/margins": 0.04098907858133316, + "rewards/rejected": -0.1625923365354538, + "step": 690 + }, + { + "epoch": 1.8220171390903097, + "grad_norm": 9.205368995666504, + "learning_rate": 3.165927240461402e-06, + "log_odds_chosen": 0.6621567010879517, + "log_odds_ratio": -0.4355185329914093, + "logits/chosen": -1.142890453338623, + "logits/rejected": -1.0354702472686768, + "logps/chosen": -1.113405704498291, + "logps/rejected": -1.6204802989959717, + "loss": 1.9372, + "nll_loss": 0.44075143337249756, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11134056746959686, + "rewards/margins": 0.050707463175058365, + "rewards/rejected": -0.16204802691936493, + "step": 691 + }, + { + "epoch": 1.8246539222148979, + "grad_norm": 10.018095970153809, + "learning_rate": 3.1588287488908604e-06, + "log_odds_chosen": 1.0003513097763062, + "log_odds_ratio": -0.37271711230278015, + "logits/chosen": -1.0820488929748535, + "logits/rejected": -0.9637270569801331, + "logps/chosen": -1.327383279800415, + "logps/rejected": -2.162868022918701, + "loss": 1.9861, + "nll_loss": 0.4592553675174713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13273833692073822, + "rewards/margins": 0.08354848623275757, + "rewards/rejected": -0.2162868231534958, + "step": 692 + }, + { + "epoch": 1.8272907053394858, + "grad_norm": 10.114798545837402, + "learning_rate": 3.1517302573203193e-06, + "log_odds_chosen": 0.6528754234313965, + "log_odds_ratio": -0.43074536323547363, + "logits/chosen": -1.0850951671600342, + "logits/rejected": -1.0183665752410889, + "logps/chosen": -1.5209827423095703, + "logps/rejected": -2.047553539276123, + "loss": 2.368, + "nll_loss": 0.5489242672920227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15209826827049255, + "rewards/margins": 0.052657097578048706, + "rewards/rejected": -0.20475536584854126, + "step": 693 + }, + { + "epoch": 1.8299274884640737, + "grad_norm": 10.068489074707031, + "learning_rate": 3.144631765749778e-06, + "log_odds_chosen": 0.4577704966068268, + "log_odds_ratio": -0.50062495470047, + "logits/chosen": -1.0895400047302246, + "logits/rejected": -0.971296489238739, + "logps/chosen": -1.5363192558288574, + "logps/rejected": -1.910767912864685, + "loss": 2.5335, + "nll_loss": 0.5833237171173096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15363194048404694, + "rewards/margins": 0.037444859743118286, + "rewards/rejected": -0.19107678532600403, + "step": 694 + }, + { + "epoch": 1.8325642715886619, + "grad_norm": 10.53513240814209, + "learning_rate": 3.1375332741792366e-06, + "log_odds_chosen": 0.3243619203567505, + "log_odds_ratio": -0.5468412041664124, + "logits/chosen": -1.164198637008667, + "logits/rejected": -1.0807690620422363, + "logps/chosen": -1.4449284076690674, + "logps/rejected": -1.705177903175354, + "loss": 2.6796, + "nll_loss": 0.6152034997940063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14449283480644226, + "rewards/margins": 0.0260249562561512, + "rewards/rejected": -0.17051780223846436, + "step": 695 + }, + { + "epoch": 1.8352010547132498, + "grad_norm": 9.664958000183105, + "learning_rate": 3.1304347826086955e-06, + "log_odds_chosen": 0.451429158449173, + "log_odds_ratio": -0.5083733201026917, + "logits/chosen": -1.153422474861145, + "logits/rejected": -1.094710111618042, + "logps/chosen": -1.4420045614242554, + "logps/rejected": -1.8038504123687744, + "loss": 2.3653, + "nll_loss": 0.540488600730896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14420045912265778, + "rewards/margins": 0.03618457168340683, + "rewards/rejected": -0.1803850382566452, + "step": 696 + }, + { + "epoch": 1.8378378378378377, + "grad_norm": 10.225156784057617, + "learning_rate": 3.1233362910381544e-06, + "log_odds_chosen": 0.2400350570678711, + "log_odds_ratio": -0.5829436182975769, + "logits/chosen": -1.1770219802856445, + "logits/rejected": -1.1034917831420898, + "logps/chosen": -1.3735429048538208, + "logps/rejected": -1.5605123043060303, + "loss": 2.5206, + "nll_loss": 0.5718554258346558, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1373542845249176, + "rewards/margins": 0.018696939572691917, + "rewards/rejected": -0.15605121850967407, + "step": 697 + }, + { + "epoch": 1.8404746209624259, + "grad_norm": 9.104494094848633, + "learning_rate": 3.116237799467613e-06, + "log_odds_chosen": 1.035887360572815, + "log_odds_ratio": -0.4015609323978424, + "logits/chosen": -1.1364988088607788, + "logits/rejected": -1.0374042987823486, + "logps/chosen": -1.1948455572128296, + "logps/rejected": -1.992734432220459, + "loss": 2.1548, + "nll_loss": 0.4985518157482147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1194845661520958, + "rewards/margins": 0.07978887856006622, + "rewards/rejected": -0.19927343726158142, + "step": 698 + }, + { + "epoch": 1.8431114040870138, + "grad_norm": 9.683701515197754, + "learning_rate": 3.1091393078970714e-06, + "log_odds_chosen": 0.2705000042915344, + "log_odds_ratio": -0.577082633972168, + "logits/chosen": -1.0815887451171875, + "logits/rejected": -1.0369924306869507, + "logps/chosen": -1.2282958030700684, + "logps/rejected": -1.437868356704712, + "loss": 1.8773, + "nll_loss": 0.4116290807723999, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12282958626747131, + "rewards/margins": 0.020957253873348236, + "rewards/rejected": -0.14378683269023895, + "step": 699 + }, + { + "epoch": 1.8457481872116017, + "grad_norm": 9.654878616333008, + "learning_rate": 3.1020408163265303e-06, + "log_odds_chosen": 0.5054506063461304, + "log_odds_ratio": -0.5001856684684753, + "logits/chosen": -1.1152770519256592, + "logits/rejected": -1.0579036474227905, + "logps/chosen": -1.1892142295837402, + "logps/rejected": -1.5673846006393433, + "loss": 1.9948, + "nll_loss": 0.44868773221969604, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11892141401767731, + "rewards/margins": 0.037817053496837616, + "rewards/rejected": -0.15673847496509552, + "step": 700 + }, + { + "epoch": 1.8483849703361899, + "grad_norm": 9.699676513671875, + "learning_rate": 3.094942324755989e-06, + "log_odds_chosen": 0.6631026268005371, + "log_odds_ratio": -0.4391542971134186, + "logits/chosen": -1.0755290985107422, + "logits/rejected": -0.9599955677986145, + "logps/chosen": -1.3520907163619995, + "logps/rejected": -1.8745989799499512, + "loss": 2.0716, + "nll_loss": 0.4739803969860077, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13520905375480652, + "rewards/margins": 0.05225083604454994, + "rewards/rejected": -0.18745990097522736, + "step": 701 + }, + { + "epoch": 1.8510217534607778, + "grad_norm": 10.442801475524902, + "learning_rate": 3.087843833185448e-06, + "log_odds_chosen": 0.5106677412986755, + "log_odds_ratio": -0.4874875545501709, + "logits/chosen": -1.146780014038086, + "logits/rejected": -1.0521918535232544, + "logps/chosen": -1.4891130924224854, + "logps/rejected": -1.9028270244598389, + "loss": 2.6028, + "nll_loss": 0.6019536256790161, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14891131222248077, + "rewards/margins": 0.04137139022350311, + "rewards/rejected": -0.1902827024459839, + "step": 702 + }, + { + "epoch": 1.8536585365853657, + "grad_norm": 8.870477676391602, + "learning_rate": 3.0807453416149066e-06, + "log_odds_chosen": 0.824571967124939, + "log_odds_ratio": -0.4329356551170349, + "logits/chosen": -1.0489051342010498, + "logits/rejected": -0.9817217588424683, + "logps/chosen": -1.0155425071716309, + "logps/rejected": -1.557959794998169, + "loss": 1.6078, + "nll_loss": 0.35866761207580566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1015542522072792, + "rewards/margins": 0.054241739213466644, + "rewards/rejected": -0.15579599142074585, + "step": 703 + }, + { + "epoch": 1.8562953197099539, + "grad_norm": 9.754107475280762, + "learning_rate": 3.0736468500443655e-06, + "log_odds_chosen": 0.5776047110557556, + "log_odds_ratio": -0.46575939655303955, + "logits/chosen": -1.1001160144805908, + "logits/rejected": -0.9796845316886902, + "logps/chosen": -1.4508471488952637, + "logps/rejected": -1.9013798236846924, + "loss": 2.2456, + "nll_loss": 0.5148159861564636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14508472383022308, + "rewards/margins": 0.04505325108766556, + "rewards/rejected": -0.19013796746730804, + "step": 704 + }, + { + "epoch": 1.858932102834542, + "grad_norm": 10.134719848632812, + "learning_rate": 3.0665483584738244e-06, + "log_odds_chosen": 0.4445488750934601, + "log_odds_ratio": -0.5248530507087708, + "logits/chosen": -1.2013803720474243, + "logits/rejected": -1.0629832744598389, + "logps/chosen": -1.440731167793274, + "logps/rejected": -1.7891690731048584, + "loss": 2.807, + "nll_loss": 0.649268388748169, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14407309889793396, + "rewards/margins": 0.034843798726797104, + "rewards/rejected": -0.17891690135002136, + "step": 705 + }, + { + "epoch": 1.8615688859591297, + "grad_norm": 9.605047225952148, + "learning_rate": 3.0594498669032833e-06, + "log_odds_chosen": 0.6526737809181213, + "log_odds_ratio": -0.43708115816116333, + "logits/chosen": -1.1206607818603516, + "logits/rejected": -0.9992779493331909, + "logps/chosen": -1.1988224983215332, + "logps/rejected": -1.6805169582366943, + "loss": 2.0354, + "nll_loss": 0.46514028310775757, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1198822557926178, + "rewards/margins": 0.048169441521167755, + "rewards/rejected": -0.16805168986320496, + "step": 706 + }, + { + "epoch": 1.8642056690837179, + "grad_norm": 10.393851280212402, + "learning_rate": 3.0523513753327413e-06, + "log_odds_chosen": 0.3795328438282013, + "log_odds_ratio": -0.536065399646759, + "logits/chosen": -1.0506230592727661, + "logits/rejected": -0.9675607085227966, + "logps/chosen": -1.3011157512664795, + "logps/rejected": -1.5858736038208008, + "loss": 2.5173, + "nll_loss": 0.5757268071174622, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13011157512664795, + "rewards/margins": 0.02847578376531601, + "rewards/rejected": -0.15858736634254456, + "step": 707 + }, + { + "epoch": 1.866842452208306, + "grad_norm": 10.673075675964355, + "learning_rate": 3.0452528837622002e-06, + "log_odds_chosen": 0.4388372600078583, + "log_odds_ratio": -0.5036283731460571, + "logits/chosen": -1.0832300186157227, + "logits/rejected": -0.9819395542144775, + "logps/chosen": -1.3873385190963745, + "logps/rejected": -1.731518268585205, + "loss": 2.5007, + "nll_loss": 0.5748181343078613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1387338638305664, + "rewards/margins": 0.03441796079277992, + "rewards/rejected": -0.17315182089805603, + "step": 708 + }, + { + "epoch": 1.8694792353328937, + "grad_norm": 10.285957336425781, + "learning_rate": 3.038154392191659e-06, + "log_odds_chosen": 0.4851961135864258, + "log_odds_ratio": -0.4922325015068054, + "logits/chosen": -1.1458088159561157, + "logits/rejected": -1.0482499599456787, + "logps/chosen": -1.318127989768982, + "logps/rejected": -1.6883344650268555, + "loss": 2.3074, + "nll_loss": 0.5276387929916382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13181281089782715, + "rewards/margins": 0.03702065348625183, + "rewards/rejected": -0.16883346438407898, + "step": 709 + }, + { + "epoch": 1.8721160184574819, + "grad_norm": 9.493169784545898, + "learning_rate": 3.031055900621118e-06, + "log_odds_chosen": 0.551567554473877, + "log_odds_ratio": -0.47243914008140564, + "logits/chosen": -1.0503392219543457, + "logits/rejected": -1.012966275215149, + "logps/chosen": -1.3903131484985352, + "logps/rejected": -1.8147319555282593, + "loss": 1.9649, + "nll_loss": 0.4439811706542969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.139031320810318, + "rewards/margins": 0.04244187846779823, + "rewards/rejected": -0.18147319555282593, + "step": 710 + }, + { + "epoch": 1.87475280158207, + "grad_norm": 10.9459867477417, + "learning_rate": 3.0239574090505765e-06, + "log_odds_chosen": 0.4790281057357788, + "log_odds_ratio": -0.49135541915893555, + "logits/chosen": -1.0996241569519043, + "logits/rejected": -1.0557196140289307, + "logps/chosen": -1.4130548238754272, + "logps/rejected": -1.7887698411941528, + "loss": 2.4598, + "nll_loss": 0.5658220052719116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14130547642707825, + "rewards/margins": 0.0375715047121048, + "rewards/rejected": -0.17887699604034424, + "step": 711 + }, + { + "epoch": 1.8773895847066577, + "grad_norm": 9.297354698181152, + "learning_rate": 3.0168589174800354e-06, + "log_odds_chosen": 0.4402661621570587, + "log_odds_ratio": -0.5122517943382263, + "logits/chosen": -1.086590051651001, + "logits/rejected": -1.048518180847168, + "logps/chosen": -1.2138535976409912, + "logps/rejected": -1.5244641304016113, + "loss": 2.0503, + "nll_loss": 0.4613535404205322, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1213853657245636, + "rewards/margins": 0.031061064451932907, + "rewards/rejected": -0.1524464190006256, + "step": 712 + }, + { + "epoch": 1.8800263678312459, + "grad_norm": 9.646504402160645, + "learning_rate": 3.0097604259094943e-06, + "log_odds_chosen": 0.38965076208114624, + "log_odds_ratio": -0.5235826373100281, + "logits/chosen": -1.107448697090149, + "logits/rejected": -1.0148836374282837, + "logps/chosen": -1.3409647941589355, + "logps/rejected": -1.6361957788467407, + "loss": 2.3443, + "nll_loss": 0.5337234735488892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13409648835659027, + "rewards/margins": 0.029523085802793503, + "rewards/rejected": -0.16361957788467407, + "step": 713 + }, + { + "epoch": 1.882663150955834, + "grad_norm": 11.009034156799316, + "learning_rate": 3.0026619343389528e-06, + "log_odds_chosen": 0.530620813369751, + "log_odds_ratio": -0.47252893447875977, + "logits/chosen": -1.1298047304153442, + "logits/rejected": -1.0366967916488647, + "logps/chosen": -1.4072208404541016, + "logps/rejected": -1.830824375152588, + "loss": 2.446, + "nll_loss": 0.5642455816268921, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14072206616401672, + "rewards/margins": 0.04236038029193878, + "rewards/rejected": -0.1830824315547943, + "step": 714 + }, + { + "epoch": 1.8852999340804217, + "grad_norm": 9.541563034057617, + "learning_rate": 2.9955634427684117e-06, + "log_odds_chosen": 0.6051095724105835, + "log_odds_ratio": -0.4559980034828186, + "logits/chosen": -1.060058355331421, + "logits/rejected": -0.9675153493881226, + "logps/chosen": -1.272647738456726, + "logps/rejected": -1.7453210353851318, + "loss": 1.9082, + "nll_loss": 0.43144115805625916, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12726476788520813, + "rewards/margins": 0.04726734012365341, + "rewards/rejected": -0.17453211545944214, + "step": 715 + }, + { + "epoch": 1.8879367172050099, + "grad_norm": 9.738765716552734, + "learning_rate": 2.98846495119787e-06, + "log_odds_chosen": 0.5946243405342102, + "log_odds_ratio": -0.4913176894187927, + "logits/chosen": -1.168287754058838, + "logits/rejected": -1.0590921640396118, + "logps/chosen": -1.27141273021698, + "logps/rejected": -1.6295111179351807, + "loss": 2.5759, + "nll_loss": 0.5948377847671509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12714126706123352, + "rewards/margins": 0.035809844732284546, + "rewards/rejected": -0.16295112669467926, + "step": 716 + }, + { + "epoch": 1.890573500329598, + "grad_norm": 9.53346061706543, + "learning_rate": 2.981366459627329e-06, + "log_odds_chosen": 0.8909972906112671, + "log_odds_ratio": -0.3848392367362976, + "logits/chosen": -1.010292410850525, + "logits/rejected": -0.9597340822219849, + "logps/chosen": -1.180947184562683, + "logps/rejected": -1.8466895818710327, + "loss": 2.1973, + "nll_loss": 0.5108322501182556, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11809471994638443, + "rewards/margins": 0.06657424569129944, + "rewards/rejected": -0.18466898798942566, + "step": 717 + }, + { + "epoch": 1.8932102834541857, + "grad_norm": 9.259313583374023, + "learning_rate": 2.9742679680567875e-06, + "log_odds_chosen": 0.8006938695907593, + "log_odds_ratio": -0.37917834520339966, + "logits/chosen": -1.073501706123352, + "logits/rejected": -1.0082961320877075, + "logps/chosen": -1.2161955833435059, + "logps/rejected": -1.83875572681427, + "loss": 1.7586, + "nll_loss": 0.40172743797302246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12161955237388611, + "rewards/margins": 0.06225602328777313, + "rewards/rejected": -0.18387556076049805, + "step": 718 + }, + { + "epoch": 1.8958470665787739, + "grad_norm": 9.879798889160156, + "learning_rate": 2.9671694764862464e-06, + "log_odds_chosen": 0.7014466524124146, + "log_odds_ratio": -0.4430071711540222, + "logits/chosen": -1.1105573177337646, + "logits/rejected": -0.9432505965232849, + "logps/chosen": -1.3722140789031982, + "logps/rejected": -1.937282919883728, + "loss": 2.2508, + "nll_loss": 0.5183901190757751, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13722142577171326, + "rewards/margins": 0.056506864726543427, + "rewards/rejected": -0.1937282830476761, + "step": 719 + }, + { + "epoch": 1.898483849703362, + "grad_norm": 9.9389009475708, + "learning_rate": 2.9600709849157053e-06, + "log_odds_chosen": 0.7960854768753052, + "log_odds_ratio": -0.39902588725090027, + "logits/chosen": -1.1237053871154785, + "logits/rejected": -1.043378233909607, + "logps/chosen": -1.2699528932571411, + "logps/rejected": -1.8459627628326416, + "loss": 2.1568, + "nll_loss": 0.4992954134941101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1269952952861786, + "rewards/margins": 0.05760098248720169, + "rewards/rejected": -0.18459627032279968, + "step": 720 + }, + { + "epoch": 1.9011206328279497, + "grad_norm": 9.857022285461426, + "learning_rate": 2.9529724933451642e-06, + "log_odds_chosen": 0.6493891477584839, + "log_odds_ratio": -0.4351784586906433, + "logits/chosen": -1.1531649827957153, + "logits/rejected": -1.0092952251434326, + "logps/chosen": -1.3989733457565308, + "logps/rejected": -1.9283164739608765, + "loss": 2.581, + "nll_loss": 0.6017433404922485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13989733159542084, + "rewards/margins": 0.05293431133031845, + "rewards/rejected": -0.19283165037631989, + "step": 721 + }, + { + "epoch": 1.903757415952538, + "grad_norm": 9.490763664245605, + "learning_rate": 2.9458740017746227e-06, + "log_odds_chosen": 0.5713068842887878, + "log_odds_ratio": -0.4703507423400879, + "logits/chosen": -1.1523844003677368, + "logits/rejected": -0.9938646554946899, + "logps/chosen": -1.1879104375839233, + "logps/rejected": -1.6090285778045654, + "loss": 2.2061, + "nll_loss": 0.5044786334037781, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11879104375839233, + "rewards/margins": 0.04211181402206421, + "rewards/rejected": -0.16090285778045654, + "step": 722 + }, + { + "epoch": 1.906394199077126, + "grad_norm": 10.344840049743652, + "learning_rate": 2.9387755102040816e-06, + "log_odds_chosen": 0.6381205320358276, + "log_odds_ratio": -0.4392378032207489, + "logits/chosen": -1.1031138896942139, + "logits/rejected": -0.9807515144348145, + "logps/chosen": -1.3628013134002686, + "logps/rejected": -1.865041971206665, + "loss": 2.6738, + "nll_loss": 0.6245163083076477, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1362801492214203, + "rewards/margins": 0.050224047154188156, + "rewards/rejected": -0.18650420010089874, + "step": 723 + }, + { + "epoch": 1.9090309822017137, + "grad_norm": 10.27442741394043, + "learning_rate": 2.9316770186335405e-06, + "log_odds_chosen": 0.5367138981819153, + "log_odds_ratio": -0.47964826226234436, + "logits/chosen": -1.150720477104187, + "logits/rejected": -1.040562629699707, + "logps/chosen": -1.2588378190994263, + "logps/rejected": -1.6655387878417969, + "loss": 2.4424, + "nll_loss": 0.5626363754272461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1258837878704071, + "rewards/margins": 0.04067010432481766, + "rewards/rejected": -0.16655388474464417, + "step": 724 + }, + { + "epoch": 1.911667765326302, + "grad_norm": 9.781803131103516, + "learning_rate": 2.924578527062999e-06, + "log_odds_chosen": 0.3105473220348358, + "log_odds_ratio": -0.5560980439186096, + "logits/chosen": -1.1701128482818604, + "logits/rejected": -1.060251235961914, + "logps/chosen": -1.3494994640350342, + "logps/rejected": -1.5901248455047607, + "loss": 2.7193, + "nll_loss": 0.6242145299911499, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1349499225616455, + "rewards/margins": 0.024062547832727432, + "rewards/rejected": -0.15901248157024384, + "step": 725 + }, + { + "epoch": 1.91430454845089, + "grad_norm": 9.695030212402344, + "learning_rate": 2.9174800354924575e-06, + "log_odds_chosen": 0.5412957072257996, + "log_odds_ratio": -0.4844801723957062, + "logits/chosen": -1.1531310081481934, + "logits/rejected": -1.0898048877716064, + "logps/chosen": -1.2744779586791992, + "logps/rejected": -1.6676826477050781, + "loss": 2.3628, + "nll_loss": 0.5422565937042236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12744779884815216, + "rewards/margins": 0.039320461452007294, + "rewards/rejected": -0.16676826775074005, + "step": 726 + }, + { + "epoch": 1.916941331575478, + "grad_norm": 10.185785293579102, + "learning_rate": 2.9103815439219164e-06, + "log_odds_chosen": 0.5890101790428162, + "log_odds_ratio": -0.44325900077819824, + "logits/chosen": -1.1804449558258057, + "logits/rejected": -0.9826102256774902, + "logps/chosen": -1.356105089187622, + "logps/rejected": -1.8139309883117676, + "loss": 2.7428, + "nll_loss": 0.6413748264312744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13561052083969116, + "rewards/margins": 0.045782577246427536, + "rewards/rejected": -0.1813930869102478, + "step": 727 + }, + { + "epoch": 1.919578114700066, + "grad_norm": 9.88233470916748, + "learning_rate": 2.9032830523513753e-06, + "log_odds_chosen": 0.688329815864563, + "log_odds_ratio": -0.418424129486084, + "logits/chosen": -1.0789997577667236, + "logits/rejected": -1.004409909248352, + "logps/chosen": -1.2447686195373535, + "logps/rejected": -1.7736945152282715, + "loss": 2.4874, + "nll_loss": 0.5799974203109741, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12447687238454819, + "rewards/margins": 0.052892591804265976, + "rewards/rejected": -0.17736944556236267, + "step": 728 + }, + { + "epoch": 1.922214897824654, + "grad_norm": 10.790196418762207, + "learning_rate": 2.896184560780834e-06, + "log_odds_chosen": 0.45303723216056824, + "log_odds_ratio": -0.4973563551902771, + "logits/chosen": -1.0952262878417969, + "logits/rejected": -1.0493183135986328, + "logps/chosen": -1.3963035345077515, + "logps/rejected": -1.7568044662475586, + "loss": 2.3031, + "nll_loss": 0.5260452032089233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13963034749031067, + "rewards/margins": 0.03605009987950325, + "rewards/rejected": -0.17568045854568481, + "step": 729 + }, + { + "epoch": 1.924851680949242, + "grad_norm": 10.839725494384766, + "learning_rate": 2.8890860692102926e-06, + "log_odds_chosen": 0.5385131239891052, + "log_odds_ratio": -0.47357553243637085, + "logits/chosen": -1.1808533668518066, + "logits/rejected": -1.045129418373108, + "logps/chosen": -1.469390869140625, + "logps/rejected": -1.9083622694015503, + "loss": 2.7756, + "nll_loss": 0.6465520262718201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14693908393383026, + "rewards/margins": 0.04389715567231178, + "rewards/rejected": -0.19083623588085175, + "step": 730 + }, + { + "epoch": 1.92748846407383, + "grad_norm": 9.984872817993164, + "learning_rate": 2.8819875776397515e-06, + "log_odds_chosen": 0.6364855170249939, + "log_odds_ratio": -0.4419393539428711, + "logits/chosen": -1.059470534324646, + "logits/rejected": -1.0081063508987427, + "logps/chosen": -1.362449049949646, + "logps/rejected": -1.8734867572784424, + "loss": 2.1205, + "nll_loss": 0.48593536019325256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13624490797519684, + "rewards/margins": 0.051103778183460236, + "rewards/rejected": -0.18734869360923767, + "step": 731 + }, + { + "epoch": 1.930125247198418, + "grad_norm": 10.651154518127441, + "learning_rate": 2.8748890860692104e-06, + "log_odds_chosen": 0.5924453735351562, + "log_odds_ratio": -0.4516263008117676, + "logits/chosen": -1.1864819526672363, + "logits/rejected": -1.0519362688064575, + "logps/chosen": -1.2969294786453247, + "logps/rejected": -1.7565484046936035, + "loss": 2.4514, + "nll_loss": 0.5676829814910889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.129692941904068, + "rewards/margins": 0.0459618978202343, + "rewards/rejected": -0.1756548434495926, + "step": 732 + }, + { + "epoch": 1.932762030323006, + "grad_norm": 10.41089153289795, + "learning_rate": 2.867790594498669e-06, + "log_odds_chosen": 0.285758912563324, + "log_odds_ratio": -0.5712098479270935, + "logits/chosen": -1.2113354206085205, + "logits/rejected": -1.1036522388458252, + "logps/chosen": -1.4044160842895508, + "logps/rejected": -1.6196887493133545, + "loss": 2.7402, + "nll_loss": 0.6279230117797852, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14044161140918732, + "rewards/margins": 0.021527256816625595, + "rewards/rejected": -0.1619688868522644, + "step": 733 + }, + { + "epoch": 1.935398813447594, + "grad_norm": 11.022461891174316, + "learning_rate": 2.8606921029281274e-06, + "log_odds_chosen": 0.5275789499282837, + "log_odds_ratio": -0.4706922173500061, + "logits/chosen": -1.21927011013031, + "logits/rejected": -1.0647447109222412, + "logps/chosen": -1.4133145809173584, + "logps/rejected": -1.8368372917175293, + "loss": 2.6767, + "nll_loss": 0.6221182942390442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14133146405220032, + "rewards/margins": 0.04235227406024933, + "rewards/rejected": -0.18368372321128845, + "step": 734 + }, + { + "epoch": 1.938035596572182, + "grad_norm": 9.407811164855957, + "learning_rate": 2.8535936113575863e-06, + "log_odds_chosen": 0.441721647977829, + "log_odds_ratio": -0.5065779685974121, + "logits/chosen": -1.0536956787109375, + "logits/rejected": -0.9907861948013306, + "logps/chosen": -1.1991026401519775, + "logps/rejected": -1.5222476720809937, + "loss": 2.0497, + "nll_loss": 0.4617636799812317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11991026252508163, + "rewards/margins": 0.032314497977495193, + "rewards/rejected": -0.15222477912902832, + "step": 735 + }, + { + "epoch": 1.94067237969677, + "grad_norm": 9.999675750732422, + "learning_rate": 2.846495119787045e-06, + "log_odds_chosen": 0.6899769306182861, + "log_odds_ratio": -0.41962409019470215, + "logits/chosen": -1.1290652751922607, + "logits/rejected": -1.0376579761505127, + "logps/chosen": -1.4229869842529297, + "logps/rejected": -1.9804730415344238, + "loss": 2.2811, + "nll_loss": 0.528308629989624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14229869842529297, + "rewards/margins": 0.0557485856115818, + "rewards/rejected": -0.19804728031158447, + "step": 736 + }, + { + "epoch": 1.943309162821358, + "grad_norm": 10.267180442810059, + "learning_rate": 2.8393966282165037e-06, + "log_odds_chosen": 0.5577402710914612, + "log_odds_ratio": -0.46777093410491943, + "logits/chosen": -1.0950483083724976, + "logits/rejected": -1.0081474781036377, + "logps/chosen": -1.3645390272140503, + "logps/rejected": -1.7952313423156738, + "loss": 2.704, + "nll_loss": 0.6292195320129395, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13645391166210175, + "rewards/margins": 0.043069228529930115, + "rewards/rejected": -0.17952314019203186, + "step": 737 + }, + { + "epoch": 1.945945945945946, + "grad_norm": 9.377684593200684, + "learning_rate": 2.8322981366459626e-06, + "log_odds_chosen": 0.43222254514694214, + "log_odds_ratio": -0.5103168487548828, + "logits/chosen": -1.0828691720962524, + "logits/rejected": -1.0081210136413574, + "logps/chosen": -1.3563244342803955, + "logps/rejected": -1.7054436206817627, + "loss": 1.9605, + "nll_loss": 0.4390990138053894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13563242554664612, + "rewards/margins": 0.03491192311048508, + "rewards/rejected": -0.1705443561077118, + "step": 738 + }, + { + "epoch": 1.948582729070534, + "grad_norm": 9.630548477172852, + "learning_rate": 2.8251996450754215e-06, + "log_odds_chosen": 0.634854793548584, + "log_odds_ratio": -0.4447363317012787, + "logits/chosen": -1.116487741470337, + "logits/rejected": -0.9982193112373352, + "logps/chosen": -1.2106181383132935, + "logps/rejected": -1.732189655303955, + "loss": 2.1607, + "nll_loss": 0.495696485042572, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12106182426214218, + "rewards/margins": 0.052157141268253326, + "rewards/rejected": -0.1732189655303955, + "step": 739 + }, + { + "epoch": 1.951219512195122, + "grad_norm": 9.25537109375, + "learning_rate": 2.8181011535048804e-06, + "log_odds_chosen": 0.5245146751403809, + "log_odds_ratio": -0.4931119382381439, + "logits/chosen": -1.1068140268325806, + "logits/rejected": -1.0467549562454224, + "logps/chosen": -1.3359605073928833, + "logps/rejected": -1.7490854263305664, + "loss": 1.8681, + "nll_loss": 0.4177016019821167, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1335960477590561, + "rewards/margins": 0.04131249338388443, + "rewards/rejected": -0.17490854859352112, + "step": 740 + }, + { + "epoch": 1.95385629531971, + "grad_norm": 9.554231643676758, + "learning_rate": 2.811002661934339e-06, + "log_odds_chosen": 0.5873994827270508, + "log_odds_ratio": -0.4573465585708618, + "logits/chosen": -1.1190392971038818, + "logits/rejected": -1.0469231605529785, + "logps/chosen": -1.2938807010650635, + "logps/rejected": -1.757972002029419, + "loss": 2.1344, + "nll_loss": 0.48787564039230347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12938806414604187, + "rewards/margins": 0.046409137547016144, + "rewards/rejected": -0.1757972091436386, + "step": 741 + }, + { + "epoch": 1.956493078444298, + "grad_norm": 10.868066787719727, + "learning_rate": 2.8039041703637977e-06, + "log_odds_chosen": 0.4728792905807495, + "log_odds_ratio": -0.49270740151405334, + "logits/chosen": -1.1300780773162842, + "logits/rejected": -1.0287731885910034, + "logps/chosen": -1.4597746133804321, + "logps/rejected": -1.8425248861312866, + "loss": 2.4114, + "nll_loss": 0.5535825490951538, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1459774672985077, + "rewards/margins": 0.03827501833438873, + "rewards/rejected": -0.18425247073173523, + "step": 742 + }, + { + "epoch": 1.959129861568886, + "grad_norm": 9.725842475891113, + "learning_rate": 2.796805678793256e-06, + "log_odds_chosen": 0.6654451489448547, + "log_odds_ratio": -0.4607689082622528, + "logits/chosen": -1.1382243633270264, + "logits/rejected": -1.0733942985534668, + "logps/chosen": -1.2684041261672974, + "logps/rejected": -1.785618543624878, + "loss": 2.2847, + "nll_loss": 0.5250944495201111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12684041261672974, + "rewards/margins": 0.051721446216106415, + "rewards/rejected": -0.17856186628341675, + "step": 743 + }, + { + "epoch": 1.961766644693474, + "grad_norm": 10.576794624328613, + "learning_rate": 2.789707187222715e-06, + "log_odds_chosen": 0.6424548029899597, + "log_odds_ratio": -0.45932537317276, + "logits/chosen": -1.0593125820159912, + "logits/rejected": -1.040118932723999, + "logps/chosen": -1.3602181673049927, + "logps/rejected": -1.8843753337860107, + "loss": 2.5558, + "nll_loss": 0.5930271148681641, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13602182269096375, + "rewards/margins": 0.052415721118450165, + "rewards/rejected": -0.1884375661611557, + "step": 744 + }, + { + "epoch": 1.964403427818062, + "grad_norm": 10.075294494628906, + "learning_rate": 2.7826086956521736e-06, + "log_odds_chosen": 0.5383829474449158, + "log_odds_ratio": -0.46967270970344543, + "logits/chosen": -1.1858677864074707, + "logits/rejected": -1.0492582321166992, + "logps/chosen": -1.3368380069732666, + "logps/rejected": -1.7565494775772095, + "loss": 2.3507, + "nll_loss": 0.5407072901725769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13368380069732666, + "rewards/margins": 0.04197114706039429, + "rewards/rejected": -0.17565494775772095, + "step": 745 + }, + { + "epoch": 1.96704021094265, + "grad_norm": 10.174092292785645, + "learning_rate": 2.7755102040816325e-06, + "log_odds_chosen": 0.7807490229606628, + "log_odds_ratio": -0.4044077694416046, + "logits/chosen": -1.0873914957046509, + "logits/rejected": -0.9772408604621887, + "logps/chosen": -1.2904683351516724, + "logps/rejected": -1.8890689611434937, + "loss": 2.3492, + "nll_loss": 0.5468517541885376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12904684245586395, + "rewards/margins": 0.05986006557941437, + "rewards/rejected": -0.18890689313411713, + "step": 746 + }, + { + "epoch": 1.969676994067238, + "grad_norm": 10.78231143951416, + "learning_rate": 2.7684117125110914e-06, + "log_odds_chosen": 0.29232144355773926, + "log_odds_ratio": -0.5633444786071777, + "logits/chosen": -1.1790039539337158, + "logits/rejected": -1.0135250091552734, + "logps/chosen": -1.4243428707122803, + "logps/rejected": -1.6583789587020874, + "loss": 2.8697, + "nll_loss": 0.6611008644104004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1424342840909958, + "rewards/margins": 0.02340361848473549, + "rewards/rejected": -0.16583789885044098, + "step": 747 + }, + { + "epoch": 1.972313777191826, + "grad_norm": 10.888761520385742, + "learning_rate": 2.7613132209405503e-06, + "log_odds_chosen": 0.7005020380020142, + "log_odds_ratio": -0.42932698130607605, + "logits/chosen": -1.1515930891036987, + "logits/rejected": -1.0647493600845337, + "logps/chosen": -1.4700562953948975, + "logps/rejected": -2.058032274246216, + "loss": 2.4503, + "nll_loss": 0.5696460008621216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14700563251972198, + "rewards/margins": 0.05879759415984154, + "rewards/rejected": -0.20580321550369263, + "step": 748 + }, + { + "epoch": 1.974950560316414, + "grad_norm": 10.145458221435547, + "learning_rate": 2.7542147293700088e-06, + "log_odds_chosen": 0.39581814408302307, + "log_odds_ratio": -0.5312362909317017, + "logits/chosen": -1.1222620010375977, + "logits/rejected": -1.020433783531189, + "logps/chosen": -1.45713472366333, + "logps/rejected": -1.7835485935211182, + "loss": 2.3973, + "nll_loss": 0.5462045073509216, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14571347832679749, + "rewards/margins": 0.03264138102531433, + "rewards/rejected": -0.17835485935211182, + "step": 749 + }, + { + "epoch": 1.977587343441002, + "grad_norm": 9.972173690795898, + "learning_rate": 2.7471162377994677e-06, + "log_odds_chosen": 0.5671351552009583, + "log_odds_ratio": -0.49810683727264404, + "logits/chosen": -1.018516182899475, + "logits/rejected": -0.9507037401199341, + "logps/chosen": -1.3662822246551514, + "logps/rejected": -1.8222932815551758, + "loss": 2.4143, + "nll_loss": 0.5537749528884888, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13662822544574738, + "rewards/margins": 0.045601099729537964, + "rewards/rejected": -0.18222934007644653, + "step": 750 + }, + { + "epoch": 1.98022412656559, + "grad_norm": 8.758198738098145, + "learning_rate": 2.740017746228926e-06, + "log_odds_chosen": 0.4473654627799988, + "log_odds_ratio": -0.5166757106781006, + "logits/chosen": -1.0452587604522705, + "logits/rejected": -0.9860420823097229, + "logps/chosen": -1.2896453142166138, + "logps/rejected": -1.6250877380371094, + "loss": 1.6328, + "nll_loss": 0.35654035210609436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12896452844142914, + "rewards/margins": 0.03354424238204956, + "rewards/rejected": -0.1625087708234787, + "step": 751 + }, + { + "epoch": 1.982860909690178, + "grad_norm": 9.59146785736084, + "learning_rate": 2.7329192546583846e-06, + "log_odds_chosen": 0.3367576599121094, + "log_odds_ratio": -0.5577321648597717, + "logits/chosen": -1.1110894680023193, + "logits/rejected": -1.0275874137878418, + "logps/chosen": -1.2562785148620605, + "logps/rejected": -1.5166959762573242, + "loss": 2.2225, + "nll_loss": 0.4998584985733032, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12562784552574158, + "rewards/margins": 0.026041746139526367, + "rewards/rejected": -0.15166959166526794, + "step": 752 + }, + { + "epoch": 1.985497692814766, + "grad_norm": 11.306876182556152, + "learning_rate": 2.7258207630878435e-06, + "log_odds_chosen": 0.9240682721138, + "log_odds_ratio": -0.45148539543151855, + "logits/chosen": -1.0912346839904785, + "logits/rejected": -0.9852017164230347, + "logps/chosen": -1.3862550258636475, + "logps/rejected": -2.1982877254486084, + "loss": 2.8986, + "nll_loss": 0.6795053482055664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13862551748752594, + "rewards/margins": 0.08120325952768326, + "rewards/rejected": -0.2198287844657898, + "step": 753 + }, + { + "epoch": 1.988134475939354, + "grad_norm": 9.992732048034668, + "learning_rate": 2.7187222715173024e-06, + "log_odds_chosen": 0.5552957057952881, + "log_odds_ratio": -0.46775341033935547, + "logits/chosen": -1.106242299079895, + "logits/rejected": -0.9955162405967712, + "logps/chosen": -1.2407417297363281, + "logps/rejected": -1.6577999591827393, + "loss": 2.0104, + "nll_loss": 0.4558298885822296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12407416850328445, + "rewards/margins": 0.041705839335918427, + "rewards/rejected": -0.16578000783920288, + "step": 754 + }, + { + "epoch": 1.990771259063942, + "grad_norm": 9.361556053161621, + "learning_rate": 2.7116237799467613e-06, + "log_odds_chosen": 0.6962761878967285, + "log_odds_ratio": -0.417205274105072, + "logits/chosen": -1.049082636833191, + "logits/rejected": -0.996416449546814, + "logps/chosen": -1.2697757482528687, + "logps/rejected": -1.797317624092102, + "loss": 1.8086, + "nll_loss": 0.4104408323764801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1269775778055191, + "rewards/margins": 0.052754178643226624, + "rewards/rejected": -0.17973175644874573, + "step": 755 + }, + { + "epoch": 1.99340804218853, + "grad_norm": 10.411174774169922, + "learning_rate": 2.7045252883762198e-06, + "log_odds_chosen": 0.3635842800140381, + "log_odds_ratio": -0.5335854291915894, + "logits/chosen": -1.0935719013214111, + "logits/rejected": -1.046020269393921, + "logps/chosen": -1.4086623191833496, + "logps/rejected": -1.6860154867172241, + "loss": 2.5162, + "nll_loss": 0.5756828188896179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1408662348985672, + "rewards/margins": 0.027735330164432526, + "rewards/rejected": -0.16860157251358032, + "step": 756 + }, + { + "epoch": 1.996044825313118, + "grad_norm": 9.917908668518066, + "learning_rate": 2.6974267968056787e-06, + "log_odds_chosen": 0.6547459363937378, + "log_odds_ratio": -0.44633957743644714, + "logits/chosen": -1.0706005096435547, + "logits/rejected": -1.034470558166504, + "logps/chosen": -1.2188279628753662, + "logps/rejected": -1.6781857013702393, + "loss": 1.9883, + "nll_loss": 0.452441543340683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12188279628753662, + "rewards/margins": 0.04593577980995178, + "rewards/rejected": -0.1678185760974884, + "step": 757 + }, + { + "epoch": 1.998681608437706, + "grad_norm": 10.383499145507812, + "learning_rate": 2.6903283052351376e-06, + "log_odds_chosen": 0.5008169412612915, + "log_odds_ratio": -0.48359251022338867, + "logits/chosen": -1.0675674676895142, + "logits/rejected": -1.0341382026672363, + "logps/chosen": -1.3880095481872559, + "logps/rejected": -1.7721683979034424, + "loss": 2.4269, + "nll_loss": 0.5583770275115967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1388009637594223, + "rewards/margins": 0.03841589018702507, + "rewards/rejected": -0.17721684277057648, + "step": 758 + }, + { + "epoch": 2.001318391562294, + "grad_norm": 11.348942756652832, + "learning_rate": 2.6832298136645965e-06, + "log_odds_chosen": 0.8849374055862427, + "log_odds_ratio": -0.4379063546657562, + "logits/chosen": -1.1419169902801514, + "logits/rejected": -1.0298004150390625, + "logps/chosen": -1.3286932706832886, + "logps/rejected": -2.0717482566833496, + "loss": 2.2291, + "nll_loss": 0.5134726166725159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13286933302879333, + "rewards/margins": 0.07430551201105118, + "rewards/rejected": -0.20717483758926392, + "step": 759 + }, + { + "epoch": 2.0039551746868822, + "grad_norm": 10.079428672790527, + "learning_rate": 2.6761313220940545e-06, + "log_odds_chosen": 0.5260939598083496, + "log_odds_ratio": -0.47664958238601685, + "logits/chosen": -1.209180235862732, + "logits/rejected": -1.0874968767166138, + "logps/chosen": -1.2297840118408203, + "logps/rejected": -1.6314725875854492, + "loss": 2.2452, + "nll_loss": 0.513647198677063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12297839671373367, + "rewards/margins": 0.04016885906457901, + "rewards/rejected": -0.16314725577831268, + "step": 760 + }, + { + "epoch": 2.00659195781147, + "grad_norm": 10.240530967712402, + "learning_rate": 2.6690328305235134e-06, + "log_odds_chosen": 0.3144952356815338, + "log_odds_ratio": -0.556167721748352, + "logits/chosen": -1.161802053451538, + "logits/rejected": -1.1026802062988281, + "logps/chosen": -1.4659947156906128, + "logps/rejected": -1.7172274589538574, + "loss": 2.5276, + "nll_loss": 0.5762755870819092, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14659947156906128, + "rewards/margins": 0.025123273953795433, + "rewards/rejected": -0.17172273993492126, + "step": 761 + }, + { + "epoch": 2.009228740936058, + "grad_norm": 9.578766822814941, + "learning_rate": 2.6619343389529723e-06, + "log_odds_chosen": 0.7745237946510315, + "log_odds_ratio": -0.44236063957214355, + "logits/chosen": -1.0694057941436768, + "logits/rejected": -0.9940791130065918, + "logps/chosen": -1.2447141408920288, + "logps/rejected": -1.8774677515029907, + "loss": 2.1539, + "nll_loss": 0.4942498803138733, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12447141110897064, + "rewards/margins": 0.06327535212039948, + "rewards/rejected": -0.1877467781305313, + "step": 762 + }, + { + "epoch": 2.0118655240606462, + "grad_norm": 10.117440223693848, + "learning_rate": 2.6548358473824312e-06, + "log_odds_chosen": 0.538081169128418, + "log_odds_ratio": -0.46689748764038086, + "logits/chosen": -1.1605879068374634, + "logits/rejected": -1.0427160263061523, + "logps/chosen": -1.3132154941558838, + "logps/rejected": -1.7294139862060547, + "loss": 2.5959, + "nll_loss": 0.6022971272468567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13132154941558838, + "rewards/margins": 0.04161985218524933, + "rewards/rejected": -0.1729414016008377, + "step": 763 + }, + { + "epoch": 2.014502307185234, + "grad_norm": 9.777716636657715, + "learning_rate": 2.6477373558118897e-06, + "log_odds_chosen": 0.5327706933021545, + "log_odds_ratio": -0.49130064249038696, + "logits/chosen": -1.1164560317993164, + "logits/rejected": -1.0969555377960205, + "logps/chosen": -1.237712025642395, + "logps/rejected": -1.625910758972168, + "loss": 2.266, + "nll_loss": 0.5173711776733398, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12377119064331055, + "rewards/margins": 0.03881988301873207, + "rewards/rejected": -0.1625910848379135, + "step": 764 + }, + { + "epoch": 2.017139090309822, + "grad_norm": 9.787508010864258, + "learning_rate": 2.6406388642413486e-06, + "log_odds_chosen": 0.736811101436615, + "log_odds_ratio": -0.40617555379867554, + "logits/chosen": -1.1186765432357788, + "logits/rejected": -1.0302070379257202, + "logps/chosen": -1.132691502571106, + "logps/rejected": -1.656376838684082, + "loss": 2.1621, + "nll_loss": 0.49990198016166687, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11326915770769119, + "rewards/margins": 0.052368536591529846, + "rewards/rejected": -0.16563768684864044, + "step": 765 + }, + { + "epoch": 2.0197758734344102, + "grad_norm": 10.189085960388184, + "learning_rate": 2.6335403726708075e-06, + "log_odds_chosen": 0.5936908721923828, + "log_odds_ratio": -0.45090213418006897, + "logits/chosen": -1.2093477249145508, + "logits/rejected": -1.0756824016571045, + "logps/chosen": -1.3594684600830078, + "logps/rejected": -1.8339993953704834, + "loss": 2.5355, + "nll_loss": 0.5887949466705322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1359468400478363, + "rewards/margins": 0.047453105449676514, + "rewards/rejected": -0.18339994549751282, + "step": 766 + }, + { + "epoch": 2.022412656558998, + "grad_norm": 10.076580047607422, + "learning_rate": 2.6264418811002664e-06, + "log_odds_chosen": 0.513107180595398, + "log_odds_ratio": -0.4888473451137543, + "logits/chosen": -1.129032850265503, + "logits/rejected": -1.0239429473876953, + "logps/chosen": -1.4806454181671143, + "logps/rejected": -1.900446891784668, + "loss": 2.3663, + "nll_loss": 0.5426843762397766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14806455373764038, + "rewards/margins": 0.04198013246059418, + "rewards/rejected": -0.19004470109939575, + "step": 767 + }, + { + "epoch": 2.025049439683586, + "grad_norm": 8.9835844039917, + "learning_rate": 2.619343389529725e-06, + "log_odds_chosen": 0.5582860112190247, + "log_odds_ratio": -0.47621530294418335, + "logits/chosen": -0.9895690083503723, + "logits/rejected": -0.9377783536911011, + "logps/chosen": -1.24507474899292, + "logps/rejected": -1.6821479797363281, + "loss": 1.5754, + "nll_loss": 0.3462305963039398, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12450747936964035, + "rewards/margins": 0.043707333505153656, + "rewards/rejected": -0.168214812874794, + "step": 768 + }, + { + "epoch": 2.0276862228081742, + "grad_norm": 10.730051040649414, + "learning_rate": 2.6122448979591834e-06, + "log_odds_chosen": 0.7082866430282593, + "log_odds_ratio": -0.4379771947860718, + "logits/chosen": -1.226049780845642, + "logits/rejected": -1.099149465560913, + "logps/chosen": -1.4317500591278076, + "logps/rejected": -2.018101215362549, + "loss": 2.9005, + "nll_loss": 0.681334912776947, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14317500591278076, + "rewards/margins": 0.05863512307405472, + "rewards/rejected": -0.20181013643741608, + "step": 769 + }, + { + "epoch": 2.030323005932762, + "grad_norm": 9.710260391235352, + "learning_rate": 2.6051464063886423e-06, + "log_odds_chosen": 0.770453929901123, + "log_odds_ratio": -0.43126559257507324, + "logits/chosen": -1.1383628845214844, + "logits/rejected": -1.0302354097366333, + "logps/chosen": -1.2499903440475464, + "logps/rejected": -1.8305046558380127, + "loss": 2.3788, + "nll_loss": 0.5515839457511902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1249990463256836, + "rewards/margins": 0.05805141478776932, + "rewards/rejected": -0.18305045366287231, + "step": 770 + }, + { + "epoch": 2.03295978905735, + "grad_norm": 10.036596298217773, + "learning_rate": 2.5980479148181007e-06, + "log_odds_chosen": 0.6412467360496521, + "log_odds_ratio": -0.46849995851516724, + "logits/chosen": -1.131897211074829, + "logits/rejected": -1.059012532234192, + "logps/chosen": -1.3608183860778809, + "logps/rejected": -1.8439494371414185, + "loss": 2.6094, + "nll_loss": 0.605492115020752, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13608182966709137, + "rewards/margins": 0.048313114792108536, + "rewards/rejected": -0.1843949556350708, + "step": 771 + }, + { + "epoch": 2.0355965721819382, + "grad_norm": 10.127285957336426, + "learning_rate": 2.5909494232475596e-06, + "log_odds_chosen": 0.2889218330383301, + "log_odds_ratio": -0.5826900601387024, + "logits/chosen": -1.1097972393035889, + "logits/rejected": -1.0736355781555176, + "logps/chosen": -1.3271205425262451, + "logps/rejected": -1.5551434755325317, + "loss": 2.75, + "nll_loss": 0.6292246580123901, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13271205127239227, + "rewards/margins": 0.022802293300628662, + "rewards/rejected": -0.15551434457302094, + "step": 772 + }, + { + "epoch": 2.038233355306526, + "grad_norm": 9.093977928161621, + "learning_rate": 2.5838509316770185e-06, + "log_odds_chosen": 0.44692784547805786, + "log_odds_ratio": -0.5036763548851013, + "logits/chosen": -1.0858466625213623, + "logits/rejected": -1.040008544921875, + "logps/chosen": -1.080949306488037, + "logps/rejected": -1.3944963216781616, + "loss": 1.8039, + "nll_loss": 0.40060439705848694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10809493809938431, + "rewards/margins": 0.031354695558547974, + "rewards/rejected": -0.13944962620735168, + "step": 773 + }, + { + "epoch": 2.040870138431114, + "grad_norm": 9.224924087524414, + "learning_rate": 2.5767524401064774e-06, + "log_odds_chosen": 0.7012994289398193, + "log_odds_ratio": -0.41290491819381714, + "logits/chosen": -1.1321061849594116, + "logits/rejected": -1.0112411975860596, + "logps/chosen": -1.133347749710083, + "logps/rejected": -1.6516485214233398, + "loss": 2.177, + "nll_loss": 0.5029694437980652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1133347749710083, + "rewards/margins": 0.051830075681209564, + "rewards/rejected": -0.16516485810279846, + "step": 774 + }, + { + "epoch": 2.0435069215557022, + "grad_norm": 10.931916236877441, + "learning_rate": 2.569653948535936e-06, + "log_odds_chosen": 0.48683279752731323, + "log_odds_ratio": -0.48779165744781494, + "logits/chosen": -1.1395113468170166, + "logits/rejected": -1.055709958076477, + "logps/chosen": -1.5400993824005127, + "logps/rejected": -1.9409468173980713, + "loss": 2.6751, + "nll_loss": 0.6199974417686462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15400993824005127, + "rewards/margins": 0.04008473455905914, + "rewards/rejected": -0.1940946877002716, + "step": 775 + }, + { + "epoch": 2.04614370468029, + "grad_norm": 10.797554969787598, + "learning_rate": 2.562555456965395e-06, + "log_odds_chosen": 0.42376700043678284, + "log_odds_ratio": -0.5112159848213196, + "logits/chosen": -1.163484811782837, + "logits/rejected": -1.0718196630477905, + "logps/chosen": -1.3019428253173828, + "logps/rejected": -1.6327495574951172, + "loss": 2.4008, + "nll_loss": 0.5490672588348389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1301942765712738, + "rewards/margins": 0.03308069333434105, + "rewards/rejected": -0.16327497363090515, + "step": 776 + }, + { + "epoch": 2.048780487804878, + "grad_norm": 9.408236503601074, + "learning_rate": 2.5554569653948537e-06, + "log_odds_chosen": 0.6201867461204529, + "log_odds_ratio": -0.4448419511318207, + "logits/chosen": -1.1767107248306274, + "logits/rejected": -1.074312686920166, + "logps/chosen": -1.1322187185287476, + "logps/rejected": -1.593127965927124, + "loss": 2.1043, + "nll_loss": 0.48158198595046997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11322186887264252, + "rewards/margins": 0.04609092324972153, + "rewards/rejected": -0.15931278467178345, + "step": 777 + }, + { + "epoch": 2.0514172709294662, + "grad_norm": 10.060628890991211, + "learning_rate": 2.548358473824312e-06, + "log_odds_chosen": 0.8383358716964722, + "log_odds_ratio": -0.3872286081314087, + "logits/chosen": -1.1026513576507568, + "logits/rejected": -1.02262282371521, + "logps/chosen": -1.244518756866455, + "logps/rejected": -1.8661563396453857, + "loss": 2.0012, + "nll_loss": 0.4615873098373413, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12445187568664551, + "rewards/margins": 0.06216376647353172, + "rewards/rejected": -0.18661564588546753, + "step": 778 + }, + { + "epoch": 2.054054054054054, + "grad_norm": 9.155142784118652, + "learning_rate": 2.5412599822537707e-06, + "log_odds_chosen": 0.38540011644363403, + "log_odds_ratio": -0.5317520499229431, + "logits/chosen": -1.0778992176055908, + "logits/rejected": -1.0378587245941162, + "logps/chosen": -1.3756157159805298, + "logps/rejected": -1.6602983474731445, + "loss": 2.061, + "nll_loss": 0.4620826840400696, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13756157457828522, + "rewards/margins": 0.02846825122833252, + "rewards/rejected": -0.16602981090545654, + "step": 779 + }, + { + "epoch": 2.056690837178642, + "grad_norm": 10.545321464538574, + "learning_rate": 2.5341614906832296e-06, + "log_odds_chosen": 0.505893349647522, + "log_odds_ratio": -0.4876767694950104, + "logits/chosen": -1.128211259841919, + "logits/rejected": -0.9622944593429565, + "logps/chosen": -1.4639647006988525, + "logps/rejected": -1.8753548860549927, + "loss": 2.6502, + "nll_loss": 0.6137940883636475, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1463964730501175, + "rewards/margins": 0.041139017790555954, + "rewards/rejected": -0.18753549456596375, + "step": 780 + }, + { + "epoch": 2.0593276203032302, + "grad_norm": 9.913485527038574, + "learning_rate": 2.5270629991126885e-06, + "log_odds_chosen": 0.3947891592979431, + "log_odds_ratio": -0.5272966027259827, + "logits/chosen": -1.2290658950805664, + "logits/rejected": -1.1424353122711182, + "logps/chosen": -1.2813048362731934, + "logps/rejected": -1.5810402631759644, + "loss": 2.6443, + "nll_loss": 0.6083491444587708, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1281304806470871, + "rewards/margins": 0.029973559081554413, + "rewards/rejected": -0.1581040322780609, + "step": 781 + }, + { + "epoch": 2.061964403427818, + "grad_norm": 9.621504783630371, + "learning_rate": 2.5199645075421474e-06, + "log_odds_chosen": 0.6991655826568604, + "log_odds_ratio": -0.4380638003349304, + "logits/chosen": -1.1538786888122559, + "logits/rejected": -1.0285148620605469, + "logps/chosen": -1.1817439794540405, + "logps/rejected": -1.7146399021148682, + "loss": 2.2452, + "nll_loss": 0.517501175403595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11817440390586853, + "rewards/margins": 0.05328959599137306, + "rewards/rejected": -0.1714639961719513, + "step": 782 + }, + { + "epoch": 2.064601186552406, + "grad_norm": 11.298930168151855, + "learning_rate": 2.512866015971606e-06, + "log_odds_chosen": 0.5812134742736816, + "log_odds_ratio": -0.45801496505737305, + "logits/chosen": -1.2240318059921265, + "logits/rejected": -1.0378308296203613, + "logps/chosen": -1.3979883193969727, + "logps/rejected": -1.8701411485671997, + "loss": 2.7863, + "nll_loss": 0.6507730484008789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1397988349199295, + "rewards/margins": 0.0472152978181839, + "rewards/rejected": -0.1870141327381134, + "step": 783 + }, + { + "epoch": 2.0672379696769942, + "grad_norm": 9.547139167785645, + "learning_rate": 2.5057675244010647e-06, + "log_odds_chosen": 0.5346148610115051, + "log_odds_ratio": -0.4834967255592346, + "logits/chosen": -1.0990629196166992, + "logits/rejected": -1.0282251834869385, + "logps/chosen": -1.0115312337875366, + "logps/rejected": -1.3923074007034302, + "loss": 1.8481, + "nll_loss": 0.4136834144592285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10115312039852142, + "rewards/margins": 0.03807761147618294, + "rewards/rejected": -0.13923074305057526, + "step": 784 + }, + { + "epoch": 2.069874752801582, + "grad_norm": 10.128329277038574, + "learning_rate": 2.4986690328305236e-06, + "log_odds_chosen": 0.5243815779685974, + "log_odds_ratio": -0.4750070571899414, + "logits/chosen": -1.1562871932983398, + "logits/rejected": -1.0403151512145996, + "logps/chosen": -1.2300305366516113, + "logps/rejected": -1.6397430896759033, + "loss": 2.0847, + "nll_loss": 0.4736851453781128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1230030506849289, + "rewards/margins": 0.040971267968416214, + "rewards/rejected": -0.1639743149280548, + "step": 785 + }, + { + "epoch": 2.07251153592617, + "grad_norm": 10.04702091217041, + "learning_rate": 2.4915705412599825e-06, + "log_odds_chosen": 0.7055188417434692, + "log_odds_ratio": -0.4192762076854706, + "logits/chosen": -1.1293883323669434, + "logits/rejected": -1.0029202699661255, + "logps/chosen": -1.264027714729309, + "logps/rejected": -1.823844075202942, + "loss": 2.281, + "nll_loss": 0.5283329486846924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12640278041362762, + "rewards/margins": 0.05598163977265358, + "rewards/rejected": -0.1823844015598297, + "step": 786 + }, + { + "epoch": 2.0751483190507582, + "grad_norm": 9.414009094238281, + "learning_rate": 2.4844720496894406e-06, + "log_odds_chosen": 0.595037579536438, + "log_odds_ratio": -0.46986937522888184, + "logits/chosen": -1.0502935647964478, + "logits/rejected": -1.0202876329421997, + "logps/chosen": -1.0903228521347046, + "logps/rejected": -1.5388803482055664, + "loss": 1.7637, + "nll_loss": 0.39393508434295654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1090322881937027, + "rewards/margins": 0.044855739921331406, + "rewards/rejected": -0.1538880169391632, + "step": 787 + }, + { + "epoch": 2.077785102175346, + "grad_norm": 9.77463436126709, + "learning_rate": 2.4773735581188995e-06, + "log_odds_chosen": 0.7229048013687134, + "log_odds_ratio": -0.40499523282051086, + "logits/chosen": -1.1099520921707153, + "logits/rejected": -1.0251625776290894, + "logps/chosen": -1.338236689567566, + "logps/rejected": -1.9192970991134644, + "loss": 2.149, + "nll_loss": 0.4967554211616516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1338236927986145, + "rewards/margins": 0.058106038719415665, + "rewards/rejected": -0.19192972779273987, + "step": 788 + }, + { + "epoch": 2.080421885299934, + "grad_norm": 10.018221855163574, + "learning_rate": 2.4702750665483584e-06, + "log_odds_chosen": 0.557384192943573, + "log_odds_ratio": -0.48614200949668884, + "logits/chosen": -1.125726342201233, + "logits/rejected": -1.0347040891647339, + "logps/chosen": -1.2295360565185547, + "logps/rejected": -1.650094747543335, + "loss": 2.0944, + "nll_loss": 0.4749777913093567, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1229536160826683, + "rewards/margins": 0.04205586016178131, + "rewards/rejected": -0.16500946879386902, + "step": 789 + }, + { + "epoch": 2.0830586684245223, + "grad_norm": 9.642407417297363, + "learning_rate": 2.463176574977817e-06, + "log_odds_chosen": 0.6076935529708862, + "log_odds_ratio": -0.4736708998680115, + "logits/chosen": -1.0674395561218262, + "logits/rejected": -1.0196499824523926, + "logps/chosen": -1.3642213344573975, + "logps/rejected": -1.8080679178237915, + "loss": 2.0324, + "nll_loss": 0.4607207179069519, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13642212748527527, + "rewards/margins": 0.044384658336639404, + "rewards/rejected": -0.18080680072307587, + "step": 790 + }, + { + "epoch": 2.08569545154911, + "grad_norm": 9.645079612731934, + "learning_rate": 2.4560780834072758e-06, + "log_odds_chosen": 0.6035420894622803, + "log_odds_ratio": -0.44714921712875366, + "logits/chosen": -1.1527628898620605, + "logits/rejected": -1.0415222644805908, + "logps/chosen": -1.1864968538284302, + "logps/rejected": -1.6468815803527832, + "loss": 2.1985, + "nll_loss": 0.504905104637146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1186496764421463, + "rewards/margins": 0.046038489788770676, + "rewards/rejected": -0.16468816995620728, + "step": 791 + }, + { + "epoch": 2.088332234673698, + "grad_norm": 10.855006217956543, + "learning_rate": 2.4489795918367347e-06, + "log_odds_chosen": 0.7123809456825256, + "log_odds_ratio": -0.4443729519844055, + "logits/chosen": -1.128336787223816, + "logits/rejected": -0.983013391494751, + "logps/chosen": -1.3530423641204834, + "logps/rejected": -1.9418303966522217, + "loss": 2.4777, + "nll_loss": 0.5749930143356323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13530424237251282, + "rewards/margins": 0.05887877941131592, + "rewards/rejected": -0.19418302178382874, + "step": 792 + }, + { + "epoch": 2.0909690177982863, + "grad_norm": 10.273240089416504, + "learning_rate": 2.4418811002661936e-06, + "log_odds_chosen": 0.6091665029525757, + "log_odds_ratio": -0.4456428587436676, + "logits/chosen": -1.0923587083816528, + "logits/rejected": -0.965667188167572, + "logps/chosen": -1.3967857360839844, + "logps/rejected": -1.8644620180130005, + "loss": 2.2365, + "nll_loss": 0.5145605802536011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13967856764793396, + "rewards/margins": 0.04676762968301773, + "rewards/rejected": -0.1864461898803711, + "step": 793 + }, + { + "epoch": 2.093605800922874, + "grad_norm": 9.252375602722168, + "learning_rate": 2.434782608695652e-06, + "log_odds_chosen": 0.5784919857978821, + "log_odds_ratio": -0.4597025215625763, + "logits/chosen": -1.0548346042633057, + "logits/rejected": -1.0105911493301392, + "logps/chosen": -1.1711606979370117, + "logps/rejected": -1.5927342176437378, + "loss": 1.7621, + "nll_loss": 0.3945651650428772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11711607873439789, + "rewards/margins": 0.04215734824538231, + "rewards/rejected": -0.1592734307050705, + "step": 794 + }, + { + "epoch": 2.096242584047462, + "grad_norm": 9.470747947692871, + "learning_rate": 2.427684117125111e-06, + "log_odds_chosen": 0.7224401235580444, + "log_odds_ratio": -0.42393580079078674, + "logits/chosen": -1.1122452020645142, + "logits/rejected": -1.0029590129852295, + "logps/chosen": -1.26948881149292, + "logps/rejected": -1.857187271118164, + "loss": 2.2675, + "nll_loss": 0.5244921445846558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12694887816905975, + "rewards/margins": 0.05876985937356949, + "rewards/rejected": -0.18571873009204865, + "step": 795 + }, + { + "epoch": 2.0988793671720503, + "grad_norm": 9.463521003723145, + "learning_rate": 2.4205856255545694e-06, + "log_odds_chosen": 1.0167723894119263, + "log_odds_ratio": -0.3493209481239319, + "logits/chosen": -1.101204752922058, + "logits/rejected": -1.0462161302566528, + "logps/chosen": -0.9706986546516418, + "logps/rejected": -1.6190752983093262, + "loss": 1.6351, + "nll_loss": 0.3738459348678589, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0970698744058609, + "rewards/margins": 0.06483766436576843, + "rewards/rejected": -0.16190752387046814, + "step": 796 + }, + { + "epoch": 2.101516150296638, + "grad_norm": 9.539721488952637, + "learning_rate": 2.4134871339840283e-06, + "log_odds_chosen": 0.49127352237701416, + "log_odds_ratio": -0.4973955452442169, + "logits/chosen": -1.0409544706344604, + "logits/rejected": -1.0238300561904907, + "logps/chosen": -1.3186019659042358, + "logps/rejected": -1.69057035446167, + "loss": 2.0852, + "nll_loss": 0.4715545177459717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13186019659042358, + "rewards/margins": 0.03719683736562729, + "rewards/rejected": -0.16905704140663147, + "step": 797 + }, + { + "epoch": 2.104152933421226, + "grad_norm": 10.430363655090332, + "learning_rate": 2.406388642413487e-06, + "log_odds_chosen": 0.2506150007247925, + "log_odds_ratio": -0.5804030895233154, + "logits/chosen": -1.1437674760818481, + "logits/rejected": -1.1276366710662842, + "logps/chosen": -1.5441486835479736, + "logps/rejected": -1.736472487449646, + "loss": 2.6284, + "nll_loss": 0.5990653038024902, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15441486239433289, + "rewards/margins": 0.019232388585805893, + "rewards/rejected": -0.17364725470542908, + "step": 798 + }, + { + "epoch": 2.1067897165458143, + "grad_norm": 9.204401016235352, + "learning_rate": 2.3992901508429457e-06, + "log_odds_chosen": 0.9071800708770752, + "log_odds_ratio": -0.3889234960079193, + "logits/chosen": -1.079196572303772, + "logits/rejected": -0.9312431812286377, + "logps/chosen": -1.2495801448822021, + "logps/rejected": -1.9906275272369385, + "loss": 1.8734, + "nll_loss": 0.4294639229774475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12495802342891693, + "rewards/margins": 0.07410473376512527, + "rewards/rejected": -0.1990627497434616, + "step": 799 + }, + { + "epoch": 2.109426499670402, + "grad_norm": 10.068049430847168, + "learning_rate": 2.3921916592724046e-06, + "log_odds_chosen": 0.5332250595092773, + "log_odds_ratio": -0.48249611258506775, + "logits/chosen": -1.14569091796875, + "logits/rejected": -1.0281379222869873, + "logps/chosen": -1.500299334526062, + "logps/rejected": -1.9449788331985474, + "loss": 2.7113, + "nll_loss": 0.6295745968818665, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15002992749214172, + "rewards/margins": 0.04446795582771301, + "rewards/rejected": -0.19449788331985474, + "step": 800 + }, + { + "epoch": 2.11206328279499, + "grad_norm": 10.614519119262695, + "learning_rate": 2.3850931677018635e-06, + "log_odds_chosen": 0.5133019089698792, + "log_odds_ratio": -0.48530033230781555, + "logits/chosen": -1.0950928926467896, + "logits/rejected": -0.9656269550323486, + "logps/chosen": -1.5714972019195557, + "logps/rejected": -2.00163197517395, + "loss": 2.49, + "nll_loss": 0.5739596486091614, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15714971721172333, + "rewards/margins": 0.04301348328590393, + "rewards/rejected": -0.20016320049762726, + "step": 801 + }, + { + "epoch": 2.1147000659195783, + "grad_norm": 8.792914390563965, + "learning_rate": 2.377994676131322e-06, + "log_odds_chosen": 0.7065452337265015, + "log_odds_ratio": -0.4272323250770569, + "logits/chosen": -1.0454217195510864, + "logits/rejected": -0.9867510199546814, + "logps/chosen": -1.188128113746643, + "logps/rejected": -1.7429618835449219, + "loss": 1.7372, + "nll_loss": 0.39158543944358826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11881281435489655, + "rewards/margins": 0.055483367294073105, + "rewards/rejected": -0.17429618537425995, + "step": 802 + }, + { + "epoch": 2.117336849044166, + "grad_norm": 9.577733039855957, + "learning_rate": 2.370896184560781e-06, + "log_odds_chosen": 0.41758400201797485, + "log_odds_ratio": -0.5160441398620605, + "logits/chosen": -1.1367295980453491, + "logits/rejected": -1.0775389671325684, + "logps/chosen": -1.2276097536087036, + "logps/rejected": -1.538201093673706, + "loss": 2.2566, + "nll_loss": 0.5125335454940796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12276098132133484, + "rewards/margins": 0.031059138476848602, + "rewards/rejected": -0.15382012724876404, + "step": 803 + }, + { + "epoch": 2.119973632168754, + "grad_norm": 9.7593412399292, + "learning_rate": 2.3637976929902398e-06, + "log_odds_chosen": 0.5125753283500671, + "log_odds_ratio": -0.48863285779953003, + "logits/chosen": -1.089426875114441, + "logits/rejected": -1.052983283996582, + "logps/chosen": -1.442460298538208, + "logps/rejected": -1.8569387197494507, + "loss": 2.1403, + "nll_loss": 0.4862205684185028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14424604177474976, + "rewards/margins": 0.04144783690571785, + "rewards/rejected": -0.1856938600540161, + "step": 804 + }, + { + "epoch": 2.1226104152933423, + "grad_norm": 9.3715181350708, + "learning_rate": 2.356699201419698e-06, + "log_odds_chosen": 0.4354270398616791, + "log_odds_ratio": -0.514744758605957, + "logits/chosen": -1.085614562034607, + "logits/rejected": -1.0473663806915283, + "logps/chosen": -1.2321531772613525, + "logps/rejected": -1.5729819536209106, + "loss": 2.0199, + "nll_loss": 0.4535001516342163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12321531772613525, + "rewards/margins": 0.034082889556884766, + "rewards/rejected": -0.15729820728302002, + "step": 805 + }, + { + "epoch": 2.12524719841793, + "grad_norm": 10.585258483886719, + "learning_rate": 2.3496007098491567e-06, + "log_odds_chosen": 0.5048158764839172, + "log_odds_ratio": -0.48133382201194763, + "logits/chosen": -1.141870379447937, + "logits/rejected": -1.0114864110946655, + "logps/chosen": -1.4730051755905151, + "logps/rejected": -1.8799927234649658, + "loss": 2.6892, + "nll_loss": 0.6241719722747803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14730052649974823, + "rewards/margins": 0.04069875553250313, + "rewards/rejected": -0.18799926340579987, + "step": 806 + }, + { + "epoch": 2.127883981542518, + "grad_norm": 9.937313079833984, + "learning_rate": 2.3425022182786156e-06, + "log_odds_chosen": 0.7905609011650085, + "log_odds_ratio": -0.4188607335090637, + "logits/chosen": -1.0846725702285767, + "logits/rejected": -1.025403618812561, + "logps/chosen": -1.1108648777008057, + "logps/rejected": -1.6417710781097412, + "loss": 1.7662, + "nll_loss": 0.3996736705303192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11108650267124176, + "rewards/margins": 0.05309060961008072, + "rewards/rejected": -0.16417710483074188, + "step": 807 + }, + { + "epoch": 2.1305207646671063, + "grad_norm": 9.610738754272461, + "learning_rate": 2.3354037267080745e-06, + "log_odds_chosen": 0.5842036008834839, + "log_odds_ratio": -0.4494381248950958, + "logits/chosen": -1.0718939304351807, + "logits/rejected": -0.9993994235992432, + "logps/chosen": -1.1714190244674683, + "logps/rejected": -1.5994045734405518, + "loss": 1.7764, + "nll_loss": 0.39915409684181213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11714190989732742, + "rewards/margins": 0.04279854893684387, + "rewards/rejected": -0.1599404513835907, + "step": 808 + }, + { + "epoch": 2.133157547791694, + "grad_norm": 9.702584266662598, + "learning_rate": 2.328305235137533e-06, + "log_odds_chosen": 0.8238008618354797, + "log_odds_ratio": -0.39040911197662354, + "logits/chosen": -1.0546904802322388, + "logits/rejected": -1.0126965045928955, + "logps/chosen": -1.2540767192840576, + "logps/rejected": -1.9113447666168213, + "loss": 2.0101, + "nll_loss": 0.4634771943092346, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12540766596794128, + "rewards/margins": 0.06572680175304413, + "rewards/rejected": -0.1911344677209854, + "step": 809 + }, + { + "epoch": 2.135794330916282, + "grad_norm": 10.387869834899902, + "learning_rate": 2.321206743566992e-06, + "log_odds_chosen": 0.2536086142063141, + "log_odds_ratio": -0.5774084329605103, + "logits/chosen": -1.2409507036209106, + "logits/rejected": -1.1530442237854004, + "logps/chosen": -1.430117130279541, + "logps/rejected": -1.633103370666504, + "loss": 2.8274, + "nll_loss": 0.649109959602356, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14301171898841858, + "rewards/margins": 0.020298613235354424, + "rewards/rejected": -0.16331034898757935, + "step": 810 + }, + { + "epoch": 2.1384311140408703, + "grad_norm": 9.708829879760742, + "learning_rate": 2.314108251996451e-06, + "log_odds_chosen": 0.5131632685661316, + "log_odds_ratio": -0.48157426714897156, + "logits/chosen": -1.1093146800994873, + "logits/rejected": -1.0246758460998535, + "logps/chosen": -1.308085560798645, + "logps/rejected": -1.709665298461914, + "loss": 2.0433, + "nll_loss": 0.46267786622047424, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13080856204032898, + "rewards/margins": 0.040157970041036606, + "rewards/rejected": -0.17096653580665588, + "step": 811 + }, + { + "epoch": 2.141067897165458, + "grad_norm": 10.519964218139648, + "learning_rate": 2.3070097604259097e-06, + "log_odds_chosen": 0.7052963376045227, + "log_odds_ratio": -0.42012089490890503, + "logits/chosen": -1.0860507488250732, + "logits/rejected": -1.0053993463516235, + "logps/chosen": -1.4012532234191895, + "logps/rejected": -1.9778465032577515, + "loss": 2.2422, + "nll_loss": 0.5185346603393555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1401253342628479, + "rewards/margins": 0.057659320533275604, + "rewards/rejected": -0.1977846622467041, + "step": 812 + }, + { + "epoch": 2.143704680290046, + "grad_norm": 9.929567337036133, + "learning_rate": 2.299911268855368e-06, + "log_odds_chosen": 0.5145715475082397, + "log_odds_ratio": -0.5079393982887268, + "logits/chosen": -1.0435419082641602, + "logits/rejected": -0.9938980340957642, + "logps/chosen": -1.2124900817871094, + "logps/rejected": -1.5268044471740723, + "loss": 2.3445, + "nll_loss": 0.5353326797485352, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1212489977478981, + "rewards/margins": 0.03143144026398659, + "rewards/rejected": -0.152680441737175, + "step": 813 + }, + { + "epoch": 2.1463414634146343, + "grad_norm": 9.384446144104004, + "learning_rate": 2.2928127772848267e-06, + "log_odds_chosen": 0.7435194849967957, + "log_odds_ratio": -0.42445817589759827, + "logits/chosen": -1.0636303424835205, + "logits/rejected": -0.9686412215232849, + "logps/chosen": -1.2277225255966187, + "logps/rejected": -1.7723865509033203, + "loss": 1.9616, + "nll_loss": 0.44795966148376465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12277225404977798, + "rewards/margins": 0.05446639657020569, + "rewards/rejected": -0.17723865807056427, + "step": 814 + }, + { + "epoch": 2.148978246539222, + "grad_norm": 10.526111602783203, + "learning_rate": 2.2857142857142856e-06, + "log_odds_chosen": 0.43695545196533203, + "log_odds_ratio": -0.5100619196891785, + "logits/chosen": -1.0403656959533691, + "logits/rejected": -0.9722212553024292, + "logps/chosen": -1.3435180187225342, + "logps/rejected": -1.6733289957046509, + "loss": 2.039, + "nll_loss": 0.45873549580574036, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13435180485248566, + "rewards/margins": 0.03298109024763107, + "rewards/rejected": -0.16733288764953613, + "step": 815 + }, + { + "epoch": 2.15161502966381, + "grad_norm": 8.918171882629395, + "learning_rate": 2.2786157941437445e-06, + "log_odds_chosen": 0.5507664084434509, + "log_odds_ratio": -0.47093725204467773, + "logits/chosen": -1.1360986232757568, + "logits/rejected": -1.0540223121643066, + "logps/chosen": -1.1695194244384766, + "logps/rejected": -1.5818474292755127, + "loss": 1.8886, + "nll_loss": 0.42505964636802673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11695195734500885, + "rewards/margins": 0.04123278334736824, + "rewards/rejected": -0.1581847369670868, + "step": 816 + }, + { + "epoch": 2.1542518127883983, + "grad_norm": 10.905034065246582, + "learning_rate": 2.271517302573203e-06, + "log_odds_chosen": 0.6513671875, + "log_odds_ratio": -0.4349040389060974, + "logits/chosen": -1.0883439779281616, + "logits/rejected": -0.9979703426361084, + "logps/chosen": -1.441659688949585, + "logps/rejected": -1.9642170667648315, + "loss": 2.4105, + "nll_loss": 0.5591432452201843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1441659778356552, + "rewards/margins": 0.05225573480129242, + "rewards/rejected": -0.19642171263694763, + "step": 817 + }, + { + "epoch": 2.156888595912986, + "grad_norm": 9.421453475952148, + "learning_rate": 2.264418811002662e-06, + "log_odds_chosen": 0.3477077782154083, + "log_odds_ratio": -0.5469948649406433, + "logits/chosen": -1.109951138496399, + "logits/rejected": -1.0714219808578491, + "logps/chosen": -1.2214250564575195, + "logps/rejected": -1.4891982078552246, + "loss": 1.9733, + "nll_loss": 0.43863645195961, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12214250862598419, + "rewards/margins": 0.026777319610118866, + "rewards/rejected": -0.14891982078552246, + "step": 818 + }, + { + "epoch": 2.159525379037574, + "grad_norm": 10.353071212768555, + "learning_rate": 2.2573203194321207e-06, + "log_odds_chosen": 0.5227789878845215, + "log_odds_ratio": -0.4777028560638428, + "logits/chosen": -1.155128836631775, + "logits/rejected": -1.083472728729248, + "logps/chosen": -1.2611448764801025, + "logps/rejected": -1.655976414680481, + "loss": 2.5178, + "nll_loss": 0.5816807746887207, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12611448764801025, + "rewards/margins": 0.03948315605521202, + "rewards/rejected": -0.16559764742851257, + "step": 819 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 10.612918853759766, + "learning_rate": 2.2502218278615796e-06, + "log_odds_chosen": 0.325747549533844, + "log_odds_ratio": -0.5508489608764648, + "logits/chosen": -1.2103018760681152, + "logits/rejected": -1.083714485168457, + "logps/chosen": -1.396499752998352, + "logps/rejected": -1.6515552997589111, + "loss": 2.6947, + "nll_loss": 0.6185782551765442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13964997231960297, + "rewards/margins": 0.025505557656288147, + "rewards/rejected": -0.1651555299758911, + "step": 820 + }, + { + "epoch": 2.16479894528675, + "grad_norm": 9.557706832885742, + "learning_rate": 2.243123336291038e-06, + "log_odds_chosen": 0.48723679780960083, + "log_odds_ratio": -0.5050497055053711, + "logits/chosen": -1.0607335567474365, + "logits/rejected": -1.0438419580459595, + "logps/chosen": -1.180483341217041, + "logps/rejected": -1.550545573234558, + "loss": 1.8689, + "nll_loss": 0.4167235195636749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11804834008216858, + "rewards/margins": 0.037006210535764694, + "rewards/rejected": -0.15505453944206238, + "step": 821 + }, + { + "epoch": 2.167435728411338, + "grad_norm": 10.278383255004883, + "learning_rate": 2.2360248447204966e-06, + "log_odds_chosen": 0.4420629143714905, + "log_odds_ratio": -0.5143228769302368, + "logits/chosen": -1.0987699031829834, + "logits/rejected": -1.0278682708740234, + "logps/chosen": -1.3499083518981934, + "logps/rejected": -1.6710383892059326, + "loss": 2.7196, + "nll_loss": 0.628455400466919, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13499082624912262, + "rewards/margins": 0.032113008201122284, + "rewards/rejected": -0.1671038269996643, + "step": 822 + }, + { + "epoch": 2.1700725115359263, + "grad_norm": 8.535082817077637, + "learning_rate": 2.2289263531499555e-06, + "log_odds_chosen": 1.006068229675293, + "log_odds_ratio": -0.3642235994338989, + "logits/chosen": -1.0344353914260864, + "logits/rejected": -0.9348142147064209, + "logps/chosen": -1.1169939041137695, + "logps/rejected": -1.9077107906341553, + "loss": 1.5286, + "nll_loss": 0.34572744369506836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11169938743114471, + "rewards/margins": 0.07907170057296753, + "rewards/rejected": -0.19077108800411224, + "step": 823 + }, + { + "epoch": 2.172709294660514, + "grad_norm": 10.125965118408203, + "learning_rate": 2.221827861579414e-06, + "log_odds_chosen": 0.43210938572883606, + "log_odds_ratio": -0.517227292060852, + "logits/chosen": -1.0547758340835571, + "logits/rejected": -0.9629392027854919, + "logps/chosen": -1.4742023944854736, + "logps/rejected": -1.8478717803955078, + "loss": 2.1903, + "nll_loss": 0.4958563446998596, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1474202275276184, + "rewards/margins": 0.03736693412065506, + "rewards/rejected": -0.18478718400001526, + "step": 824 + }, + { + "epoch": 2.175346077785102, + "grad_norm": 9.980408668518066, + "learning_rate": 2.214729370008873e-06, + "log_odds_chosen": 0.4883590042591095, + "log_odds_ratio": -0.4976821541786194, + "logits/chosen": -1.1093132495880127, + "logits/rejected": -1.004177212715149, + "logps/chosen": -1.3356246948242188, + "logps/rejected": -1.7281272411346436, + "loss": 2.12, + "nll_loss": 0.4802260398864746, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13356247544288635, + "rewards/margins": 0.03925025463104248, + "rewards/rejected": -0.17281271517276764, + "step": 825 + }, + { + "epoch": 2.1779828609096903, + "grad_norm": 10.402122497558594, + "learning_rate": 2.2076308784383318e-06, + "log_odds_chosen": 0.36031287908554077, + "log_odds_ratio": -0.5396561622619629, + "logits/chosen": -1.1205006837844849, + "logits/rejected": -1.0499624013900757, + "logps/chosen": -1.4112681150436401, + "logps/rejected": -1.6909284591674805, + "loss": 2.3688, + "nll_loss": 0.5382289886474609, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.141126811504364, + "rewards/margins": 0.02796604298055172, + "rewards/rejected": -0.16909286379814148, + "step": 826 + }, + { + "epoch": 2.180619644034278, + "grad_norm": 9.647673606872559, + "learning_rate": 2.2005323868677907e-06, + "log_odds_chosen": 0.5771644711494446, + "log_odds_ratio": -0.45836514234542847, + "logits/chosen": -1.152066707611084, + "logits/rejected": -1.0163257122039795, + "logps/chosen": -1.243578553199768, + "logps/rejected": -1.6959649324417114, + "loss": 2.3424, + "nll_loss": 0.5397616028785706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12435785681009293, + "rewards/margins": 0.04523865133523941, + "rewards/rejected": -0.16959650814533234, + "step": 827 + }, + { + "epoch": 2.183256427158866, + "grad_norm": 9.848581314086914, + "learning_rate": 2.193433895297249e-06, + "log_odds_chosen": 0.2979162633419037, + "log_odds_ratio": -0.5617325305938721, + "logits/chosen": -1.088724136352539, + "logits/rejected": -1.0479118824005127, + "logps/chosen": -1.2935439348220825, + "logps/rejected": -1.5216472148895264, + "loss": 2.18, + "nll_loss": 0.48883068561553955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12935440242290497, + "rewards/margins": 0.022810325026512146, + "rewards/rejected": -0.15216472744941711, + "step": 828 + }, + { + "epoch": 2.1858932102834543, + "grad_norm": 9.815072059631348, + "learning_rate": 2.186335403726708e-06, + "log_odds_chosen": 0.5657855868339539, + "log_odds_ratio": -0.45759493112564087, + "logits/chosen": -1.0358961820602417, + "logits/rejected": -0.9519480466842651, + "logps/chosen": -1.3682363033294678, + "logps/rejected": -1.8163623809814453, + "loss": 1.9967, + "nll_loss": 0.45340484380722046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1368236392736435, + "rewards/margins": 0.044812608510255814, + "rewards/rejected": -0.181636244058609, + "step": 829 + }, + { + "epoch": 2.188529993408042, + "grad_norm": 9.674201011657715, + "learning_rate": 2.179236912156167e-06, + "log_odds_chosen": 0.8280074596405029, + "log_odds_ratio": -0.4154477119445801, + "logits/chosen": -1.0819666385650635, + "logits/rejected": -0.9875413775444031, + "logps/chosen": -1.0795493125915527, + "logps/rejected": -1.6378755569458008, + "loss": 1.9321, + "nll_loss": 0.44148778915405273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10795491933822632, + "rewards/margins": 0.0558326430618763, + "rewards/rejected": -0.16378755867481232, + "step": 830 + }, + { + "epoch": 2.19116677653263, + "grad_norm": 9.76904582977295, + "learning_rate": 2.1721384205856254e-06, + "log_odds_chosen": 0.4637497067451477, + "log_odds_ratio": -0.4947831332683563, + "logits/chosen": -1.0782703161239624, + "logits/rejected": -1.0121512413024902, + "logps/chosen": -1.4170691967010498, + "logps/rejected": -1.7891721725463867, + "loss": 2.1634, + "nll_loss": 0.49136391282081604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1417069137096405, + "rewards/margins": 0.03721030056476593, + "rewards/rejected": -0.17891722917556763, + "step": 831 + }, + { + "epoch": 2.1938035596572183, + "grad_norm": 10.08104133605957, + "learning_rate": 2.165039929015084e-06, + "log_odds_chosen": 0.4065963625907898, + "log_odds_ratio": -0.5492662787437439, + "logits/chosen": -1.1009379625320435, + "logits/rejected": -1.0309967994689941, + "logps/chosen": -1.2967877388000488, + "logps/rejected": -1.6170520782470703, + "loss": 2.6675, + "nll_loss": 0.611954927444458, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12967877089977264, + "rewards/margins": 0.032026439905166626, + "rewards/rejected": -0.16170522570610046, + "step": 832 + }, + { + "epoch": 2.196440342781806, + "grad_norm": 9.883426666259766, + "learning_rate": 2.1579414374445428e-06, + "log_odds_chosen": 0.33256006240844727, + "log_odds_ratio": -0.5476436614990234, + "logits/chosen": -1.1163049936294556, + "logits/rejected": -1.0105125904083252, + "logps/chosen": -1.2677969932556152, + "logps/rejected": -1.515932559967041, + "loss": 2.0921, + "nll_loss": 0.4682601988315582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.126779705286026, + "rewards/margins": 0.024813562631607056, + "rewards/rejected": -0.15159326791763306, + "step": 833 + }, + { + "epoch": 2.199077125906394, + "grad_norm": 9.637345314025879, + "learning_rate": 2.1508429458740017e-06, + "log_odds_chosen": 0.7757169008255005, + "log_odds_ratio": -0.43987080454826355, + "logits/chosen": -1.0220355987548828, + "logits/rejected": -0.9482641816139221, + "logps/chosen": -1.1861026287078857, + "logps/rejected": -1.8005510568618774, + "loss": 1.7984, + "nll_loss": 0.40560632944107056, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11861026287078857, + "rewards/margins": 0.061444856226444244, + "rewards/rejected": -0.18005511164665222, + "step": 834 + }, + { + "epoch": 2.2017139090309823, + "grad_norm": 9.93033504486084, + "learning_rate": 2.1437444543034606e-06, + "log_odds_chosen": 0.26072263717651367, + "log_odds_ratio": -0.5927022695541382, + "logits/chosen": -1.0605559349060059, + "logits/rejected": -0.9693813323974609, + "logps/chosen": -1.387969970703125, + "logps/rejected": -1.5906848907470703, + "loss": 2.1978, + "nll_loss": 0.49017882347106934, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13879700005054474, + "rewards/margins": 0.02027149498462677, + "rewards/rejected": -0.1590684950351715, + "step": 835 + }, + { + "epoch": 2.20435069215557, + "grad_norm": 9.383538246154785, + "learning_rate": 2.136645962732919e-06, + "log_odds_chosen": 0.8809219598770142, + "log_odds_ratio": -0.3742610514163971, + "logits/chosen": -1.103813886642456, + "logits/rejected": -0.965684711933136, + "logps/chosen": -1.2720234394073486, + "logps/rejected": -1.9837875366210938, + "loss": 1.8789, + "nll_loss": 0.43230006098747253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1272023618221283, + "rewards/margins": 0.07117639482021332, + "rewards/rejected": -0.1983787566423416, + "step": 836 + }, + { + "epoch": 2.206987475280158, + "grad_norm": 9.359567642211914, + "learning_rate": 2.129547471162378e-06, + "log_odds_chosen": 0.563247561454773, + "log_odds_ratio": -0.4659076929092407, + "logits/chosen": -1.081076979637146, + "logits/rejected": -1.0120054483413696, + "logps/chosen": -1.1683335304260254, + "logps/rejected": -1.5994642972946167, + "loss": 1.9513, + "nll_loss": 0.4412320852279663, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11683335900306702, + "rewards/margins": 0.04311307519674301, + "rewards/rejected": -0.15994644165039062, + "step": 837 + }, + { + "epoch": 2.2096242584047463, + "grad_norm": 10.473090171813965, + "learning_rate": 2.122448979591837e-06, + "log_odds_chosen": 0.6541207432746887, + "log_odds_ratio": -0.4413262903690338, + "logits/chosen": -1.2150276899337769, + "logits/rejected": -1.0857141017913818, + "logps/chosen": -1.156285285949707, + "logps/rejected": -1.643873691558838, + "loss": 2.2774, + "nll_loss": 0.5252097845077515, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11562854051589966, + "rewards/margins": 0.04875882714986801, + "rewards/rejected": -0.16438736021518707, + "step": 838 + }, + { + "epoch": 2.212261041529334, + "grad_norm": 9.396411895751953, + "learning_rate": 2.1153504880212958e-06, + "log_odds_chosen": 0.7791802883148193, + "log_odds_ratio": -0.41297250986099243, + "logits/chosen": -1.1654194593429565, + "logits/rejected": -1.0504658222198486, + "logps/chosen": -1.3216980695724487, + "logps/rejected": -1.9628183841705322, + "loss": 2.2329, + "nll_loss": 0.5169225931167603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13216981291770935, + "rewards/margins": 0.06411202251911163, + "rewards/rejected": -0.19628183543682098, + "step": 839 + }, + { + "epoch": 2.214897824653922, + "grad_norm": 10.38984203338623, + "learning_rate": 2.108251996450754e-06, + "log_odds_chosen": 0.28098800778388977, + "log_odds_ratio": -0.5779528617858887, + "logits/chosen": -1.0790941715240479, + "logits/rejected": -1.0334962606430054, + "logps/chosen": -1.303661584854126, + "logps/rejected": -1.5208593606948853, + "loss": 2.2238, + "nll_loss": 0.4981459379196167, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13036617636680603, + "rewards/margins": 0.02171977609395981, + "rewards/rejected": -0.15208593010902405, + "step": 840 + }, + { + "epoch": 2.2175346077785103, + "grad_norm": 10.593436241149902, + "learning_rate": 2.1011535048802127e-06, + "log_odds_chosen": 0.5783386826515198, + "log_odds_ratio": -0.46727651357650757, + "logits/chosen": -1.0302200317382812, + "logits/rejected": -0.975492537021637, + "logps/chosen": -1.274944543838501, + "logps/rejected": -1.7174361944198608, + "loss": 2.1627, + "nll_loss": 0.4939413070678711, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1274944692850113, + "rewards/margins": 0.044249165803194046, + "rewards/rejected": -0.17174363136291504, + "step": 841 + }, + { + "epoch": 2.220171390903098, + "grad_norm": 10.232915878295898, + "learning_rate": 2.0940550133096716e-06, + "log_odds_chosen": 0.41215968132019043, + "log_odds_ratio": -0.5210889577865601, + "logits/chosen": -1.1280133724212646, + "logits/rejected": -1.026360273361206, + "logps/chosen": -1.3107011318206787, + "logps/rejected": -1.639918565750122, + "loss": 2.6401, + "nll_loss": 0.6079277992248535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1310701072216034, + "rewards/margins": 0.03292175382375717, + "rewards/rejected": -0.16399186849594116, + "step": 842 + }, + { + "epoch": 2.222808174027686, + "grad_norm": 9.591880798339844, + "learning_rate": 2.08695652173913e-06, + "log_odds_chosen": 0.5880998373031616, + "log_odds_ratio": -0.46353909373283386, + "logits/chosen": -1.0672273635864258, + "logits/rejected": -1.0226701498031616, + "logps/chosen": -1.1952481269836426, + "logps/rejected": -1.648200511932373, + "loss": 1.7761, + "nll_loss": 0.3976776599884033, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11952481418848038, + "rewards/margins": 0.04529523849487305, + "rewards/rejected": -0.16482004523277283, + "step": 843 + }, + { + "epoch": 2.2254449571522743, + "grad_norm": 9.671662330627441, + "learning_rate": 2.079858030168589e-06, + "log_odds_chosen": 0.4567071199417114, + "log_odds_ratio": -0.5039539337158203, + "logits/chosen": -1.059120535850525, + "logits/rejected": -0.9793242812156677, + "logps/chosen": -1.3402698040008545, + "logps/rejected": -1.696657419204712, + "loss": 2.0356, + "nll_loss": 0.45850008726119995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13402698934078217, + "rewards/margins": 0.035638757050037384, + "rewards/rejected": -0.16966573894023895, + "step": 844 + }, + { + "epoch": 2.2280817402768625, + "grad_norm": 10.071785926818848, + "learning_rate": 2.072759538598048e-06, + "log_odds_chosen": 0.6569117307662964, + "log_odds_ratio": -0.44372034072875977, + "logits/chosen": -1.1549427509307861, + "logits/rejected": -1.0164406299591064, + "logps/chosen": -1.2740365266799927, + "logps/rejected": -1.7857685089111328, + "loss": 2.3573, + "nll_loss": 0.5449540019035339, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12740366160869598, + "rewards/margins": 0.05117318406701088, + "rewards/rejected": -0.17857685685157776, + "step": 845 + }, + { + "epoch": 2.23071852340145, + "grad_norm": 10.06051254272461, + "learning_rate": 2.0656610470275068e-06, + "log_odds_chosen": 0.7105215191841125, + "log_odds_ratio": -0.4064417779445648, + "logits/chosen": -1.1775195598602295, + "logits/rejected": -1.0729823112487793, + "logps/chosen": -1.2887108325958252, + "logps/rejected": -1.845299243927002, + "loss": 2.3104, + "nll_loss": 0.5369586944580078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12887108325958252, + "rewards/margins": 0.05565885454416275, + "rewards/rejected": -0.18452993035316467, + "step": 846 + }, + { + "epoch": 2.2333553065260383, + "grad_norm": 10.156782150268555, + "learning_rate": 2.0585625554569653e-06, + "log_odds_chosen": 0.862796425819397, + "log_odds_ratio": -0.40923982858657837, + "logits/chosen": -1.1173951625823975, + "logits/rejected": -1.020320177078247, + "logps/chosen": -1.159977674484253, + "logps/rejected": -1.8526980876922607, + "loss": 1.9856, + "nll_loss": 0.4554870128631592, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11599776893854141, + "rewards/margins": 0.06927204132080078, + "rewards/rejected": -0.1852698028087616, + "step": 847 + }, + { + "epoch": 2.235992089650626, + "grad_norm": 10.295635223388672, + "learning_rate": 2.051464063886424e-06, + "log_odds_chosen": 0.34328746795654297, + "log_odds_ratio": -0.5436752438545227, + "logits/chosen": -1.054999589920044, + "logits/rejected": -0.9837340116500854, + "logps/chosen": -1.2793586254119873, + "logps/rejected": -1.533247709274292, + "loss": 2.8444, + "nll_loss": 0.6567400097846985, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12793587148189545, + "rewards/margins": 0.025388918817043304, + "rewards/rejected": -0.15332478284835815, + "step": 848 + }, + { + "epoch": 2.238628872775214, + "grad_norm": 9.936890602111816, + "learning_rate": 2.0443655723158826e-06, + "log_odds_chosen": 0.7530043125152588, + "log_odds_ratio": -0.4255771040916443, + "logits/chosen": -1.07007896900177, + "logits/rejected": -1.0139280557632446, + "logps/chosen": -1.1299421787261963, + "logps/rejected": -1.6235958337783813, + "loss": 2.0257, + "nll_loss": 0.46387165784835815, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11299421638250351, + "rewards/margins": 0.049365364015102386, + "rewards/rejected": -0.1623595803976059, + "step": 849 + }, + { + "epoch": 2.2412656558998023, + "grad_norm": 10.255937576293945, + "learning_rate": 2.0372670807453415e-06, + "log_odds_chosen": 0.736466646194458, + "log_odds_ratio": -0.4073718786239624, + "logits/chosen": -1.0956451892852783, + "logits/rejected": -0.9570380449295044, + "logps/chosen": -1.362380027770996, + "logps/rejected": -1.9511312246322632, + "loss": 2.4922, + "nll_loss": 0.5823126435279846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1362380087375641, + "rewards/margins": 0.05887509882450104, + "rewards/rejected": -0.19511312246322632, + "step": 850 + }, + { + "epoch": 2.2439024390243905, + "grad_norm": 9.78884506225586, + "learning_rate": 2.0301685891748e-06, + "log_odds_chosen": 0.59482741355896, + "log_odds_ratio": -0.4567106366157532, + "logits/chosen": -1.187472939491272, + "logits/rejected": -1.0651209354400635, + "logps/chosen": -1.3073811531066895, + "logps/rejected": -1.7776367664337158, + "loss": 2.4399, + "nll_loss": 0.5643101930618286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13073810935020447, + "rewards/margins": 0.04702557250857353, + "rewards/rejected": -0.1777637004852295, + "step": 851 + }, + { + "epoch": 2.246539222148978, + "grad_norm": 9.819873809814453, + "learning_rate": 2.023070097604259e-06, + "log_odds_chosen": 0.7717651128768921, + "log_odds_ratio": -0.3990074098110199, + "logits/chosen": -1.2048760652542114, + "logits/rejected": -1.0654480457305908, + "logps/chosen": -1.35507333278656, + "logps/rejected": -1.9843974113464355, + "loss": 2.5905, + "nll_loss": 0.607713520526886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13550734519958496, + "rewards/margins": 0.06293240189552307, + "rewards/rejected": -0.19843974709510803, + "step": 852 + }, + { + "epoch": 2.2491760052735663, + "grad_norm": 9.984160423278809, + "learning_rate": 2.015971606033718e-06, + "log_odds_chosen": 0.24353592097759247, + "log_odds_ratio": -0.5929281711578369, + "logits/chosen": -1.041812539100647, + "logits/rejected": -0.9944383502006531, + "logps/chosen": -1.3887944221496582, + "logps/rejected": -1.5821037292480469, + "loss": 2.0191, + "nll_loss": 0.4454866647720337, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1388794332742691, + "rewards/margins": 0.019330933690071106, + "rewards/rejected": -0.1582103669643402, + "step": 853 + }, + { + "epoch": 2.251812788398154, + "grad_norm": 10.243196487426758, + "learning_rate": 2.0088731144631767e-06, + "log_odds_chosen": 0.4460301399230957, + "log_odds_ratio": -0.5126500129699707, + "logits/chosen": -1.0765531063079834, + "logits/rejected": -0.9831266403198242, + "logps/chosen": -1.4298105239868164, + "logps/rejected": -1.788804054260254, + "loss": 2.8298, + "nll_loss": 0.6561848521232605, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14298105239868164, + "rewards/margins": 0.035899337381124496, + "rewards/rejected": -0.17888039350509644, + "step": 854 + }, + { + "epoch": 2.254449571522742, + "grad_norm": 10.224044799804688, + "learning_rate": 2.001774622892635e-06, + "log_odds_chosen": 0.5008156895637512, + "log_odds_ratio": -0.4781492352485657, + "logits/chosen": -1.1945780515670776, + "logits/rejected": -1.0529351234436035, + "logps/chosen": -1.283106803894043, + "logps/rejected": -1.6531083583831787, + "loss": 2.5087, + "nll_loss": 0.5793533325195312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1283106803894043, + "rewards/margins": 0.0370001494884491, + "rewards/rejected": -0.1653108447790146, + "step": 855 + }, + { + "epoch": 2.2570863546473303, + "grad_norm": 10.119146347045898, + "learning_rate": 1.994676131322094e-06, + "log_odds_chosen": 0.3080759644508362, + "log_odds_ratio": -0.5613141059875488, + "logits/chosen": -1.1984078884124756, + "logits/rejected": -1.0711963176727295, + "logps/chosen": -1.4382545948028564, + "logps/rejected": -1.6706795692443848, + "loss": 2.6115, + "nll_loss": 0.5967416763305664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1438254565000534, + "rewards/margins": 0.023242495954036713, + "rewards/rejected": -0.16706794500350952, + "step": 856 + }, + { + "epoch": 2.2597231377719185, + "grad_norm": 10.42589282989502, + "learning_rate": 1.9875776397515526e-06, + "log_odds_chosen": 0.5139608383178711, + "log_odds_ratio": -0.4912663400173187, + "logits/chosen": -1.0437419414520264, + "logits/rejected": -0.9561313390731812, + "logps/chosen": -1.3991198539733887, + "logps/rejected": -1.7931902408599854, + "loss": 2.0823, + "nll_loss": 0.47144123911857605, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13991199433803558, + "rewards/margins": 0.039407022297382355, + "rewards/rejected": -0.17931902408599854, + "step": 857 + }, + { + "epoch": 2.262359920896506, + "grad_norm": 10.34991455078125, + "learning_rate": 1.9804791481810115e-06, + "log_odds_chosen": 0.501839280128479, + "log_odds_ratio": -0.488447368144989, + "logits/chosen": -1.0929710865020752, + "logits/rejected": -1.0430017709732056, + "logps/chosen": -1.2344586849212646, + "logps/rejected": -1.5981402397155762, + "loss": 2.4483, + "nll_loss": 0.5632182359695435, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12344586849212646, + "rewards/margins": 0.036368150264024734, + "rewards/rejected": -0.1598140299320221, + "step": 858 + }, + { + "epoch": 2.2649967040210943, + "grad_norm": 9.237586975097656, + "learning_rate": 1.9733806566104704e-06, + "log_odds_chosen": 0.7990984916687012, + "log_odds_ratio": -0.3936226963996887, + "logits/chosen": -1.0956703424453735, + "logits/rejected": -0.9938894510269165, + "logps/chosen": -1.1750367879867554, + "logps/rejected": -1.7683098316192627, + "loss": 1.9704, + "nll_loss": 0.45324358344078064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11750368773937225, + "rewards/margins": 0.05932728946208954, + "rewards/rejected": -0.1768309772014618, + "step": 859 + }, + { + "epoch": 2.267633487145682, + "grad_norm": 10.378680229187012, + "learning_rate": 1.966282165039929e-06, + "log_odds_chosen": 0.8377877473831177, + "log_odds_ratio": -0.4150227904319763, + "logits/chosen": -1.1576271057128906, + "logits/rejected": -1.0481622219085693, + "logps/chosen": -1.2925989627838135, + "logps/rejected": -1.9561983346939087, + "loss": 2.54, + "nll_loss": 0.5934931039810181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1292598843574524, + "rewards/margins": 0.06635995954275131, + "rewards/rejected": -0.1956198513507843, + "step": 860 + }, + { + "epoch": 2.27027027027027, + "grad_norm": 10.77652359008789, + "learning_rate": 1.9591836734693877e-06, + "log_odds_chosen": 0.5458847880363464, + "log_odds_ratio": -0.4712611138820648, + "logits/chosen": -1.0664516687393188, + "logits/rejected": -1.0485702753067017, + "logps/chosen": -1.309433937072754, + "logps/rejected": -1.7289760112762451, + "loss": 2.7616, + "nll_loss": 0.6432833075523376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1309433877468109, + "rewards/margins": 0.04195421189069748, + "rewards/rejected": -0.172897607088089, + "step": 861 + }, + { + "epoch": 2.2729070533948583, + "grad_norm": 9.428654670715332, + "learning_rate": 1.952085181898846e-06, + "log_odds_chosen": 0.5159881114959717, + "log_odds_ratio": -0.5256285071372986, + "logits/chosen": -1.1286839246749878, + "logits/rejected": -1.0185039043426514, + "logps/chosen": -1.3016605377197266, + "logps/rejected": -1.7159929275512695, + "loss": 2.1145, + "nll_loss": 0.4760742783546448, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13016605377197266, + "rewards/margins": 0.041433244943618774, + "rewards/rejected": -0.17159929871559143, + "step": 862 + }, + { + "epoch": 2.2755438365194465, + "grad_norm": 10.207335472106934, + "learning_rate": 1.944986690328305e-06, + "log_odds_chosen": 0.3654537796974182, + "log_odds_ratio": -0.5408401489257812, + "logits/chosen": -1.084734320640564, + "logits/rejected": -0.9910352230072021, + "logps/chosen": -1.3164527416229248, + "logps/rejected": -1.6015342473983765, + "loss": 2.1288, + "nll_loss": 0.4781232476234436, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1316452920436859, + "rewards/margins": 0.02850813791155815, + "rewards/rejected": -0.16015341877937317, + "step": 863 + }, + { + "epoch": 2.278180619644034, + "grad_norm": 10.110044479370117, + "learning_rate": 1.937888198757764e-06, + "log_odds_chosen": 0.6507178544998169, + "log_odds_ratio": -0.4426138401031494, + "logits/chosen": -1.0788090229034424, + "logits/rejected": -1.0549869537353516, + "logps/chosen": -1.224971890449524, + "logps/rejected": -1.730021357536316, + "loss": 2.0642, + "nll_loss": 0.4717872142791748, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12249719351530075, + "rewards/margins": 0.05050494521856308, + "rewards/rejected": -0.17300213873386383, + "step": 864 + }, + { + "epoch": 2.2808174027686223, + "grad_norm": 10.79237174987793, + "learning_rate": 1.9307897071872225e-06, + "log_odds_chosen": 0.42231541872024536, + "log_odds_ratio": -0.5157482624053955, + "logits/chosen": -1.1129214763641357, + "logits/rejected": -1.0310511589050293, + "logps/chosen": -1.4913139343261719, + "logps/rejected": -1.8260332345962524, + "loss": 2.5362, + "nll_loss": 0.5824636220932007, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1491314172744751, + "rewards/margins": 0.03347191959619522, + "rewards/rejected": -0.18260332942008972, + "step": 865 + }, + { + "epoch": 2.2834541858932105, + "grad_norm": 10.123908042907715, + "learning_rate": 1.9236912156166814e-06, + "log_odds_chosen": 0.6933606266975403, + "log_odds_ratio": -0.48170340061187744, + "logits/chosen": -1.1296451091766357, + "logits/rejected": -1.0230624675750732, + "logps/chosen": -1.3490564823150635, + "logps/rejected": -1.9312152862548828, + "loss": 2.2868, + "nll_loss": 0.5235217213630676, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1349056512117386, + "rewards/margins": 0.058215878903865814, + "rewards/rejected": -0.1931215226650238, + "step": 866 + }, + { + "epoch": 2.286090969017798, + "grad_norm": 9.657790184020996, + "learning_rate": 1.9165927240461403e-06, + "log_odds_chosen": 0.7490557432174683, + "log_odds_ratio": -0.43284159898757935, + "logits/chosen": -1.0705084800720215, + "logits/rejected": -1.0278089046478271, + "logps/chosen": -1.119507074356079, + "logps/rejected": -1.552225112915039, + "loss": 1.8637, + "nll_loss": 0.4226509630680084, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11195070296525955, + "rewards/margins": 0.04327181726694107, + "rewards/rejected": -0.15522252023220062, + "step": 867 + }, + { + "epoch": 2.2887277521423863, + "grad_norm": 9.32958698272705, + "learning_rate": 1.9094942324755988e-06, + "log_odds_chosen": 0.49227672815322876, + "log_odds_ratio": -0.4879855215549469, + "logits/chosen": -1.1634224653244019, + "logits/rejected": -1.050025224685669, + "logps/chosen": -1.2189109325408936, + "logps/rejected": -1.592963695526123, + "loss": 2.2461, + "nll_loss": 0.5127338171005249, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1218910962343216, + "rewards/margins": 0.03740527853369713, + "rewards/rejected": -0.15929636359214783, + "step": 868 + }, + { + "epoch": 2.2913645352669745, + "grad_norm": 9.668261528015137, + "learning_rate": 1.9023957409050575e-06, + "log_odds_chosen": 0.5143749117851257, + "log_odds_ratio": -0.4800008535385132, + "logits/chosen": -1.0946844816207886, + "logits/rejected": -1.011817455291748, + "logps/chosen": -1.240984320640564, + "logps/rejected": -1.6015645265579224, + "loss": 2.0224, + "nll_loss": 0.45759451389312744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12409843504428864, + "rewards/margins": 0.03605801984667778, + "rewards/rejected": -0.1601564586162567, + "step": 869 + }, + { + "epoch": 2.294001318391562, + "grad_norm": 10.814599990844727, + "learning_rate": 1.8952972493345164e-06, + "log_odds_chosen": 0.4014270603656769, + "log_odds_ratio": -0.5303521156311035, + "logits/chosen": -1.0695867538452148, + "logits/rejected": -0.933932363986969, + "logps/chosen": -1.5640766620635986, + "logps/rejected": -1.8934946060180664, + "loss": 3.2221, + "nll_loss": 0.7524948716163635, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1564076691865921, + "rewards/margins": 0.032941803336143494, + "rewards/rejected": -0.1893494725227356, + "step": 870 + }, + { + "epoch": 2.2966381015161503, + "grad_norm": 10.645783424377441, + "learning_rate": 1.888198757763975e-06, + "log_odds_chosen": 0.5395257472991943, + "log_odds_ratio": -0.4920005202293396, + "logits/chosen": -1.184718132019043, + "logits/rejected": -1.0663403272628784, + "logps/chosen": -1.4322577714920044, + "logps/rejected": -1.8459181785583496, + "loss": 2.6386, + "nll_loss": 0.6104428768157959, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1432257890701294, + "rewards/margins": 0.04136602580547333, + "rewards/rejected": -0.18459181487560272, + "step": 871 + }, + { + "epoch": 2.2992748846407385, + "grad_norm": 10.148241996765137, + "learning_rate": 1.881100266193434e-06, + "log_odds_chosen": 0.4539548456668854, + "log_odds_ratio": -0.506780743598938, + "logits/chosen": -1.1411820650100708, + "logits/rejected": -1.0451831817626953, + "logps/chosen": -1.4123101234436035, + "logps/rejected": -1.7676352262496948, + "loss": 2.3386, + "nll_loss": 0.5339686870574951, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1412310153245926, + "rewards/margins": 0.03553250432014465, + "rewards/rejected": -0.17676350474357605, + "step": 872 + }, + { + "epoch": 2.301911667765326, + "grad_norm": 10.121185302734375, + "learning_rate": 1.8740017746228924e-06, + "log_odds_chosen": 0.5971920490264893, + "log_odds_ratio": -0.4546307325363159, + "logits/chosen": -1.1044751405715942, + "logits/rejected": -1.0192352533340454, + "logps/chosen": -1.2726118564605713, + "logps/rejected": -1.7381761074066162, + "loss": 2.374, + "nll_loss": 0.548031210899353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1272611767053604, + "rewards/margins": 0.046556420624256134, + "rewards/rejected": -0.17381760478019714, + "step": 873 + }, + { + "epoch": 2.3045484508899143, + "grad_norm": 10.169872283935547, + "learning_rate": 1.8669032830523513e-06, + "log_odds_chosen": 0.30267369747161865, + "log_odds_ratio": -0.5743823051452637, + "logits/chosen": -1.1027439832687378, + "logits/rejected": -1.0398067235946655, + "logps/chosen": -1.3280037641525269, + "logps/rejected": -1.5539779663085938, + "loss": 2.1782, + "nll_loss": 0.487112432718277, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1328003704547882, + "rewards/margins": 0.022597430273890495, + "rewards/rejected": -0.15539780259132385, + "step": 874 + }, + { + "epoch": 2.3071852340145025, + "grad_norm": 9.782196044921875, + "learning_rate": 1.85980479148181e-06, + "log_odds_chosen": 0.9248654842376709, + "log_odds_ratio": -0.34867754578590393, + "logits/chosen": -1.1500089168548584, + "logits/rejected": -0.9892420768737793, + "logps/chosen": -1.2782080173492432, + "logps/rejected": -2.0070817470550537, + "loss": 2.2703, + "nll_loss": 0.5327146053314209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12782080471515656, + "rewards/margins": 0.0728873685002327, + "rewards/rejected": -0.20070818066596985, + "step": 875 + }, + { + "epoch": 2.30982201713909, + "grad_norm": 10.457931518554688, + "learning_rate": 1.852706299911269e-06, + "log_odds_chosen": 0.48485198616981506, + "log_odds_ratio": -0.4915451109409332, + "logits/chosen": -1.1804633140563965, + "logits/rejected": -1.0634907484054565, + "logps/chosen": -1.4077634811401367, + "logps/rejected": -1.787198781967163, + "loss": 2.454, + "nll_loss": 0.5643399357795715, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1407763659954071, + "rewards/margins": 0.037943534553050995, + "rewards/rejected": -0.1787198781967163, + "step": 876 + }, + { + "epoch": 2.3124588002636783, + "grad_norm": 9.764825820922852, + "learning_rate": 1.8456078083407276e-06, + "log_odds_chosen": 0.6608742475509644, + "log_odds_ratio": -0.4454282522201538, + "logits/chosen": -1.0784417390823364, + "logits/rejected": -1.0097901821136475, + "logps/chosen": -1.1584433317184448, + "logps/rejected": -1.6725659370422363, + "loss": 2.0073, + "nll_loss": 0.45727962255477905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11584433913230896, + "rewards/margins": 0.05141226202249527, + "rewards/rejected": -0.16725659370422363, + "step": 877 + }, + { + "epoch": 2.3150955833882665, + "grad_norm": 10.151506423950195, + "learning_rate": 1.838509316770186e-06, + "log_odds_chosen": 0.6765606999397278, + "log_odds_ratio": -0.4168528616428375, + "logits/chosen": -1.1900181770324707, + "logits/rejected": -1.0964360237121582, + "logps/chosen": -1.3045673370361328, + "logps/rejected": -1.8341665267944336, + "loss": 2.5437, + "nll_loss": 0.5942333340644836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13045673072338104, + "rewards/margins": 0.05295991897583008, + "rewards/rejected": -0.18341666460037231, + "step": 878 + }, + { + "epoch": 2.317732366512854, + "grad_norm": 10.592248916625977, + "learning_rate": 1.831410825199645e-06, + "log_odds_chosen": 0.5475701689720154, + "log_odds_ratio": -0.4736286997795105, + "logits/chosen": -1.0685279369354248, + "logits/rejected": -1.0304744243621826, + "logps/chosen": -1.4587361812591553, + "logps/rejected": -1.8997396230697632, + "loss": 2.1521, + "nll_loss": 0.490667462348938, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14587360620498657, + "rewards/margins": 0.04410037025809288, + "rewards/rejected": -0.18997396528720856, + "step": 879 + }, + { + "epoch": 2.3203691496374423, + "grad_norm": 9.69548511505127, + "learning_rate": 1.8243123336291037e-06, + "log_odds_chosen": 0.5831120014190674, + "log_odds_ratio": -0.45221713185310364, + "logits/chosen": -1.1400697231292725, + "logits/rejected": -1.0067840814590454, + "logps/chosen": -1.306548833847046, + "logps/rejected": -1.7670178413391113, + "loss": 2.2661, + "nll_loss": 0.5212967395782471, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13065488636493683, + "rewards/margins": 0.0460469052195549, + "rewards/rejected": -0.17670178413391113, + "step": 880 + }, + { + "epoch": 2.3230059327620305, + "grad_norm": 9.943262100219727, + "learning_rate": 1.8172138420585626e-06, + "log_odds_chosen": 0.46555984020233154, + "log_odds_ratio": -0.49573034048080444, + "logits/chosen": -1.1484509706497192, + "logits/rejected": -1.0376181602478027, + "logps/chosen": -1.2022852897644043, + "logps/rejected": -1.5464811325073242, + "loss": 2.1417, + "nll_loss": 0.48586392402648926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12022852897644043, + "rewards/margins": 0.03441959619522095, + "rewards/rejected": -0.15464811027050018, + "step": 881 + }, + { + "epoch": 2.325642715886618, + "grad_norm": 10.272644996643066, + "learning_rate": 1.810115350488021e-06, + "log_odds_chosen": 0.6326010823249817, + "log_odds_ratio": -0.44564175605773926, + "logits/chosen": -1.1390728950500488, + "logits/rejected": -1.0238792896270752, + "logps/chosen": -1.4400683641433716, + "logps/rejected": -1.9388606548309326, + "loss": 2.6395, + "nll_loss": 0.6153170466423035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1440068483352661, + "rewards/margins": 0.04987923428416252, + "rewards/rejected": -0.19388607144355774, + "step": 882 + }, + { + "epoch": 2.3282794990112063, + "grad_norm": 10.139723777770996, + "learning_rate": 1.80301685891748e-06, + "log_odds_chosen": 0.6296920776367188, + "log_odds_ratio": -0.453926682472229, + "logits/chosen": -1.1161080598831177, + "logits/rejected": -1.0118201971054077, + "logps/chosen": -1.3150339126586914, + "logps/rejected": -1.821239709854126, + "loss": 2.1842, + "nll_loss": 0.5006626844406128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1315034031867981, + "rewards/margins": 0.050620581954717636, + "rewards/rejected": -0.18212398886680603, + "step": 883 + }, + { + "epoch": 2.3309162821357945, + "grad_norm": 9.526472091674805, + "learning_rate": 1.7959183673469386e-06, + "log_odds_chosen": 0.2386457324028015, + "log_odds_ratio": -0.595291256904602, + "logits/chosen": -1.0480625629425049, + "logits/rejected": -1.0287106037139893, + "logps/chosen": -1.3027513027191162, + "logps/rejected": -1.4972296953201294, + "loss": 1.9034, + "nll_loss": 0.4163215458393097, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13027513027191162, + "rewards/margins": 0.019447840750217438, + "rewards/rejected": -0.14972296357154846, + "step": 884 + }, + { + "epoch": 2.333553065260382, + "grad_norm": 10.50646686553955, + "learning_rate": 1.7888198757763975e-06, + "log_odds_chosen": 0.2895394563674927, + "log_odds_ratio": -0.5652808547019958, + "logits/chosen": -1.111433982849121, + "logits/rejected": -1.0047237873077393, + "logps/chosen": -1.5478146076202393, + "logps/rejected": -1.776818037033081, + "loss": 2.7286, + "nll_loss": 0.6256152391433716, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15478146076202393, + "rewards/margins": 0.022900333628058434, + "rewards/rejected": -0.1776818037033081, + "step": 885 + }, + { + "epoch": 2.3361898483849703, + "grad_norm": 9.655818939208984, + "learning_rate": 1.7817213842058562e-06, + "log_odds_chosen": 0.3944101333618164, + "log_odds_ratio": -0.5261645317077637, + "logits/chosen": -1.1105564832687378, + "logits/rejected": -1.048958420753479, + "logps/chosen": -1.366010069847107, + "logps/rejected": -1.6764073371887207, + "loss": 2.5216, + "nll_loss": 0.5777935981750488, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1366010159254074, + "rewards/margins": 0.03103972040116787, + "rewards/rejected": -0.16764073073863983, + "step": 886 + }, + { + "epoch": 2.3388266315095585, + "grad_norm": 9.845623970031738, + "learning_rate": 1.7746228926353149e-06, + "log_odds_chosen": 0.5777924060821533, + "log_odds_ratio": -0.46263208985328674, + "logits/chosen": -1.0652246475219727, + "logits/rejected": -0.9925827980041504, + "logps/chosen": -1.2034990787506104, + "logps/rejected": -1.636778473854065, + "loss": 2.2739, + "nll_loss": 0.5222036838531494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12034991383552551, + "rewards/margins": 0.0433279350399971, + "rewards/rejected": -0.16367784142494202, + "step": 887 + }, + { + "epoch": 2.341463414634146, + "grad_norm": 9.432107925415039, + "learning_rate": 1.7675244010647736e-06, + "log_odds_chosen": 0.5671384334564209, + "log_odds_ratio": -0.4698998034000397, + "logits/chosen": -1.0846713781356812, + "logits/rejected": -0.9662580490112305, + "logps/chosen": -1.3929511308670044, + "logps/rejected": -1.8406968116760254, + "loss": 2.1215, + "nll_loss": 0.4833735525608063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13929511606693268, + "rewards/margins": 0.04477456212043762, + "rewards/rejected": -0.1840696781873703, + "step": 888 + }, + { + "epoch": 2.3441001977587343, + "grad_norm": 9.808156967163086, + "learning_rate": 1.7604259094942325e-06, + "log_odds_chosen": 0.5133799314498901, + "log_odds_ratio": -0.4915502965450287, + "logits/chosen": -1.0829869508743286, + "logits/rejected": -1.058847427368164, + "logps/chosen": -1.174734354019165, + "logps/rejected": -1.5203860998153687, + "loss": 1.9699, + "nll_loss": 0.4433155357837677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11747344583272934, + "rewards/margins": 0.03456517308950424, + "rewards/rejected": -0.1520386040210724, + "step": 889 + }, + { + "epoch": 2.3467369808833225, + "grad_norm": 9.102179527282715, + "learning_rate": 1.7533274179236912e-06, + "log_odds_chosen": 0.6991630792617798, + "log_odds_ratio": -0.4420987665653229, + "logits/chosen": -0.9580685496330261, + "logits/rejected": -0.9406499862670898, + "logps/chosen": -1.1506810188293457, + "logps/rejected": -1.6416935920715332, + "loss": 1.7795, + "nll_loss": 0.4006742238998413, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11506810039281845, + "rewards/margins": 0.04910125583410263, + "rewards/rejected": -0.16416935622692108, + "step": 890 + }, + { + "epoch": 2.34937376400791, + "grad_norm": 10.316890716552734, + "learning_rate": 1.7462289263531499e-06, + "log_odds_chosen": 0.6008606553077698, + "log_odds_ratio": -0.45902445912361145, + "logits/chosen": -1.0788568258285522, + "logits/rejected": -1.0219684839248657, + "logps/chosen": -1.3049077987670898, + "logps/rejected": -1.7716357707977295, + "loss": 2.3578, + "nll_loss": 0.543548047542572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13049077987670898, + "rewards/margins": 0.04667280614376068, + "rewards/rejected": -0.17716357111930847, + "step": 891 + }, + { + "epoch": 2.3520105471324984, + "grad_norm": 9.912158966064453, + "learning_rate": 1.7391304347826085e-06, + "log_odds_chosen": 0.5222499370574951, + "log_odds_ratio": -0.4807077646255493, + "logits/chosen": -1.194288730621338, + "logits/rejected": -1.010504961013794, + "logps/chosen": -1.3184592723846436, + "logps/rejected": -1.737593650817871, + "loss": 2.651, + "nll_loss": 0.6146837472915649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13184592127799988, + "rewards/margins": 0.04191344231367111, + "rewards/rejected": -0.17375938594341278, + "step": 892 + }, + { + "epoch": 2.3546473302570865, + "grad_norm": 11.46944808959961, + "learning_rate": 1.7320319432120674e-06, + "log_odds_chosen": 0.4674687385559082, + "log_odds_ratio": -0.5207874178886414, + "logits/chosen": -1.0839996337890625, + "logits/rejected": -1.0142912864685059, + "logps/chosen": -1.7729079723358154, + "logps/rejected": -2.1121439933776855, + "loss": 3.5885, + "nll_loss": 0.8450548648834229, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17729079723358154, + "rewards/margins": 0.03392360359430313, + "rewards/rejected": -0.21121439337730408, + "step": 893 + }, + { + "epoch": 2.357284113381674, + "grad_norm": 9.610631942749023, + "learning_rate": 1.7249334516415261e-06, + "log_odds_chosen": 0.5872002243995667, + "log_odds_ratio": -0.4672238826751709, + "logits/chosen": -1.069164752960205, + "logits/rejected": -1.022336721420288, + "logps/chosen": -1.069106101989746, + "logps/rejected": -1.5121127367019653, + "loss": 1.986, + "nll_loss": 0.44977521896362305, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10691061615943909, + "rewards/margins": 0.044300660490989685, + "rewards/rejected": -0.15121127665042877, + "step": 894 + }, + { + "epoch": 2.3599208965062624, + "grad_norm": 10.619580268859863, + "learning_rate": 1.717834960070985e-06, + "log_odds_chosen": 0.432054340839386, + "log_odds_ratio": -0.5096228122711182, + "logits/chosen": -1.0840938091278076, + "logits/rejected": -1.0283002853393555, + "logps/chosen": -1.402572512626648, + "logps/rejected": -1.734330177307129, + "loss": 2.6856, + "nll_loss": 0.62043297290802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14025725424289703, + "rewards/margins": 0.0331757590174675, + "rewards/rejected": -0.17343303561210632, + "step": 895 + }, + { + "epoch": 2.3625576796308505, + "grad_norm": 10.408077239990234, + "learning_rate": 1.7107364685004435e-06, + "log_odds_chosen": 0.7202540636062622, + "log_odds_ratio": -0.4119204878807068, + "logits/chosen": -1.1463948488235474, + "logits/rejected": -1.0047610998153687, + "logps/chosen": -1.3519786596298218, + "logps/rejected": -1.93733811378479, + "loss": 2.2873, + "nll_loss": 0.530625581741333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13519787788391113, + "rewards/margins": 0.05853593349456787, + "rewards/rejected": -0.193733811378479, + "step": 896 + }, + { + "epoch": 2.365194462755438, + "grad_norm": 9.422696113586426, + "learning_rate": 1.7036379769299022e-06, + "log_odds_chosen": 0.9097142219543457, + "log_odds_ratio": -0.3858075737953186, + "logits/chosen": -1.0098564624786377, + "logits/rejected": -0.9362455010414124, + "logps/chosen": -1.2319567203521729, + "logps/rejected": -1.968179702758789, + "loss": 2.0434, + "nll_loss": 0.47226881980895996, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12319567054510117, + "rewards/margins": 0.07362228631973267, + "rewards/rejected": -0.19681797921657562, + "step": 897 + }, + { + "epoch": 2.3678312458800264, + "grad_norm": 10.891953468322754, + "learning_rate": 1.696539485359361e-06, + "log_odds_chosen": 0.637458086013794, + "log_odds_ratio": -0.4614834189414978, + "logits/chosen": -1.1466891765594482, + "logits/rejected": -1.0769871473312378, + "logps/chosen": -1.287334680557251, + "logps/rejected": -1.781071424484253, + "loss": 2.4553, + "nll_loss": 0.5676878094673157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12873347103595734, + "rewards/margins": 0.04937367141246796, + "rewards/rejected": -0.1781071424484253, + "step": 898 + }, + { + "epoch": 2.3704680290046145, + "grad_norm": 10.109067916870117, + "learning_rate": 1.6894409937888198e-06, + "log_odds_chosen": 0.29623594880104065, + "log_odds_ratio": -0.5727648735046387, + "logits/chosen": -1.159841775894165, + "logits/rejected": -1.1025397777557373, + "logps/chosen": -1.2897531986236572, + "logps/rejected": -1.522739291191101, + "loss": 2.1762, + "nll_loss": 0.486769437789917, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12897531688213348, + "rewards/margins": 0.02329862117767334, + "rewards/rejected": -0.15227393805980682, + "step": 899 + }, + { + "epoch": 2.373104812129202, + "grad_norm": 10.160222053527832, + "learning_rate": 1.6823425022182785e-06, + "log_odds_chosen": 0.7126874923706055, + "log_odds_ratio": -0.4103580713272095, + "logits/chosen": -1.09098219871521, + "logits/rejected": -1.0015114545822144, + "logps/chosen": -1.2328615188598633, + "logps/rejected": -1.7861967086791992, + "loss": 1.919, + "nll_loss": 0.4387224018573761, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1232861578464508, + "rewards/margins": 0.05533352866768837, + "rewards/rejected": -0.17861968278884888, + "step": 900 + }, + { + "epoch": 2.3757415952537904, + "grad_norm": 10.40146255493164, + "learning_rate": 1.6752440106477372e-06, + "log_odds_chosen": 0.5050216317176819, + "log_odds_ratio": -0.4958907961845398, + "logits/chosen": -1.119974136352539, + "logits/rejected": -1.0149524211883545, + "logps/chosen": -1.3021442890167236, + "logps/rejected": -1.7089707851409912, + "loss": 2.3676, + "nll_loss": 0.5423151254653931, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13021443784236908, + "rewards/margins": 0.04068264365196228, + "rewards/rejected": -0.17089708149433136, + "step": 901 + }, + { + "epoch": 2.3783783783783785, + "grad_norm": 9.469189643859863, + "learning_rate": 1.668145519077196e-06, + "log_odds_chosen": 0.766099750995636, + "log_odds_ratio": -0.4818335771560669, + "logits/chosen": -1.0585970878601074, + "logits/rejected": -1.0308507680892944, + "logps/chosen": -1.2359730005264282, + "logps/rejected": -1.876680612564087, + "loss": 1.7823, + "nll_loss": 0.39740118384361267, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12359730154275894, + "rewards/margins": 0.06407077610492706, + "rewards/rejected": -0.1876680850982666, + "step": 902 + }, + { + "epoch": 2.381015161502966, + "grad_norm": 10.153940200805664, + "learning_rate": 1.6610470275066547e-06, + "log_odds_chosen": 0.48691484332084656, + "log_odds_ratio": -0.497551828622818, + "logits/chosen": -1.1616532802581787, + "logits/rejected": -1.0509796142578125, + "logps/chosen": -1.2466967105865479, + "logps/rejected": -1.6137733459472656, + "loss": 2.1945, + "nll_loss": 0.49886205792427063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12466967850923538, + "rewards/margins": 0.03670765459537506, + "rewards/rejected": -0.16137734055519104, + "step": 903 + }, + { + "epoch": 2.3836519446275544, + "grad_norm": 10.649412155151367, + "learning_rate": 1.6539485359361136e-06, + "log_odds_chosen": 0.5018337965011597, + "log_odds_ratio": -0.4802248477935791, + "logits/chosen": -1.1726360321044922, + "logits/rejected": -1.0210614204406738, + "logps/chosen": -1.3647270202636719, + "logps/rejected": -1.753767728805542, + "loss": 2.3417, + "nll_loss": 0.5373994708061218, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1364727020263672, + "rewards/margins": 0.03890407457947731, + "rewards/rejected": -0.1753767728805542, + "step": 904 + }, + { + "epoch": 2.3862887277521425, + "grad_norm": 9.40090274810791, + "learning_rate": 1.6468500443655721e-06, + "log_odds_chosen": 0.472876638174057, + "log_odds_ratio": -0.4991152286529541, + "logits/chosen": -1.1030107736587524, + "logits/rejected": -0.9815706014633179, + "logps/chosen": -1.3572287559509277, + "logps/rejected": -1.733076810836792, + "loss": 2.1231, + "nll_loss": 0.480863094329834, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13572287559509277, + "rewards/margins": 0.0375848188996315, + "rewards/rejected": -0.17330768704414368, + "step": 905 + }, + { + "epoch": 2.38892551087673, + "grad_norm": 9.037947654724121, + "learning_rate": 1.639751552795031e-06, + "log_odds_chosen": 0.7296708822250366, + "log_odds_ratio": -0.4820438027381897, + "logits/chosen": -1.0791407823562622, + "logits/rejected": -1.0010749101638794, + "logps/chosen": -1.302752137184143, + "logps/rejected": -1.9282175302505493, + "loss": 1.9242, + "nll_loss": 0.43284353613853455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13027521967887878, + "rewards/margins": 0.06254653632640839, + "rewards/rejected": -0.19282175600528717, + "step": 906 + }, + { + "epoch": 2.3915622940013184, + "grad_norm": 10.885642051696777, + "learning_rate": 1.6326530612244897e-06, + "log_odds_chosen": 0.5610907673835754, + "log_odds_ratio": -0.4697563946247101, + "logits/chosen": -1.0593876838684082, + "logits/rejected": -0.9969384670257568, + "logps/chosen": -1.23292875289917, + "logps/rejected": -1.662524700164795, + "loss": 2.425, + "nll_loss": 0.5592802166938782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12329287827014923, + "rewards/margins": 0.04295959323644638, + "rewards/rejected": -0.16625246405601501, + "step": 907 + }, + { + "epoch": 2.3941990771259065, + "grad_norm": 9.862154960632324, + "learning_rate": 1.6255545696539486e-06, + "log_odds_chosen": 0.6774505376815796, + "log_odds_ratio": -0.4316444396972656, + "logits/chosen": -1.110433578491211, + "logits/rejected": -1.0029642581939697, + "logps/chosen": -1.3049352169036865, + "logps/rejected": -1.8275152444839478, + "loss": 1.94, + "nll_loss": 0.4418395161628723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13049352169036865, + "rewards/margins": 0.05225801095366478, + "rewards/rejected": -0.18275153636932373, + "step": 908 + }, + { + "epoch": 2.396835860250494, + "grad_norm": 10.400384902954102, + "learning_rate": 1.618456078083407e-06, + "log_odds_chosen": 0.6197330951690674, + "log_odds_ratio": -0.43622320890426636, + "logits/chosen": -1.1227972507476807, + "logits/rejected": -1.0015300512313843, + "logps/chosen": -1.4339704513549805, + "logps/rejected": -1.9296571016311646, + "loss": 2.5103, + "nll_loss": 0.5839563608169556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14339704811573029, + "rewards/margins": 0.049568649381399155, + "rewards/rejected": -0.19296568632125854, + "step": 909 + }, + { + "epoch": 2.3994726433750824, + "grad_norm": 9.41834831237793, + "learning_rate": 1.611357586512866e-06, + "log_odds_chosen": 0.865503191947937, + "log_odds_ratio": -0.3612247407436371, + "logits/chosen": -1.0651895999908447, + "logits/rejected": -0.9995630979537964, + "logps/chosen": -1.1853220462799072, + "logps/rejected": -1.8519682884216309, + "loss": 1.755, + "nll_loss": 0.4026201367378235, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.118532195687294, + "rewards/margins": 0.06666462123394012, + "rewards/rejected": -0.18519681692123413, + "step": 910 + }, + { + "epoch": 2.4021094264996705, + "grad_norm": 10.072117805480957, + "learning_rate": 1.6042590949423247e-06, + "log_odds_chosen": 0.4887821674346924, + "log_odds_ratio": -0.4884355664253235, + "logits/chosen": -1.1391323804855347, + "logits/rejected": -1.0412737131118774, + "logps/chosen": -1.2391932010650635, + "logps/rejected": -1.613529086112976, + "loss": 2.2534, + "nll_loss": 0.5145063996315002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12391932308673859, + "rewards/margins": 0.03743358328938484, + "rewards/rejected": -0.16135290265083313, + "step": 911 + }, + { + "epoch": 2.4047462096242582, + "grad_norm": 9.537360191345215, + "learning_rate": 1.5971606033717836e-06, + "log_odds_chosen": 0.699766993522644, + "log_odds_ratio": -0.4318753480911255, + "logits/chosen": -1.1274890899658203, + "logits/rejected": -0.9987161159515381, + "logps/chosen": -1.1979037523269653, + "logps/rejected": -1.722472906112671, + "loss": 1.9952, + "nll_loss": 0.45561331510543823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11979037523269653, + "rewards/margins": 0.05245692655444145, + "rewards/rejected": -0.1722472906112671, + "step": 912 + }, + { + "epoch": 2.4073829927488464, + "grad_norm": 9.310301780700684, + "learning_rate": 1.590062111801242e-06, + "log_odds_chosen": 0.675567090511322, + "log_odds_ratio": -0.4486117660999298, + "logits/chosen": -1.030418038368225, + "logits/rejected": -1.0064417123794556, + "logps/chosen": -1.0173966884613037, + "logps/rejected": -1.495659351348877, + "loss": 1.8953, + "nll_loss": 0.4289514422416687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10173968225717545, + "rewards/margins": 0.04782627522945404, + "rewards/rejected": -0.1495659351348877, + "step": 913 + }, + { + "epoch": 2.4100197758734345, + "grad_norm": 9.618062973022461, + "learning_rate": 1.582963620230701e-06, + "log_odds_chosen": 0.5738446116447449, + "log_odds_ratio": -0.4660855829715729, + "logits/chosen": -1.1193753480911255, + "logits/rejected": -1.0765951871871948, + "logps/chosen": -1.2046260833740234, + "logps/rejected": -1.6074562072753906, + "loss": 2.2378, + "nll_loss": 0.5128461122512817, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12046261876821518, + "rewards/margins": 0.04028301313519478, + "rewards/rejected": -0.16074562072753906, + "step": 914 + }, + { + "epoch": 2.4126565589980222, + "grad_norm": 11.003042221069336, + "learning_rate": 1.5758651286601596e-06, + "log_odds_chosen": 0.3635341227054596, + "log_odds_ratio": -0.5497903823852539, + "logits/chosen": -1.0701490640640259, + "logits/rejected": -0.9974640607833862, + "logps/chosen": -1.3847324848175049, + "logps/rejected": -1.6702008247375488, + "loss": 2.0758, + "nll_loss": 0.46398162841796875, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1384732574224472, + "rewards/margins": 0.02854682132601738, + "rewards/rejected": -0.16702008247375488, + "step": 915 + }, + { + "epoch": 2.4152933421226104, + "grad_norm": 10.645037651062012, + "learning_rate": 1.5687666370896183e-06, + "log_odds_chosen": 0.6083018183708191, + "log_odds_ratio": -0.4520820677280426, + "logits/chosen": -1.2035808563232422, + "logits/rejected": -1.0772027969360352, + "logps/chosen": -1.566821813583374, + "logps/rejected": -2.0758349895477295, + "loss": 2.8514, + "nll_loss": 0.6676511168479919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15668217837810516, + "rewards/margins": 0.05090133473277092, + "rewards/rejected": -0.20758351683616638, + "step": 916 + }, + { + "epoch": 2.4179301252471985, + "grad_norm": 10.445541381835938, + "learning_rate": 1.5616681455190772e-06, + "log_odds_chosen": 0.2778787314891815, + "log_odds_ratio": -0.5730139017105103, + "logits/chosen": -1.122854232788086, + "logits/rejected": -1.0918922424316406, + "logps/chosen": -1.3809865713119507, + "logps/rejected": -1.6034111976623535, + "loss": 2.2505, + "nll_loss": 0.5053151249885559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13809865713119507, + "rewards/margins": 0.022242471575737, + "rewards/rejected": -0.16034114360809326, + "step": 917 + }, + { + "epoch": 2.4205669083717862, + "grad_norm": 10.097089767456055, + "learning_rate": 1.5545696539485357e-06, + "log_odds_chosen": 0.3447312116622925, + "log_odds_ratio": -0.5569623112678528, + "logits/chosen": -1.0964230298995972, + "logits/rejected": -1.018293857574463, + "logps/chosen": -1.2689425945281982, + "logps/rejected": -1.5412919521331787, + "loss": 2.3839, + "nll_loss": 0.5402828454971313, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1268942505121231, + "rewards/margins": 0.027234943583607674, + "rewards/rejected": -0.15412919223308563, + "step": 918 + }, + { + "epoch": 2.4232036914963744, + "grad_norm": 10.062116622924805, + "learning_rate": 1.5474711623779946e-06, + "log_odds_chosen": 0.40707215666770935, + "log_odds_ratio": -0.5170718431472778, + "logits/chosen": -1.1261249780654907, + "logits/rejected": -1.0706229209899902, + "logps/chosen": -1.3197919130325317, + "logps/rejected": -1.6365785598754883, + "loss": 2.1433, + "nll_loss": 0.4841257929801941, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13197919726371765, + "rewards/margins": 0.03167865425348282, + "rewards/rejected": -0.16365784406661987, + "step": 919 + }, + { + "epoch": 2.4258404746209625, + "grad_norm": 9.968379020690918, + "learning_rate": 1.5403726708074533e-06, + "log_odds_chosen": 0.9766196012496948, + "log_odds_ratio": -0.38572365045547485, + "logits/chosen": -1.0680983066558838, + "logits/rejected": -0.9757612347602844, + "logps/chosen": -1.1962252855300903, + "logps/rejected": -1.8891607522964478, + "loss": 2.0798, + "nll_loss": 0.48137712478637695, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11962252855300903, + "rewards/margins": 0.0692935436964035, + "rewards/rejected": -0.18891608715057373, + "step": 920 + }, + { + "epoch": 2.4284772577455502, + "grad_norm": 10.270686149597168, + "learning_rate": 1.5332741792369122e-06, + "log_odds_chosen": 0.7579600214958191, + "log_odds_ratio": -0.40435367822647095, + "logits/chosen": -1.077327013015747, + "logits/rejected": -0.954658567905426, + "logps/chosen": -1.3990119695663452, + "logps/rejected": -2.0181727409362793, + "loss": 2.1346, + "nll_loss": 0.4932202696800232, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13990119099617004, + "rewards/margins": 0.06191607564687729, + "rewards/rejected": -0.20181725919246674, + "step": 921 + }, + { + "epoch": 2.4311140408701384, + "grad_norm": 10.072905540466309, + "learning_rate": 1.5261756876663707e-06, + "log_odds_chosen": 0.539434015750885, + "log_odds_ratio": -0.48120465874671936, + "logits/chosen": -1.085160255432129, + "logits/rejected": -0.9633433222770691, + "logps/chosen": -1.4367750883102417, + "logps/rejected": -1.837584137916565, + "loss": 2.2237, + "nll_loss": 0.5078064799308777, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1436775028705597, + "rewards/margins": 0.040080904960632324, + "rewards/rejected": -0.1837584227323532, + "step": 922 + }, + { + "epoch": 2.4337508239947265, + "grad_norm": 10.620375633239746, + "learning_rate": 1.5190771960958296e-06, + "log_odds_chosen": 0.19084365665912628, + "log_odds_ratio": -0.6263587474822998, + "logits/chosen": -1.134689211845398, + "logits/rejected": -1.024929165840149, + "logps/chosen": -1.4626632928848267, + "logps/rejected": -1.6243414878845215, + "loss": 2.854, + "nll_loss": 0.6508681774139404, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14626634120941162, + "rewards/margins": 0.016167812049388885, + "rewards/rejected": -0.1624341458082199, + "step": 923 + }, + { + "epoch": 2.4363876071193147, + "grad_norm": 9.383076667785645, + "learning_rate": 1.5119787045252883e-06, + "log_odds_chosen": 0.7854894399642944, + "log_odds_ratio": -0.42655807733535767, + "logits/chosen": -1.0859063863754272, + "logits/rejected": -1.0121045112609863, + "logps/chosen": -1.3842235803604126, + "logps/rejected": -2.0174098014831543, + "loss": 2.3423, + "nll_loss": 0.5429260730743408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13842236995697021, + "rewards/margins": 0.06331861764192581, + "rewards/rejected": -0.20174098014831543, + "step": 924 + }, + { + "epoch": 2.4390243902439024, + "grad_norm": 9.584192276000977, + "learning_rate": 1.5048802129547472e-06, + "log_odds_chosen": 0.8783799409866333, + "log_odds_ratio": -0.37397417426109314, + "logits/chosen": -1.03621506690979, + "logits/rejected": -0.9416929483413696, + "logps/chosen": -1.3316413164138794, + "logps/rejected": -2.048794746398926, + "loss": 1.7439, + "nll_loss": 0.3985658884048462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13316413760185242, + "rewards/margins": 0.0717153549194336, + "rewards/rejected": -0.204879492521286, + "step": 925 + }, + { + "epoch": 2.4416611733684905, + "grad_norm": 10.248364448547363, + "learning_rate": 1.4977817213842058e-06, + "log_odds_chosen": 0.604690670967102, + "log_odds_ratio": -0.451438844203949, + "logits/chosen": -1.1782548427581787, + "logits/rejected": -1.0685997009277344, + "logps/chosen": -1.2413527965545654, + "logps/rejected": -1.7049214839935303, + "loss": 2.5042, + "nll_loss": 0.5809139013290405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12413527816534042, + "rewards/margins": 0.04635685682296753, + "rewards/rejected": -0.17049214243888855, + "step": 926 + }, + { + "epoch": 2.4442979564930782, + "grad_norm": 9.285633087158203, + "learning_rate": 1.4906832298136645e-06, + "log_odds_chosen": 0.8101198673248291, + "log_odds_ratio": -0.37296995520591736, + "logits/chosen": -1.0476787090301514, + "logits/rejected": -0.9622210264205933, + "logps/chosen": -1.233733892440796, + "logps/rejected": -1.853700876235962, + "loss": 1.8566, + "nll_loss": 0.4268467426300049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12337338924407959, + "rewards/margins": 0.061996713280677795, + "rewards/rejected": -0.1853700876235962, + "step": 927 + }, + { + "epoch": 2.4469347396176664, + "grad_norm": 8.915876388549805, + "learning_rate": 1.4835847382431232e-06, + "log_odds_chosen": 0.36250048875808716, + "log_odds_ratio": -0.5382492542266846, + "logits/chosen": -1.1555805206298828, + "logits/rejected": -1.116523027420044, + "logps/chosen": -1.1413676738739014, + "logps/rejected": -1.4002259969711304, + "loss": 2.0615, + "nll_loss": 0.4615498185157776, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11413675546646118, + "rewards/margins": 0.025885840877890587, + "rewards/rejected": -0.14002260565757751, + "step": 928 + }, + { + "epoch": 2.4495715227422545, + "grad_norm": 10.278735160827637, + "learning_rate": 1.4764862466725821e-06, + "log_odds_chosen": 0.38574719429016113, + "log_odds_ratio": -0.5276346802711487, + "logits/chosen": -1.1740241050720215, + "logits/rejected": -1.1242327690124512, + "logps/chosen": -1.2782037258148193, + "logps/rejected": -1.5531538724899292, + "loss": 2.7706, + "nll_loss": 0.639897882938385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12782037258148193, + "rewards/margins": 0.027495015412569046, + "rewards/rejected": -0.15531539916992188, + "step": 929 + }, + { + "epoch": 2.4522083058668427, + "grad_norm": 9.918184280395508, + "learning_rate": 1.4693877551020408e-06, + "log_odds_chosen": 0.4487338066101074, + "log_odds_ratio": -0.5068457126617432, + "logits/chosen": -1.1033174991607666, + "logits/rejected": -1.0059428215026855, + "logps/chosen": -1.403915524482727, + "logps/rejected": -1.7551565170288086, + "loss": 2.3905, + "nll_loss": 0.5469300150871277, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14039155840873718, + "rewards/margins": 0.035124097019433975, + "rewards/rejected": -0.17551565170288086, + "step": 930 + }, + { + "epoch": 2.4548450889914304, + "grad_norm": 8.942968368530273, + "learning_rate": 1.4622892635314995e-06, + "log_odds_chosen": 0.7661544680595398, + "log_odds_ratio": -0.4091379940509796, + "logits/chosen": -1.0771650075912476, + "logits/rejected": -0.9967333078384399, + "logps/chosen": -1.1602619886398315, + "logps/rejected": -1.7546803951263428, + "loss": 1.883, + "nll_loss": 0.42984068393707275, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11602620780467987, + "rewards/margins": 0.05944184586405754, + "rewards/rejected": -0.1754680573940277, + "step": 931 + }, + { + "epoch": 2.4574818721160185, + "grad_norm": 10.314979553222656, + "learning_rate": 1.4551907719609582e-06, + "log_odds_chosen": 0.9014593362808228, + "log_odds_ratio": -0.3533499538898468, + "logits/chosen": -1.0980167388916016, + "logits/rejected": -0.9716736078262329, + "logps/chosen": -1.2577919960021973, + "logps/rejected": -1.9532355070114136, + "loss": 2.1167, + "nll_loss": 0.4938514828681946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12577921152114868, + "rewards/margins": 0.06954433768987656, + "rewards/rejected": -0.19532354176044464, + "step": 932 + }, + { + "epoch": 2.4601186552406062, + "grad_norm": 10.245210647583008, + "learning_rate": 1.448092280390417e-06, + "log_odds_chosen": 0.7105802893638611, + "log_odds_ratio": -0.4090927541255951, + "logits/chosen": -1.1053932905197144, + "logits/rejected": -0.9996531009674072, + "logps/chosen": -1.353420376777649, + "logps/rejected": -1.9266541004180908, + "loss": 2.277, + "nll_loss": 0.5283440351486206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1353420466184616, + "rewards/margins": 0.05732336640357971, + "rewards/rejected": -0.19266541302204132, + "step": 933 + }, + { + "epoch": 2.4627554383651944, + "grad_norm": 10.484724998474121, + "learning_rate": 1.4409937888198758e-06, + "log_odds_chosen": 0.6949914693832397, + "log_odds_ratio": -0.41040536761283875, + "logits/chosen": -1.1644728183746338, + "logits/rejected": -1.0214042663574219, + "logps/chosen": -1.4014010429382324, + "logps/rejected": -1.9624037742614746, + "loss": 2.5328, + "nll_loss": 0.5921629667282104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1401400864124298, + "rewards/margins": 0.0561002753674984, + "rewards/rejected": -0.1962403804063797, + "step": 934 + }, + { + "epoch": 2.4653922214897825, + "grad_norm": 9.716775894165039, + "learning_rate": 1.4338952972493345e-06, + "log_odds_chosen": 0.860659658908844, + "log_odds_ratio": -0.37499281764030457, + "logits/chosen": -1.0980603694915771, + "logits/rejected": -1.0053725242614746, + "logps/chosen": -1.1715384721755981, + "logps/rejected": -1.8061293363571167, + "loss": 1.8292, + "nll_loss": 0.41981303691864014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11715385317802429, + "rewards/margins": 0.06345908343791962, + "rewards/rejected": -0.18061292171478271, + "step": 935 + }, + { + "epoch": 2.4680290046143707, + "grad_norm": 9.445055961608887, + "learning_rate": 1.4267968056787931e-06, + "log_odds_chosen": 0.34475648403167725, + "log_odds_ratio": -0.5601778626441956, + "logits/chosen": -1.0520339012145996, + "logits/rejected": -0.9856969118118286, + "logps/chosen": -1.2843670845031738, + "logps/rejected": -1.5656042098999023, + "loss": 2.1506, + "nll_loss": 0.4816225469112396, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12843671441078186, + "rewards/margins": 0.028123700991272926, + "rewards/rejected": -0.15656042098999023, + "step": 936 + }, + { + "epoch": 2.4706657877389584, + "grad_norm": 10.8469877243042, + "learning_rate": 1.4196983141082518e-06, + "log_odds_chosen": 0.5317217707633972, + "log_odds_ratio": -0.4845069348812103, + "logits/chosen": -1.1012637615203857, + "logits/rejected": -1.030122995376587, + "logps/chosen": -1.5236291885375977, + "logps/rejected": -1.9571136236190796, + "loss": 2.2323, + "nll_loss": 0.5096133351325989, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1523628979921341, + "rewards/margins": 0.04334845766425133, + "rewards/rejected": -0.19571135938167572, + "step": 937 + }, + { + "epoch": 2.4733025708635465, + "grad_norm": 9.446576118469238, + "learning_rate": 1.4125998225377107e-06, + "log_odds_chosen": 0.6366671323776245, + "log_odds_ratio": -0.4362477660179138, + "logits/chosen": -1.0753679275512695, + "logits/rejected": -0.9698559045791626, + "logps/chosen": -1.1360713243484497, + "logps/rejected": -1.6101000308990479, + "loss": 1.7274, + "nll_loss": 0.38822537660598755, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11360714584589005, + "rewards/margins": 0.04740285128355026, + "rewards/rejected": -0.1610099971294403, + "step": 938 + }, + { + "epoch": 2.4759393539881343, + "grad_norm": 10.09611988067627, + "learning_rate": 1.4055013309671694e-06, + "log_odds_chosen": 0.6295111179351807, + "log_odds_ratio": -0.4323040843009949, + "logits/chosen": -1.1586459875106812, + "logits/rejected": -1.04908287525177, + "logps/chosen": -1.2559092044830322, + "logps/rejected": -1.7403661012649536, + "loss": 2.2095, + "nll_loss": 0.5091511607170105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12559092044830322, + "rewards/margins": 0.0484456792473793, + "rewards/rejected": -0.17403662204742432, + "step": 939 + }, + { + "epoch": 2.4785761371127224, + "grad_norm": 10.044174194335938, + "learning_rate": 1.398402839396628e-06, + "log_odds_chosen": 0.44056281447410583, + "log_odds_ratio": -0.5164551734924316, + "logits/chosen": -1.1366794109344482, + "logits/rejected": -0.9838275909423828, + "logps/chosen": -1.3997766971588135, + "logps/rejected": -1.7464218139648438, + "loss": 2.4201, + "nll_loss": 0.5533714294433594, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13997766375541687, + "rewards/margins": 0.03466450050473213, + "rewards/rejected": -0.1746421754360199, + "step": 940 + }, + { + "epoch": 2.4812129202373105, + "grad_norm": 10.043971061706543, + "learning_rate": 1.3913043478260868e-06, + "log_odds_chosen": 0.36631205677986145, + "log_odds_ratio": -0.5362348556518555, + "logits/chosen": -1.0773563385009766, + "logits/rejected": -1.0182524919509888, + "logps/chosen": -1.2820074558258057, + "logps/rejected": -1.5590157508850098, + "loss": 2.5162, + "nll_loss": 0.5754203200340271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12820076942443848, + "rewards/margins": 0.027700817212462425, + "rewards/rejected": -0.15590158104896545, + "step": 941 + }, + { + "epoch": 2.4838497033618987, + "grad_norm": 10.077068328857422, + "learning_rate": 1.3842058562555457e-06, + "log_odds_chosen": 0.6632999777793884, + "log_odds_ratio": -0.4329252243041992, + "logits/chosen": -1.0947636365890503, + "logits/rejected": -1.0356365442276, + "logps/chosen": -1.3468356132507324, + "logps/rejected": -1.8621776103973389, + "loss": 2.2513, + "nll_loss": 0.5195324420928955, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13468356430530548, + "rewards/margins": 0.05153419449925423, + "rewards/rejected": -0.1862177550792694, + "step": 942 + }, + { + "epoch": 2.4864864864864864, + "grad_norm": 9.562148094177246, + "learning_rate": 1.3771073646850044e-06, + "log_odds_chosen": 1.0629949569702148, + "log_odds_ratio": -0.38897430896759033, + "logits/chosen": -1.144912600517273, + "logits/rejected": -1.0449944734573364, + "logps/chosen": -1.256197452545166, + "logps/rejected": -2.1591362953186035, + "loss": 2.062, + "nll_loss": 0.4766029119491577, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12561975419521332, + "rewards/margins": 0.09029386937618256, + "rewards/rejected": -0.21591362357139587, + "step": 943 + }, + { + "epoch": 2.4891232696110746, + "grad_norm": 9.22219467163086, + "learning_rate": 1.370008873114463e-06, + "log_odds_chosen": 0.456211119890213, + "log_odds_ratio": -0.5188755989074707, + "logits/chosen": -1.0710067749023438, + "logits/rejected": -1.0109732151031494, + "logps/chosen": -1.1019004583358765, + "logps/rejected": -1.4601200819015503, + "loss": 2.0044, + "nll_loss": 0.44920358061790466, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11019004881381989, + "rewards/margins": 0.03582196310162544, + "rewards/rejected": -0.14601200819015503, + "step": 944 + }, + { + "epoch": 2.4917600527356623, + "grad_norm": 10.367738723754883, + "learning_rate": 1.3629103815439218e-06, + "log_odds_chosen": 0.5476827025413513, + "log_odds_ratio": -0.47168028354644775, + "logits/chosen": -1.2086514234542847, + "logits/rejected": -1.066209316253662, + "logps/chosen": -1.3265548944473267, + "logps/rejected": -1.7548052072525024, + "loss": 2.438, + "nll_loss": 0.5623201131820679, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13265548646450043, + "rewards/margins": 0.042825035750865936, + "rewards/rejected": -0.17548052966594696, + "step": 945 + }, + { + "epoch": 2.4943968358602504, + "grad_norm": 10.300585746765137, + "learning_rate": 1.3558118899733807e-06, + "log_odds_chosen": 0.49440160393714905, + "log_odds_ratio": -0.4904671013355255, + "logits/chosen": -1.1634931564331055, + "logits/rejected": -1.1151797771453857, + "logps/chosen": -1.3160359859466553, + "logps/rejected": -1.7080636024475098, + "loss": 2.5215, + "nll_loss": 0.5813170671463013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13160359859466553, + "rewards/margins": 0.03920276463031769, + "rewards/rejected": -0.1708063781261444, + "step": 946 + }, + { + "epoch": 2.4970336189848386, + "grad_norm": 9.261826515197754, + "learning_rate": 1.3487133984028393e-06, + "log_odds_chosen": 0.48070111870765686, + "log_odds_ratio": -0.506763219833374, + "logits/chosen": -1.059067964553833, + "logits/rejected": -0.9914778470993042, + "logps/chosen": -1.228057861328125, + "logps/rejected": -1.629408836364746, + "loss": 1.8701, + "nll_loss": 0.4168543219566345, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12280578911304474, + "rewards/margins": 0.040135085582733154, + "rewards/rejected": -0.1629408746957779, + "step": 947 + }, + { + "epoch": 2.4996704021094267, + "grad_norm": 9.628554344177246, + "learning_rate": 1.3416149068322982e-06, + "log_odds_chosen": 0.6004096269607544, + "log_odds_ratio": -0.4586912989616394, + "logits/chosen": -1.0332534313201904, + "logits/rejected": -0.9923993945121765, + "logps/chosen": -1.2210625410079956, + "logps/rejected": -1.6863110065460205, + "loss": 1.7498, + "nll_loss": 0.3915759325027466, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12210625410079956, + "rewards/margins": 0.04652484133839607, + "rewards/rejected": -0.16863110661506653, + "step": 948 + }, + { + "epoch": 2.5023071852340144, + "grad_norm": 9.986357688903809, + "learning_rate": 1.3345164152617567e-06, + "log_odds_chosen": 0.8193560838699341, + "log_odds_ratio": -0.4735927879810333, + "logits/chosen": -1.0893129110336304, + "logits/rejected": -1.0309288501739502, + "logps/chosen": -1.295593500137329, + "logps/rejected": -1.9911038875579834, + "loss": 1.9891, + "nll_loss": 0.4499149024486542, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12955935299396515, + "rewards/margins": 0.06955103576183319, + "rewards/rejected": -0.19911038875579834, + "step": 949 + }, + { + "epoch": 2.5049439683586026, + "grad_norm": 9.801891326904297, + "learning_rate": 1.3274179236912156e-06, + "log_odds_chosen": 0.6543738842010498, + "log_odds_ratio": -0.438315212726593, + "logits/chosen": -1.1201646327972412, + "logits/rejected": -0.994601845741272, + "logps/chosen": -1.2346127033233643, + "logps/rejected": -1.7307636737823486, + "loss": 2.2012, + "nll_loss": 0.5064605474472046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1234612762928009, + "rewards/margins": 0.049615100026130676, + "rewards/rejected": -0.17307636141777039, + "step": 950 + }, + { + "epoch": 2.5075807514831903, + "grad_norm": 9.423513412475586, + "learning_rate": 1.3203194321206743e-06, + "log_odds_chosen": 0.7122058868408203, + "log_odds_ratio": -0.41069626808166504, + "logits/chosen": -1.0354974269866943, + "logits/rejected": -0.953643262386322, + "logps/chosen": -1.1242071390151978, + "logps/rejected": -1.6442720890045166, + "loss": 1.7784, + "nll_loss": 0.40354055166244507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1124207079410553, + "rewards/margins": 0.052006494253873825, + "rewards/rejected": -0.16442719101905823, + "step": 951 + }, + { + "epoch": 2.5102175346077784, + "grad_norm": 9.538496017456055, + "learning_rate": 1.3132209405501332e-06, + "log_odds_chosen": 0.7136922478675842, + "log_odds_ratio": -0.41526490449905396, + "logits/chosen": -1.1126666069030762, + "logits/rejected": -0.994556725025177, + "logps/chosen": -1.1598321199417114, + "logps/rejected": -1.6915475130081177, + "loss": 1.8733, + "nll_loss": 0.4268023371696472, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11598322540521622, + "rewards/margins": 0.053171537816524506, + "rewards/rejected": -0.16915476322174072, + "step": 952 + }, + { + "epoch": 2.5128543177323666, + "grad_norm": 9.990797996520996, + "learning_rate": 1.3061224489795917e-06, + "log_odds_chosen": 0.5567875504493713, + "log_odds_ratio": -0.47149914503097534, + "logits/chosen": -1.1117167472839355, + "logits/rejected": -1.0665760040283203, + "logps/chosen": -1.40949285030365, + "logps/rejected": -1.861353874206543, + "loss": 2.4129, + "nll_loss": 0.5560762882232666, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1409492939710617, + "rewards/margins": 0.045186106115579605, + "rewards/rejected": -0.186135396361351, + "step": 953 + }, + { + "epoch": 2.5154911008569547, + "grad_norm": 10.886003494262695, + "learning_rate": 1.2990239574090504e-06, + "log_odds_chosen": 0.585273265838623, + "log_odds_ratio": -0.4497223198413849, + "logits/chosen": -1.2316553592681885, + "logits/rejected": -1.067345380783081, + "logps/chosen": -1.340728759765625, + "logps/rejected": -1.8043076992034912, + "loss": 2.6535, + "nll_loss": 0.6184085607528687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1340728998184204, + "rewards/margins": 0.0463578887283802, + "rewards/rejected": -0.18043076992034912, + "step": 954 + }, + { + "epoch": 2.5181278839815424, + "grad_norm": 11.22103214263916, + "learning_rate": 1.2919254658385093e-06, + "log_odds_chosen": 0.46139469742774963, + "log_odds_ratio": -0.5149021744728088, + "logits/chosen": -1.1275732517242432, + "logits/rejected": -1.0286986827850342, + "logps/chosen": -1.507557988166809, + "logps/rejected": -1.8830695152282715, + "loss": 2.4608, + "nll_loss": 0.5636999607086182, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15075579285621643, + "rewards/margins": 0.0375511571764946, + "rewards/rejected": -0.18830695748329163, + "step": 955 + }, + { + "epoch": 2.5207646671061306, + "grad_norm": 10.8544921875, + "learning_rate": 1.284826974267968e-06, + "log_odds_chosen": 0.5111101865768433, + "log_odds_ratio": -0.4778319001197815, + "logits/chosen": -1.109304428100586, + "logits/rejected": -0.9567216038703918, + "logps/chosen": -1.4662706851959229, + "logps/rejected": -1.87843656539917, + "loss": 2.3825, + "nll_loss": 0.5478419661521912, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14662706851959229, + "rewards/margins": 0.04121658578515053, + "rewards/rejected": -0.18784365057945251, + "step": 956 + }, + { + "epoch": 2.5234014502307183, + "grad_norm": 9.740726470947266, + "learning_rate": 1.2777284826974269e-06, + "log_odds_chosen": 0.6970908045768738, + "log_odds_ratio": -0.41333848237991333, + "logits/chosen": -1.038455843925476, + "logits/rejected": -0.9585951566696167, + "logps/chosen": -1.2818219661712646, + "logps/rejected": -1.8254046440124512, + "loss": 1.9038, + "nll_loss": 0.43462392687797546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12818220257759094, + "rewards/margins": 0.05435826629400253, + "rewards/rejected": -0.18254047632217407, + "step": 957 + }, + { + "epoch": 2.5260382333553064, + "grad_norm": 10.290287971496582, + "learning_rate": 1.2706299911268853e-06, + "log_odds_chosen": 0.6766616106033325, + "log_odds_ratio": -0.42089855670928955, + "logits/chosen": -1.1274069547653198, + "logits/rejected": -0.9903121590614319, + "logps/chosen": -1.4189093112945557, + "logps/rejected": -1.977081060409546, + "loss": 2.3508, + "nll_loss": 0.5456117987632751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14189092814922333, + "rewards/margins": 0.055817171931266785, + "rewards/rejected": -0.1977081000804901, + "step": 958 + }, + { + "epoch": 2.5286750164798946, + "grad_norm": 10.350006103515625, + "learning_rate": 1.2635314995563442e-06, + "log_odds_chosen": 0.7320818305015564, + "log_odds_ratio": -0.49115613102912903, + "logits/chosen": -1.0401033163070679, + "logits/rejected": -1.0141887664794922, + "logps/chosen": -1.305769443511963, + "logps/rejected": -1.9269342422485352, + "loss": 2.0069, + "nll_loss": 0.4525972008705139, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1305769383907318, + "rewards/margins": 0.06211646646261215, + "rewards/rejected": -0.19269341230392456, + "step": 959 + }, + { + "epoch": 2.5313117996044827, + "grad_norm": 10.294855117797852, + "learning_rate": 1.256433007985803e-06, + "log_odds_chosen": 0.5948234796524048, + "log_odds_ratio": -0.46262726187705994, + "logits/chosen": -1.1262372732162476, + "logits/rejected": -1.0458968877792358, + "logps/chosen": -1.3608264923095703, + "logps/rejected": -1.8277140855789185, + "loss": 2.3149, + "nll_loss": 0.5324715971946716, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13608264923095703, + "rewards/margins": 0.046688761562108994, + "rewards/rejected": -0.18277139961719513, + "step": 960 + }, + { + "epoch": 2.5339485827290704, + "grad_norm": 9.843774795532227, + "learning_rate": 1.2493345164152618e-06, + "log_odds_chosen": 0.39834561944007874, + "log_odds_ratio": -0.5356795191764832, + "logits/chosen": -1.1416336297988892, + "logits/rejected": -1.0767290592193604, + "logps/chosen": -1.345848560333252, + "logps/rejected": -1.6646955013275146, + "loss": 2.4462, + "nll_loss": 0.5579822659492493, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13458484411239624, + "rewards/margins": 0.031884703785181046, + "rewards/rejected": -0.166469544172287, + "step": 961 + }, + { + "epoch": 2.5365853658536586, + "grad_norm": 10.40951919555664, + "learning_rate": 1.2422360248447203e-06, + "log_odds_chosen": 0.6294993162155151, + "log_odds_ratio": -0.4509700536727905, + "logits/chosen": -1.213744044303894, + "logits/rejected": -1.1220805644989014, + "logps/chosen": -1.324014663696289, + "logps/rejected": -1.8238645792007446, + "loss": 2.3754, + "nll_loss": 0.5487492084503174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1324014663696289, + "rewards/margins": 0.04998498782515526, + "rewards/rejected": -0.18238645792007446, + "step": 962 + }, + { + "epoch": 2.5392221489782463, + "grad_norm": 9.549568176269531, + "learning_rate": 1.2351375332741792e-06, + "log_odds_chosen": 0.6109352111816406, + "log_odds_ratio": -0.4740837514400482, + "logits/chosen": -1.0815215110778809, + "logits/rejected": -0.9629336595535278, + "logps/chosen": -1.2634634971618652, + "logps/rejected": -1.7190935611724854, + "loss": 1.9363, + "nll_loss": 0.4366764724254608, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12634634971618652, + "rewards/margins": 0.04556302726268768, + "rewards/rejected": -0.171909362077713, + "step": 963 + }, + { + "epoch": 2.5418589321028344, + "grad_norm": 9.77994441986084, + "learning_rate": 1.2280390417036379e-06, + "log_odds_chosen": 0.46185052394866943, + "log_odds_ratio": -0.5055705904960632, + "logits/chosen": -1.1587986946105957, + "logits/rejected": -1.0876436233520508, + "logps/chosen": -1.1706972122192383, + "logps/rejected": -1.5135177373886108, + "loss": 2.0106, + "nll_loss": 0.4520954489707947, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11706972867250443, + "rewards/margins": 0.03428204730153084, + "rewards/rejected": -0.15135176479816437, + "step": 964 + }, + { + "epoch": 2.5444957152274226, + "grad_norm": 10.625575065612793, + "learning_rate": 1.2209405501330968e-06, + "log_odds_chosen": 0.8708397150039673, + "log_odds_ratio": -0.37743672728538513, + "logits/chosen": -1.0998449325561523, + "logits/rejected": -1.0114262104034424, + "logps/chosen": -1.4298604726791382, + "logps/rejected": -2.1550512313842773, + "loss": 2.1465, + "nll_loss": 0.49889129400253296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14298602938652039, + "rewards/margins": 0.07251909375190735, + "rewards/rejected": -0.21550512313842773, + "step": 965 + }, + { + "epoch": 2.5471324983520107, + "grad_norm": 9.72364330291748, + "learning_rate": 1.2138420585625555e-06, + "log_odds_chosen": 0.5993391871452332, + "log_odds_ratio": -0.4506445825099945, + "logits/chosen": -1.15058171749115, + "logits/rejected": -1.0429490804672241, + "logps/chosen": -1.2814643383026123, + "logps/rejected": -1.7237976789474487, + "loss": 2.4773, + "nll_loss": 0.5742565393447876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1281464397907257, + "rewards/margins": 0.04423331469297409, + "rewards/rejected": -0.1723797619342804, + "step": 966 + }, + { + "epoch": 2.5497692814765984, + "grad_norm": 9.3811616897583, + "learning_rate": 1.2067435669920142e-06, + "log_odds_chosen": 0.8285520672798157, + "log_odds_ratio": -0.46860355138778687, + "logits/chosen": -1.133819818496704, + "logits/rejected": -1.083085536956787, + "logps/chosen": -1.2597310543060303, + "logps/rejected": -1.9405475854873657, + "loss": 2.148, + "nll_loss": 0.49013227224349976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12597310543060303, + "rewards/margins": 0.06808167695999146, + "rewards/rejected": -0.19405478239059448, + "step": 967 + }, + { + "epoch": 2.5524060646011866, + "grad_norm": 9.492497444152832, + "learning_rate": 1.1996450754214728e-06, + "log_odds_chosen": 0.5317522287368774, + "log_odds_ratio": -0.4742090702056885, + "logits/chosen": -1.0644326210021973, + "logits/rejected": -1.0012681484222412, + "logps/chosen": -1.2771978378295898, + "logps/rejected": -1.6803510189056396, + "loss": 1.9058, + "nll_loss": 0.42904096841812134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12771978974342346, + "rewards/margins": 0.04031532257795334, + "rewards/rejected": -0.1680351048707962, + "step": 968 + }, + { + "epoch": 2.5550428477257743, + "grad_norm": 10.43970775604248, + "learning_rate": 1.1925465838509317e-06, + "log_odds_chosen": 0.3138500154018402, + "log_odds_ratio": -0.5608047246932983, + "logits/chosen": -1.1299519538879395, + "logits/rejected": -1.0770107507705688, + "logps/chosen": -1.4027135372161865, + "logps/rejected": -1.6526198387145996, + "loss": 2.7312, + "nll_loss": 0.6267147660255432, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1402713656425476, + "rewards/margins": 0.024990636855363846, + "rewards/rejected": -0.16526201367378235, + "step": 969 + }, + { + "epoch": 2.5576796308503624, + "grad_norm": 10.022076606750488, + "learning_rate": 1.1854480922803904e-06, + "log_odds_chosen": 0.7182021737098694, + "log_odds_ratio": -0.4174467921257019, + "logits/chosen": -1.108182668685913, + "logits/rejected": -1.0129443407058716, + "logps/chosen": -1.3148906230926514, + "logps/rejected": -1.883524775505066, + "loss": 2.5325, + "nll_loss": 0.591376781463623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13148906826972961, + "rewards/margins": 0.05686340853571892, + "rewards/rejected": -0.18835246562957764, + "step": 970 + }, + { + "epoch": 2.5603164139749506, + "grad_norm": 9.961695671081543, + "learning_rate": 1.178349600709849e-06, + "log_odds_chosen": 0.7222421765327454, + "log_odds_ratio": -0.46626126766204834, + "logits/chosen": -1.1342153549194336, + "logits/rejected": -1.0204015970230103, + "logps/chosen": -1.2801722288131714, + "logps/rejected": -1.8723406791687012, + "loss": 2.3123, + "nll_loss": 0.5314509272575378, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12801721692085266, + "rewards/margins": 0.05921683460474014, + "rewards/rejected": -0.1872340738773346, + "step": 971 + }, + { + "epoch": 2.5629531970995387, + "grad_norm": 10.804487228393555, + "learning_rate": 1.1712511091393078e-06, + "log_odds_chosen": 0.3321448564529419, + "log_odds_ratio": -0.5600012540817261, + "logits/chosen": -1.1126806735992432, + "logits/rejected": -1.0470176935195923, + "logps/chosen": -1.4884405136108398, + "logps/rejected": -1.739980697631836, + "loss": 2.9032, + "nll_loss": 0.6698001623153687, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14884406328201294, + "rewards/margins": 0.02515401318669319, + "rewards/rejected": -0.17399807274341583, + "step": 972 + }, + { + "epoch": 2.5655899802241264, + "grad_norm": 10.045714378356934, + "learning_rate": 1.1641526175687665e-06, + "log_odds_chosen": 0.5490153431892395, + "log_odds_ratio": -0.46695026755332947, + "logits/chosen": -1.1155519485473633, + "logits/rejected": -1.0676300525665283, + "logps/chosen": -1.1715837717056274, + "logps/rejected": -1.5821819305419922, + "loss": 2.3139, + "nll_loss": 0.5317729115486145, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11715838313102722, + "rewards/margins": 0.04105980694293976, + "rewards/rejected": -0.15821819007396698, + "step": 973 + }, + { + "epoch": 2.5682267633487146, + "grad_norm": 10.197787284851074, + "learning_rate": 1.1570541259982254e-06, + "log_odds_chosen": 0.8162627220153809, + "log_odds_ratio": -0.40598583221435547, + "logits/chosen": -1.1592998504638672, + "logits/rejected": -1.0291635990142822, + "logps/chosen": -1.3154046535491943, + "logps/rejected": -1.9530718326568604, + "loss": 2.2154, + "nll_loss": 0.5132494568824768, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1315404623746872, + "rewards/margins": 0.0637667179107666, + "rewards/rejected": -0.1953071802854538, + "step": 974 + }, + { + "epoch": 2.5708635464733027, + "grad_norm": 10.279739379882812, + "learning_rate": 1.149955634427684e-06, + "log_odds_chosen": 0.480248361825943, + "log_odds_ratio": -0.49725350737571716, + "logits/chosen": -1.1203275918960571, + "logits/rejected": -1.0338075160980225, + "logps/chosen": -1.4031107425689697, + "logps/rejected": -1.7773079872131348, + "loss": 2.8042, + "nll_loss": 0.6513198614120483, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1403110772371292, + "rewards/margins": 0.03741970658302307, + "rewards/rejected": -0.17773079872131348, + "step": 975 + }, + { + "epoch": 2.5735003295978904, + "grad_norm": 9.14466667175293, + "learning_rate": 1.1428571428571428e-06, + "log_odds_chosen": 0.6685113310813904, + "log_odds_ratio": -0.4547085165977478, + "logits/chosen": -1.0525332689285278, + "logits/rejected": -0.9893536567687988, + "logps/chosen": -1.080911636352539, + "logps/rejected": -1.4747405052185059, + "loss": 1.7507, + "nll_loss": 0.3922029733657837, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10809117555618286, + "rewards/margins": 0.03938288241624832, + "rewards/rejected": -0.14747406542301178, + "step": 976 + }, + { + "epoch": 2.5761371127224786, + "grad_norm": 10.132596969604492, + "learning_rate": 1.1357586512866015e-06, + "log_odds_chosen": 0.6782791018486023, + "log_odds_ratio": -0.4253457188606262, + "logits/chosen": -1.107090950012207, + "logits/rejected": -1.0170270204544067, + "logps/chosen": -1.1491903066635132, + "logps/rejected": -1.6603305339813232, + "loss": 1.968, + "nll_loss": 0.4494568109512329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1149190291762352, + "rewards/margins": 0.051114022731781006, + "rewards/rejected": -0.1660330444574356, + "step": 977 + }, + { + "epoch": 2.5787738958470667, + "grad_norm": 10.886836051940918, + "learning_rate": 1.1286601597160604e-06, + "log_odds_chosen": 0.6357942223548889, + "log_odds_ratio": -0.45238715410232544, + "logits/chosen": -1.1401660442352295, + "logits/rejected": -1.0261318683624268, + "logps/chosen": -1.4753997325897217, + "logps/rejected": -2.0056228637695312, + "loss": 2.4576, + "nll_loss": 0.5691531896591187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14753997325897217, + "rewards/margins": 0.05302230268716812, + "rewards/rejected": -0.20056229829788208, + "step": 978 + }, + { + "epoch": 2.5814106789716544, + "grad_norm": 9.240728378295898, + "learning_rate": 1.121561668145519e-06, + "log_odds_chosen": 0.37126636505126953, + "log_odds_ratio": -0.5295717716217041, + "logits/chosen": -1.1164339780807495, + "logits/rejected": -1.0501753091812134, + "logps/chosen": -1.0403053760528564, + "logps/rejected": -1.2815606594085693, + "loss": 1.9984, + "nll_loss": 0.44663843512535095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.104030542075634, + "rewards/margins": 0.024125533178448677, + "rewards/rejected": -0.12815608084201813, + "step": 979 + }, + { + "epoch": 2.5840474620962426, + "grad_norm": 9.89041519165039, + "learning_rate": 1.1144631765749777e-06, + "log_odds_chosen": 0.46538037061691284, + "log_odds_ratio": -0.4960182309150696, + "logits/chosen": -1.1385443210601807, + "logits/rejected": -1.0360300540924072, + "logps/chosen": -1.3127050399780273, + "logps/rejected": -1.6616865396499634, + "loss": 2.3062, + "nll_loss": 0.5269367098808289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13127049803733826, + "rewards/margins": 0.034898146986961365, + "rewards/rejected": -0.16616865992546082, + "step": 980 + }, + { + "epoch": 2.5866842452208307, + "grad_norm": 10.444801330566406, + "learning_rate": 1.1073646850044364e-06, + "log_odds_chosen": 0.5480303764343262, + "log_odds_ratio": -0.4696999490261078, + "logits/chosen": -1.195300817489624, + "logits/rejected": -1.084096074104309, + "logps/chosen": -1.353794813156128, + "logps/rejected": -1.7875046730041504, + "loss": 2.5014, + "nll_loss": 0.5783883333206177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13537949323654175, + "rewards/margins": 0.043370962142944336, + "rewards/rejected": -0.17875047028064728, + "step": 981 + }, + { + "epoch": 2.5893210283454184, + "grad_norm": 10.700628280639648, + "learning_rate": 1.1002661934338953e-06, + "log_odds_chosen": 0.28562530875205994, + "log_odds_ratio": -0.5658694505691528, + "logits/chosen": -1.1571452617645264, + "logits/rejected": -1.0048047304153442, + "logps/chosen": -1.4264452457427979, + "logps/rejected": -1.650299310684204, + "loss": 3.1581, + "nll_loss": 0.7329351902008057, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14264453947544098, + "rewards/margins": 0.02238539233803749, + "rewards/rejected": -0.16502991318702698, + "step": 982 + }, + { + "epoch": 2.5919578114700066, + "grad_norm": 9.911328315734863, + "learning_rate": 1.093167701863354e-06, + "log_odds_chosen": 0.305277943611145, + "log_odds_ratio": -0.5615626573562622, + "logits/chosen": -1.147043228149414, + "logits/rejected": -1.0166703462600708, + "logps/chosen": -1.2626628875732422, + "logps/rejected": -1.482170581817627, + "loss": 2.2149, + "nll_loss": 0.4975632131099701, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12626628577709198, + "rewards/margins": 0.021950792521238327, + "rewards/rejected": -0.1482170820236206, + "step": 983 + }, + { + "epoch": 2.5945945945945947, + "grad_norm": 10.08964729309082, + "learning_rate": 1.0860692102928127e-06, + "log_odds_chosen": 0.601814866065979, + "log_odds_ratio": -0.44974032044410706, + "logits/chosen": -1.1483750343322754, + "logits/rejected": -1.0418391227722168, + "logps/chosen": -1.345975637435913, + "logps/rejected": -1.8269295692443848, + "loss": 2.444, + "nll_loss": 0.5660194158554077, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1345975697040558, + "rewards/margins": 0.04809538275003433, + "rewards/rejected": -0.18269294500350952, + "step": 984 + }, + { + "epoch": 2.5972313777191824, + "grad_norm": 10.545869827270508, + "learning_rate": 1.0789707187222714e-06, + "log_odds_chosen": 0.6214417219161987, + "log_odds_ratio": -0.45351165533065796, + "logits/chosen": -1.169751763343811, + "logits/rejected": -1.0490249395370483, + "logps/chosen": -1.2687312364578247, + "logps/rejected": -1.7470698356628418, + "loss": 2.3, + "nll_loss": 0.5296406745910645, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12687312066555023, + "rewards/margins": 0.04783385992050171, + "rewards/rejected": -0.17470699548721313, + "step": 985 + }, + { + "epoch": 2.5998681608437706, + "grad_norm": 9.85431957244873, + "learning_rate": 1.0718722271517303e-06, + "log_odds_chosen": 0.7516704797744751, + "log_odds_ratio": -0.4033581614494324, + "logits/chosen": -1.0369821786880493, + "logits/rejected": -0.9772351384162903, + "logps/chosen": -1.2994555234909058, + "logps/rejected": -1.9012876749038696, + "loss": 2.2013, + "nll_loss": 0.5099843740463257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1299455612897873, + "rewards/margins": 0.060183219611644745, + "rewards/rejected": -0.19012877345085144, + "step": 986 + }, + { + "epoch": 2.6025049439683587, + "grad_norm": 9.786620140075684, + "learning_rate": 1.064773735581189e-06, + "log_odds_chosen": 0.8149322271347046, + "log_odds_ratio": -0.3785549998283386, + "logits/chosen": -1.0241363048553467, + "logits/rejected": -0.9378775358200073, + "logps/chosen": -1.2854642868041992, + "logps/rejected": -1.9337173700332642, + "loss": 2.0159, + "nll_loss": 0.46613043546676636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12854641675949097, + "rewards/margins": 0.06482531130313873, + "rewards/rejected": -0.1933717429637909, + "step": 987 + }, + { + "epoch": 2.6051417270929464, + "grad_norm": 8.763384819030762, + "learning_rate": 1.0576752440106479e-06, + "log_odds_chosen": 1.1993224620819092, + "log_odds_ratio": -0.2995642125606537, + "logits/chosen": -1.037737250328064, + "logits/rejected": -0.991753101348877, + "logps/chosen": -1.0106923580169678, + "logps/rejected": -1.905373454093933, + "loss": 1.4312, + "nll_loss": 0.32785069942474365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10106924176216125, + "rewards/margins": 0.0894681066274643, + "rewards/rejected": -0.19053736329078674, + "step": 988 + }, + { + "epoch": 2.6077785102175346, + "grad_norm": 9.29425048828125, + "learning_rate": 1.0505767524401064e-06, + "log_odds_chosen": 0.5740713477134705, + "log_odds_ratio": -0.4664483070373535, + "logits/chosen": -1.084150791168213, + "logits/rejected": -0.9877736568450928, + "logps/chosen": -1.2419626712799072, + "logps/rejected": -1.6699585914611816, + "loss": 1.8763, + "nll_loss": 0.4224218726158142, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12419626116752625, + "rewards/margins": 0.04279961809515953, + "rewards/rejected": -0.16699588298797607, + "step": 989 + }, + { + "epoch": 2.6104152933421227, + "grad_norm": 10.544453620910645, + "learning_rate": 1.043478260869565e-06, + "log_odds_chosen": 0.6520025730133057, + "log_odds_ratio": -0.4675554931163788, + "logits/chosen": -1.2331300973892212, + "logits/rejected": -1.0616133213043213, + "logps/chosen": -1.280714511871338, + "logps/rejected": -1.8071590662002563, + "loss": 2.3621, + "nll_loss": 0.5437787175178528, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12807147204875946, + "rewards/margins": 0.05264444649219513, + "rewards/rejected": -0.1807159185409546, + "step": 990 + }, + { + "epoch": 2.6130520764667104, + "grad_norm": 9.320026397705078, + "learning_rate": 1.036379769299024e-06, + "log_odds_chosen": 0.7140133380889893, + "log_odds_ratio": -0.41474878787994385, + "logits/chosen": -1.0829880237579346, + "logits/rejected": -1.0305976867675781, + "logps/chosen": -1.2512176036834717, + "logps/rejected": -1.7967097759246826, + "loss": 2.1951, + "nll_loss": 0.5073036551475525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12512175738811493, + "rewards/margins": 0.05454923212528229, + "rewards/rejected": -0.17967098951339722, + "step": 991 + }, + { + "epoch": 2.6156888595912986, + "grad_norm": 9.70067024230957, + "learning_rate": 1.0292812777284826e-06, + "log_odds_chosen": 0.7287517786026001, + "log_odds_ratio": -0.40351107716560364, + "logits/chosen": -1.157404899597168, + "logits/rejected": -1.0376935005187988, + "logps/chosen": -1.2435426712036133, + "logps/rejected": -1.8053914308547974, + "loss": 2.1765, + "nll_loss": 0.503783106803894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1243542730808258, + "rewards/margins": 0.05618486925959587, + "rewards/rejected": -0.18053914606571198, + "step": 992 + }, + { + "epoch": 2.6183256427158867, + "grad_norm": 9.9812593460083, + "learning_rate": 1.0221827861579413e-06, + "log_odds_chosen": 0.5076786875724792, + "log_odds_ratio": -0.4871475100517273, + "logits/chosen": -1.1644964218139648, + "logits/rejected": -1.074755311012268, + "logps/chosen": -1.2209006547927856, + "logps/rejected": -1.6116774082183838, + "loss": 2.2044, + "nll_loss": 0.502373993396759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12209007143974304, + "rewards/margins": 0.03907766565680504, + "rewards/rejected": -0.16116774082183838, + "step": 993 + }, + { + "epoch": 2.6209624258404745, + "grad_norm": 10.192991256713867, + "learning_rate": 1.0150842945874e-06, + "log_odds_chosen": 0.8125649690628052, + "log_odds_ratio": -0.41364622116088867, + "logits/chosen": -1.1938300132751465, + "logits/rejected": -1.0928840637207031, + "logps/chosen": -1.1739590167999268, + "logps/rejected": -1.7607444524765015, + "loss": 2.5602, + "nll_loss": 0.5986862182617188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11739590018987656, + "rewards/margins": 0.05867853760719299, + "rewards/rejected": -0.17607444524765015, + "step": 994 + }, + { + "epoch": 2.6235992089650626, + "grad_norm": 9.447900772094727, + "learning_rate": 1.007985803016859e-06, + "log_odds_chosen": 0.7173506617546082, + "log_odds_ratio": -0.4152427315711975, + "logits/chosen": -1.109144926071167, + "logits/rejected": -1.0027085542678833, + "logps/chosen": -1.1777777671813965, + "logps/rejected": -1.7166709899902344, + "loss": 1.954, + "nll_loss": 0.4469866454601288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11777777969837189, + "rewards/margins": 0.05388931185007095, + "rewards/rejected": -0.17166709899902344, + "step": 995 + }, + { + "epoch": 2.6262359920896508, + "grad_norm": 10.02701473236084, + "learning_rate": 1.0008873114463176e-06, + "log_odds_chosen": 0.5110893249511719, + "log_odds_ratio": -0.4991420805454254, + "logits/chosen": -1.1886515617370605, + "logits/rejected": -1.072214126586914, + "logps/chosen": -1.2944159507751465, + "logps/rejected": -1.6956514120101929, + "loss": 2.3982, + "nll_loss": 0.5496328473091125, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12944158911705017, + "rewards/margins": 0.040123552083969116, + "rewards/rejected": -0.1695651412010193, + "step": 996 + }, + { + "epoch": 2.628872775214239, + "grad_norm": 10.027898788452148, + "learning_rate": 9.937888198757763e-07, + "log_odds_chosen": 0.6266584992408752, + "log_odds_ratio": -0.4440193176269531, + "logits/chosen": -1.1369056701660156, + "logits/rejected": -1.0225248336791992, + "logps/chosen": -1.290299415588379, + "logps/rejected": -1.7772315740585327, + "loss": 2.1245, + "nll_loss": 0.48673126101493835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12902995944023132, + "rewards/margins": 0.048693202435970306, + "rewards/rejected": -0.17772315442562103, + "step": 997 + }, + { + "epoch": 2.6315095583388266, + "grad_norm": 9.200778007507324, + "learning_rate": 9.866903283052352e-07, + "log_odds_chosen": 0.43737131357192993, + "log_odds_ratio": -0.5192932486534119, + "logits/chosen": -1.0621488094329834, + "logits/rejected": -0.9878532886505127, + "logps/chosen": -1.3959318399429321, + "logps/rejected": -1.7320349216461182, + "loss": 2.0259, + "nll_loss": 0.4545466899871826, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1395931839942932, + "rewards/margins": 0.033610306680202484, + "rewards/rejected": -0.1732034981250763, + "step": 998 + }, + { + "epoch": 2.6341463414634148, + "grad_norm": 9.254485130310059, + "learning_rate": 9.795918367346939e-07, + "log_odds_chosen": 0.9543919563293457, + "log_odds_ratio": -0.3626548647880554, + "logits/chosen": -1.090482473373413, + "logits/rejected": -1.0068384408950806, + "logps/chosen": -1.0385923385620117, + "logps/rejected": -1.72948157787323, + "loss": 1.6749, + "nll_loss": 0.3824600577354431, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10385924577713013, + "rewards/margins": 0.06908892095088959, + "rewards/rejected": -0.17294815182685852, + "step": 999 + }, + { + "epoch": 2.6367831245880025, + "grad_norm": 10.900224685668945, + "learning_rate": 9.724933451641526e-07, + "log_odds_chosen": 0.4607086181640625, + "log_odds_ratio": -0.5145030617713928, + "logits/chosen": -1.1873807907104492, + "logits/rejected": -1.0615363121032715, + "logps/chosen": -1.4615896940231323, + "logps/rejected": -1.8402800559997559, + "loss": 2.5449, + "nll_loss": 0.5847799777984619, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14615896344184875, + "rewards/margins": 0.03786905109882355, + "rewards/rejected": -0.1840280294418335, + "step": 1000 + }, + { + "epoch": 2.6394199077125906, + "grad_norm": 10.294342994689941, + "learning_rate": 9.653948535936112e-07, + "log_odds_chosen": 0.5043836832046509, + "log_odds_ratio": -0.48049962520599365, + "logits/chosen": -1.198920726776123, + "logits/rejected": -1.0782759189605713, + "logps/chosen": -1.3148038387298584, + "logps/rejected": -1.7069685459136963, + "loss": 2.8996, + "nll_loss": 0.6768454313278198, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1314803808927536, + "rewards/margins": 0.03921646997332573, + "rewards/rejected": -0.17069685459136963, + "step": 1001 + }, + { + "epoch": 2.6420566908371788, + "grad_norm": 9.512928009033203, + "learning_rate": 9.582963620230701e-07, + "log_odds_chosen": 0.484348326921463, + "log_odds_ratio": -0.4980863034725189, + "logits/chosen": -1.150258183479309, + "logits/rejected": -1.1008179187774658, + "logps/chosen": -1.1577883958816528, + "logps/rejected": -1.5190094709396362, + "loss": 1.9338, + "nll_loss": 0.43364277482032776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.115778848528862, + "rewards/margins": 0.03612210601568222, + "rewards/rejected": -0.15190094709396362, + "step": 1002 + }, + { + "epoch": 2.644693473961767, + "grad_norm": 9.940478324890137, + "learning_rate": 9.511978704525287e-07, + "log_odds_chosen": 0.40882444381713867, + "log_odds_ratio": -0.5205907821655273, + "logits/chosen": -1.0754611492156982, + "logits/rejected": -0.9930179119110107, + "logps/chosen": -1.2754976749420166, + "logps/rejected": -1.587766408920288, + "loss": 2.0367, + "nll_loss": 0.45710575580596924, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12754976749420166, + "rewards/margins": 0.03122686967253685, + "rewards/rejected": -0.1587766408920288, + "step": 1003 + }, + { + "epoch": 2.6473302570863546, + "grad_norm": 8.932833671569824, + "learning_rate": 9.440993788819875e-07, + "log_odds_chosen": 0.5635305047035217, + "log_odds_ratio": -0.4637274146080017, + "logits/chosen": -1.158718228340149, + "logits/rejected": -1.0395697355270386, + "logps/chosen": -1.109891653060913, + "logps/rejected": -1.51375150680542, + "loss": 1.881, + "nll_loss": 0.42386823892593384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11098916828632355, + "rewards/margins": 0.04038599506020546, + "rewards/rejected": -0.1513751745223999, + "step": 1004 + }, + { + "epoch": 2.6499670402109428, + "grad_norm": 10.461935043334961, + "learning_rate": 9.370008873114462e-07, + "log_odds_chosen": 0.6628550291061401, + "log_odds_ratio": -0.4403079152107239, + "logits/chosen": -1.0836372375488281, + "logits/rejected": -1.057179570198059, + "logps/chosen": -1.1715991497039795, + "logps/rejected": -1.6733651161193848, + "loss": 1.9231, + "nll_loss": 0.43674010038375854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11715991795063019, + "rewards/margins": 0.05017659068107605, + "rewards/rejected": -0.16733650863170624, + "step": 1005 + }, + { + "epoch": 2.6526038233355305, + "grad_norm": 10.285652160644531, + "learning_rate": 9.29902395740905e-07, + "log_odds_chosen": 0.4264982342720032, + "log_odds_ratio": -0.5198737382888794, + "logits/chosen": -1.1921157836914062, + "logits/rejected": -1.128915548324585, + "logps/chosen": -1.3721623420715332, + "logps/rejected": -1.705155611038208, + "loss": 2.6975, + "nll_loss": 0.6224000453948975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1372162401676178, + "rewards/margins": 0.033299338072538376, + "rewards/rejected": -0.17051556706428528, + "step": 1006 + }, + { + "epoch": 2.6552406064601186, + "grad_norm": 10.129936218261719, + "learning_rate": 9.228039041703638e-07, + "log_odds_chosen": 0.7587910890579224, + "log_odds_ratio": -0.3909429609775543, + "logits/chosen": -1.1581931114196777, + "logits/rejected": -1.0162979364395142, + "logps/chosen": -1.335216760635376, + "logps/rejected": -1.9500994682312012, + "loss": 2.2318, + "nll_loss": 0.5188637375831604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1335216760635376, + "rewards/margins": 0.06148828566074371, + "rewards/rejected": -0.1950099766254425, + "step": 1007 + }, + { + "epoch": 2.6578773895847068, + "grad_norm": 9.60527515411377, + "learning_rate": 9.157054125998225e-07, + "log_odds_chosen": 0.9209575057029724, + "log_odds_ratio": -0.37785103917121887, + "logits/chosen": -1.0892844200134277, + "logits/rejected": -0.9736959934234619, + "logps/chosen": -1.2888574600219727, + "logps/rejected": -2.035107374191284, + "loss": 2.0299, + "nll_loss": 0.46970081329345703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12888574600219727, + "rewards/margins": 0.07462498545646667, + "rewards/rejected": -0.20351073145866394, + "step": 1008 + }, + { + "epoch": 2.660514172709295, + "grad_norm": 10.338894844055176, + "learning_rate": 9.086069210292813e-07, + "log_odds_chosen": 0.47613853216171265, + "log_odds_ratio": -0.5086878538131714, + "logits/chosen": -1.1516331434249878, + "logits/rejected": -1.1064794063568115, + "logps/chosen": -1.222299337387085, + "logps/rejected": -1.5618138313293457, + "loss": 2.4068, + "nll_loss": 0.5508235692977905, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1222299337387085, + "rewards/margins": 0.03395145758986473, + "rewards/rejected": -0.15618139505386353, + "step": 1009 + }, + { + "epoch": 2.6631509558338826, + "grad_norm": 10.516220092773438, + "learning_rate": 9.0150842945874e-07, + "log_odds_chosen": 0.6181827783584595, + "log_odds_ratio": -0.4441305994987488, + "logits/chosen": -1.1471972465515137, + "logits/rejected": -1.0572733879089355, + "logps/chosen": -1.2600420713424683, + "logps/rejected": -1.744326114654541, + "loss": 2.1607, + "nll_loss": 0.4957590699195862, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12600421905517578, + "rewards/margins": 0.04842839762568474, + "rewards/rejected": -0.17443260550498962, + "step": 1010 + }, + { + "epoch": 2.6657877389584708, + "grad_norm": 10.016770362854004, + "learning_rate": 8.944099378881988e-07, + "log_odds_chosen": 0.7220852971076965, + "log_odds_ratio": -0.47244396805763245, + "logits/chosen": -1.1754839420318604, + "logits/rejected": -1.073702096939087, + "logps/chosen": -1.3390041589736938, + "logps/rejected": -1.923474907875061, + "loss": 2.4484, + "nll_loss": 0.564854621887207, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13390041887760162, + "rewards/margins": 0.05844707787036896, + "rewards/rejected": -0.19234749674797058, + "step": 1011 + }, + { + "epoch": 2.6684245220830585, + "grad_norm": 10.699859619140625, + "learning_rate": 8.873114463176574e-07, + "log_odds_chosen": 0.6538971662521362, + "log_odds_ratio": -0.43241989612579346, + "logits/chosen": -1.1091864109039307, + "logits/rejected": -0.9737260937690735, + "logps/chosen": -1.2799919843673706, + "logps/rejected": -1.7287094593048096, + "loss": 2.0487, + "nll_loss": 0.46893632411956787, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1279992163181305, + "rewards/margins": 0.044871747493743896, + "rewards/rejected": -0.1728709489107132, + "step": 1012 + }, + { + "epoch": 2.6710613052076466, + "grad_norm": 10.358034133911133, + "learning_rate": 8.802129547471162e-07, + "log_odds_chosen": 0.6818956732749939, + "log_odds_ratio": -0.4728802740573883, + "logits/chosen": -1.1002342700958252, + "logits/rejected": -1.0167999267578125, + "logps/chosen": -1.2693593502044678, + "logps/rejected": -1.8089416027069092, + "loss": 2.0842, + "nll_loss": 0.4737650156021118, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1269359439611435, + "rewards/margins": 0.0539582222700119, + "rewards/rejected": -0.1808941662311554, + "step": 1013 + }, + { + "epoch": 2.6736980883322348, + "grad_norm": 8.607314109802246, + "learning_rate": 8.731144631765749e-07, + "log_odds_chosen": 0.4106805920600891, + "log_odds_ratio": -0.5259032249450684, + "logits/chosen": -0.9953519701957703, + "logits/rejected": -0.96965092420578, + "logps/chosen": -1.1974859237670898, + "logps/rejected": -1.4902080297470093, + "loss": 1.3613, + "nll_loss": 0.28773102164268494, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11974858492612839, + "rewards/margins": 0.029272207990288734, + "rewards/rejected": -0.14902080595493317, + "step": 1014 + }, + { + "epoch": 2.676334871456823, + "grad_norm": 10.295807838439941, + "learning_rate": 8.660159716060337e-07, + "log_odds_chosen": 0.6526855826377869, + "log_odds_ratio": -0.43829602003097534, + "logits/chosen": -1.1492483615875244, + "logits/rejected": -1.050119400024414, + "logps/chosen": -1.2075819969177246, + "logps/rejected": -1.6898386478424072, + "loss": 2.4668, + "nll_loss": 0.5728762745857239, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12075819820165634, + "rewards/margins": 0.04822567105293274, + "rewards/rejected": -0.16898387670516968, + "step": 1015 + }, + { + "epoch": 2.6789716545814106, + "grad_norm": 11.07802963256836, + "learning_rate": 8.589174800354925e-07, + "log_odds_chosen": 0.4253402054309845, + "log_odds_ratio": -0.510114312171936, + "logits/chosen": -1.127990484237671, + "logits/rejected": -1.0399631261825562, + "logps/chosen": -1.4407004117965698, + "logps/rejected": -1.7871440649032593, + "loss": 2.7528, + "nll_loss": 0.6371949315071106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14407005906105042, + "rewards/margins": 0.03464437276124954, + "rewards/rejected": -0.17871442437171936, + "step": 1016 + }, + { + "epoch": 2.6816084377059988, + "grad_norm": 9.816594123840332, + "learning_rate": 8.518189884649511e-07, + "log_odds_chosen": 0.8740766048431396, + "log_odds_ratio": -0.3651520609855652, + "logits/chosen": -1.0894010066986084, + "logits/rejected": -0.9762166738510132, + "logps/chosen": -1.3023083209991455, + "logps/rejected": -1.9985140562057495, + "loss": 2.0904, + "nll_loss": 0.4860801696777344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1302308291196823, + "rewards/margins": 0.06962057203054428, + "rewards/rejected": -0.199851393699646, + "step": 1017 + }, + { + "epoch": 2.6842452208305865, + "grad_norm": 10.55259895324707, + "learning_rate": 8.447204968944099e-07, + "log_odds_chosen": 0.4953387975692749, + "log_odds_ratio": -0.4922969341278076, + "logits/chosen": -1.174715280532837, + "logits/rejected": -1.0578200817108154, + "logps/chosen": -1.3714582920074463, + "logps/rejected": -1.740402102470398, + "loss": 2.6346, + "nll_loss": 0.609413743019104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13714581727981567, + "rewards/margins": 0.03689439594745636, + "rewards/rejected": -0.17404022812843323, + "step": 1018 + }, + { + "epoch": 2.6868820039551746, + "grad_norm": 9.052789688110352, + "learning_rate": 8.376220053238686e-07, + "log_odds_chosen": 0.707571268081665, + "log_odds_ratio": -0.4167298376560211, + "logits/chosen": -1.0386282205581665, + "logits/rejected": -0.9862587451934814, + "logps/chosen": -1.158585548400879, + "logps/rejected": -1.6877071857452393, + "loss": 1.6206, + "nll_loss": 0.36347225308418274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11585855484008789, + "rewards/margins": 0.0529121533036232, + "rewards/rejected": -0.16877073049545288, + "step": 1019 + }, + { + "epoch": 2.6895187870797628, + "grad_norm": 9.590802192687988, + "learning_rate": 8.305235137533274e-07, + "log_odds_chosen": 0.7320303320884705, + "log_odds_ratio": -0.4414646327495575, + "logits/chosen": -1.1201412677764893, + "logits/rejected": -1.0050376653671265, + "logps/chosen": -1.2693010568618774, + "logps/rejected": -1.8650025129318237, + "loss": 2.0464, + "nll_loss": 0.4674500524997711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1269301176071167, + "rewards/margins": 0.059570129960775375, + "rewards/rejected": -0.18650023639202118, + "step": 1020 + }, + { + "epoch": 2.692155570204351, + "grad_norm": 10.539416313171387, + "learning_rate": 8.234250221827861e-07, + "log_odds_chosen": 0.4314787983894348, + "log_odds_ratio": -0.5104358792304993, + "logits/chosen": -1.178997278213501, + "logits/rejected": -1.054152488708496, + "logps/chosen": -1.3437623977661133, + "logps/rejected": -1.662109613418579, + "loss": 2.4121, + "nll_loss": 0.5519872307777405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13437624275684357, + "rewards/margins": 0.031834714114665985, + "rewards/rejected": -0.16621094942092896, + "step": 1021 + }, + { + "epoch": 2.6947923533289386, + "grad_norm": 9.64401626586914, + "learning_rate": 8.163265306122449e-07, + "log_odds_chosen": 0.456570565700531, + "log_odds_ratio": -0.5173032879829407, + "logits/chosen": -1.123307704925537, + "logits/rejected": -1.084729790687561, + "logps/chosen": -1.2351043224334717, + "logps/rejected": -1.591335654258728, + "loss": 2.1734, + "nll_loss": 0.4916233718395233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1235104352235794, + "rewards/margins": 0.03562314808368683, + "rewards/rejected": -0.15913358330726624, + "step": 1022 + }, + { + "epoch": 2.6974291364535268, + "grad_norm": 9.88548755645752, + "learning_rate": 8.092280390417035e-07, + "log_odds_chosen": 0.6224034428596497, + "log_odds_ratio": -0.4495518207550049, + "logits/chosen": -1.125747799873352, + "logits/rejected": -1.08957839012146, + "logps/chosen": -1.132243275642395, + "logps/rejected": -1.5897014141082764, + "loss": 1.9675, + "nll_loss": 0.44693225622177124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11322431266307831, + "rewards/margins": 0.045745816081762314, + "rewards/rejected": -0.15897014737129211, + "step": 1023 + }, + { + "epoch": 2.7000659195781145, + "grad_norm": 11.096732139587402, + "learning_rate": 8.021295474711623e-07, + "log_odds_chosen": 0.5247015357017517, + "log_odds_ratio": -0.4764745533466339, + "logits/chosen": -1.1881556510925293, + "logits/rejected": -1.057640552520752, + "logps/chosen": -1.435180425643921, + "logps/rejected": -1.863472580909729, + "loss": 2.5414, + "nll_loss": 0.5876976847648621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14351804554462433, + "rewards/margins": 0.04282921925187111, + "rewards/rejected": -0.18634726107120514, + "step": 1024 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 9.868570327758789, + "learning_rate": 7.95031055900621e-07, + "log_odds_chosen": 0.49480122327804565, + "log_odds_ratio": -0.5005102157592773, + "logits/chosen": -1.0674026012420654, + "logits/rejected": -1.0222371816635132, + "logps/chosen": -1.2881180047988892, + "logps/rejected": -1.6646883487701416, + "loss": 1.908, + "nll_loss": 0.4269576668739319, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1288118064403534, + "rewards/margins": 0.037657029926776886, + "rewards/rejected": -0.16646882891654968, + "step": 1025 + }, + { + "epoch": 2.705339485827291, + "grad_norm": 10.239690780639648, + "learning_rate": 7.879325643300798e-07, + "log_odds_chosen": 0.2985216975212097, + "log_odds_ratio": -0.5645860433578491, + "logits/chosen": -1.2120201587677002, + "logits/rejected": -1.108433723449707, + "logps/chosen": -1.2694709300994873, + "logps/rejected": -1.4897054433822632, + "loss": 2.6176, + "nll_loss": 0.5979484915733337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12694710493087769, + "rewards/margins": 0.02202344685792923, + "rewards/rejected": -0.14897054433822632, + "step": 1026 + }, + { + "epoch": 2.707976268951879, + "grad_norm": 9.421809196472168, + "learning_rate": 7.808340727595386e-07, + "log_odds_chosen": 0.8708092570304871, + "log_odds_ratio": -0.4166063666343689, + "logits/chosen": -1.0700078010559082, + "logits/rejected": -1.040330171585083, + "logps/chosen": -1.1527518033981323, + "logps/rejected": -1.845388650894165, + "loss": 1.6631, + "nll_loss": 0.37410521507263184, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11527517437934875, + "rewards/margins": 0.06926368176937103, + "rewards/rejected": -0.18453887104988098, + "step": 1027 + }, + { + "epoch": 2.7106130520764666, + "grad_norm": 9.974482536315918, + "learning_rate": 7.737355811889973e-07, + "log_odds_chosen": 0.6049374341964722, + "log_odds_ratio": -0.4576120376586914, + "logits/chosen": -1.0829582214355469, + "logits/rejected": -0.9886510968208313, + "logps/chosen": -1.4116907119750977, + "logps/rejected": -1.8953813314437866, + "loss": 2.2451, + "nll_loss": 0.5155256390571594, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14116908609867096, + "rewards/margins": 0.04836905375123024, + "rewards/rejected": -0.1895381361246109, + "step": 1028 + }, + { + "epoch": 2.713249835201055, + "grad_norm": 10.552530288696289, + "learning_rate": 7.666370896184561e-07, + "log_odds_chosen": 0.3404359817504883, + "log_odds_ratio": -0.5574318170547485, + "logits/chosen": -1.1303635835647583, + "logits/rejected": -1.0355476140975952, + "logps/chosen": -1.3993536233901978, + "logps/rejected": -1.6630032062530518, + "loss": 2.4588, + "nll_loss": 0.5589630007743835, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13993537425994873, + "rewards/margins": 0.026364946737885475, + "rewards/rejected": -0.16630031168460846, + "step": 1029 + }, + { + "epoch": 2.7158866183256425, + "grad_norm": 9.24097728729248, + "learning_rate": 7.595385980479148e-07, + "log_odds_chosen": 0.5806278586387634, + "log_odds_ratio": -0.47258681058883667, + "logits/chosen": -1.053170919418335, + "logits/rejected": -1.014019250869751, + "logps/chosen": -1.3315719366073608, + "logps/rejected": -1.7810237407684326, + "loss": 1.7611, + "nll_loss": 0.39301273226737976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13315720856189728, + "rewards/margins": 0.04494518041610718, + "rewards/rejected": -0.17810237407684326, + "step": 1030 + }, + { + "epoch": 2.7185234014502306, + "grad_norm": 9.250251770019531, + "learning_rate": 7.524401064773736e-07, + "log_odds_chosen": 0.7768089771270752, + "log_odds_ratio": -0.4117186963558197, + "logits/chosen": -1.1114296913146973, + "logits/rejected": -1.0370917320251465, + "logps/chosen": -1.152336835861206, + "logps/rejected": -1.7033677101135254, + "loss": 1.8086, + "nll_loss": 0.4109684228897095, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11523368209600449, + "rewards/margins": 0.05510308966040611, + "rewards/rejected": -0.1703367829322815, + "step": 1031 + }, + { + "epoch": 2.721160184574819, + "grad_norm": 10.387153625488281, + "learning_rate": 7.453416149068323e-07, + "log_odds_chosen": 0.5035200715065002, + "log_odds_ratio": -0.4789535105228424, + "logits/chosen": -1.1551223993301392, + "logits/rejected": -1.0495972633361816, + "logps/chosen": -1.2403175830841064, + "logps/rejected": -1.6171977519989014, + "loss": 2.4095, + "nll_loss": 0.5544703006744385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12403176724910736, + "rewards/margins": 0.0376880019903183, + "rewards/rejected": -0.16171976923942566, + "step": 1032 + }, + { + "epoch": 2.723796967699407, + "grad_norm": 10.71376895904541, + "learning_rate": 7.382431233362911e-07, + "log_odds_chosen": 0.26095426082611084, + "log_odds_ratio": -0.576055645942688, + "logits/chosen": -1.1644666194915771, + "logits/rejected": -1.076171875, + "logps/chosen": -1.3639743328094482, + "logps/rejected": -1.5646965503692627, + "loss": 2.7671, + "nll_loss": 0.6341590285301208, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13639745116233826, + "rewards/margins": 0.0200722087174654, + "rewards/rejected": -0.15646964311599731, + "step": 1033 + }, + { + "epoch": 2.7264337508239946, + "grad_norm": 9.896520614624023, + "learning_rate": 7.311446317657497e-07, + "log_odds_chosen": 0.5601645708084106, + "log_odds_ratio": -0.4592554271221161, + "logits/chosen": -1.172353982925415, + "logits/rejected": -1.0247211456298828, + "logps/chosen": -1.2512595653533936, + "logps/rejected": -1.6812150478363037, + "loss": 2.3149, + "nll_loss": 0.5327982902526855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1251259595155716, + "rewards/margins": 0.042995553463697433, + "rewards/rejected": -0.16812151670455933, + "step": 1034 + }, + { + "epoch": 2.729070533948583, + "grad_norm": 10.839550971984863, + "learning_rate": 7.240461401952085e-07, + "log_odds_chosen": 0.7401673793792725, + "log_odds_ratio": -0.4034544825553894, + "logits/chosen": -1.150311827659607, + "logits/rejected": -1.0346262454986572, + "logps/chosen": -1.332601547241211, + "logps/rejected": -1.9081275463104248, + "loss": 2.5539, + "nll_loss": 0.5981303453445435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13326016068458557, + "rewards/margins": 0.05755259841680527, + "rewards/rejected": -0.19081275165081024, + "step": 1035 + }, + { + "epoch": 2.7317073170731705, + "grad_norm": 10.373785972595215, + "learning_rate": 7.169476486246672e-07, + "log_odds_chosen": 0.4543488621711731, + "log_odds_ratio": -0.5315455794334412, + "logits/chosen": -1.2191540002822876, + "logits/rejected": -1.04941987991333, + "logps/chosen": -1.4185079336166382, + "logps/rejected": -1.7377822399139404, + "loss": 2.9067, + "nll_loss": 0.6735115051269531, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1418507844209671, + "rewards/margins": 0.031927429139614105, + "rewards/rejected": -0.1737782210111618, + "step": 1036 + }, + { + "epoch": 2.7343441001977586, + "grad_norm": 10.395633697509766, + "learning_rate": 7.098491570541259e-07, + "log_odds_chosen": 0.471661239862442, + "log_odds_ratio": -0.4966796636581421, + "logits/chosen": -1.2016971111297607, + "logits/rejected": -1.094733476638794, + "logps/chosen": -1.3901854753494263, + "logps/rejected": -1.76613187789917, + "loss": 2.6959, + "nll_loss": 0.6243170499801636, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13901855051517487, + "rewards/margins": 0.03759463131427765, + "rewards/rejected": -0.17661318182945251, + "step": 1037 + }, + { + "epoch": 2.736980883322347, + "grad_norm": 10.377779006958008, + "learning_rate": 7.027506654835847e-07, + "log_odds_chosen": 0.4487267732620239, + "log_odds_ratio": -0.5085635185241699, + "logits/chosen": -1.2070711851119995, + "logits/rejected": -1.0505385398864746, + "logps/chosen": -1.2501126527786255, + "logps/rejected": -1.6021356582641602, + "loss": 2.5371, + "nll_loss": 0.5834246277809143, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12501126527786255, + "rewards/margins": 0.03520228713750839, + "rewards/rejected": -0.16021355986595154, + "step": 1038 + }, + { + "epoch": 2.739617666446935, + "grad_norm": 10.630236625671387, + "learning_rate": 6.956521739130434e-07, + "log_odds_chosen": 0.4647771716117859, + "log_odds_ratio": -0.4932706356048584, + "logits/chosen": -1.10763418674469, + "logits/rejected": -0.9972668886184692, + "logps/chosen": -1.3534834384918213, + "logps/rejected": -1.7255789041519165, + "loss": 2.231, + "nll_loss": 0.5084264874458313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1353483498096466, + "rewards/margins": 0.03720953315496445, + "rewards/rejected": -0.17255789041519165, + "step": 1039 + }, + { + "epoch": 2.7422544495715226, + "grad_norm": 10.011463165283203, + "learning_rate": 6.885536823425022e-07, + "log_odds_chosen": 0.5323688387870789, + "log_odds_ratio": -0.48797640204429626, + "logits/chosen": -1.1349525451660156, + "logits/rejected": -1.0394479036331177, + "logps/chosen": -1.2457324266433716, + "logps/rejected": -1.574986457824707, + "loss": 2.1416, + "nll_loss": 0.4866067171096802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1245732456445694, + "rewards/margins": 0.03292540833353996, + "rewards/rejected": -0.15749865770339966, + "step": 1040 + }, + { + "epoch": 2.744891232696111, + "grad_norm": 10.38745403289795, + "learning_rate": 6.814551907719609e-07, + "log_odds_chosen": 0.7381571531295776, + "log_odds_ratio": -0.424299955368042, + "logits/chosen": -1.064079999923706, + "logits/rejected": -0.9530540704727173, + "logps/chosen": -1.4342451095581055, + "logps/rejected": -2.0505714416503906, + "loss": 2.7753, + "nll_loss": 0.6513826847076416, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14342449605464935, + "rewards/margins": 0.06163264811038971, + "rewards/rejected": -0.20505714416503906, + "step": 1041 + }, + { + "epoch": 2.7475280158206985, + "grad_norm": 10.014318466186523, + "learning_rate": 6.743566992014197e-07, + "log_odds_chosen": 0.41500961780548096, + "log_odds_ratio": -0.5223526358604431, + "logits/chosen": -1.108499526977539, + "logits/rejected": -1.050550103187561, + "logps/chosen": -1.314407229423523, + "logps/rejected": -1.6437593698501587, + "loss": 2.0834, + "nll_loss": 0.46862050890922546, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13144072890281677, + "rewards/margins": 0.032935213297605515, + "rewards/rejected": -0.16437594592571259, + "step": 1042 + }, + { + "epoch": 2.7501647989452866, + "grad_norm": 9.400856018066406, + "learning_rate": 6.672582076308784e-07, + "log_odds_chosen": 0.7966787219047546, + "log_odds_ratio": -0.4011804759502411, + "logits/chosen": -1.0503668785095215, + "logits/rejected": -0.9597200751304626, + "logps/chosen": -1.1999990940093994, + "logps/rejected": -1.771064043045044, + "loss": 1.7316, + "nll_loss": 0.3927770256996155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11999991536140442, + "rewards/margins": 0.05710649862885475, + "rewards/rejected": -0.17710641026496887, + "step": 1043 + }, + { + "epoch": 2.752801582069875, + "grad_norm": 10.262003898620605, + "learning_rate": 6.601597160603372e-07, + "log_odds_chosen": 0.31851726770401, + "log_odds_ratio": -0.5514330863952637, + "logits/chosen": -1.088751196861267, + "logits/rejected": -1.0029819011688232, + "logps/chosen": -1.366959810256958, + "logps/rejected": -1.6166133880615234, + "loss": 2.2747, + "nll_loss": 0.513523519039154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1366959810256958, + "rewards/margins": 0.02496536634862423, + "rewards/rejected": -0.16166135668754578, + "step": 1044 + }, + { + "epoch": 2.755438365194463, + "grad_norm": 9.898816108703613, + "learning_rate": 6.530612244897958e-07, + "log_odds_chosen": 0.5408321022987366, + "log_odds_ratio": -0.47568681836128235, + "logits/chosen": -1.1615869998931885, + "logits/rejected": -1.0784975290298462, + "logps/chosen": -1.4018906354904175, + "logps/rejected": -1.839592695236206, + "loss": 2.6957, + "nll_loss": 0.6263567209243774, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.140189066529274, + "rewards/margins": 0.0437702052295208, + "rewards/rejected": -0.18395927548408508, + "step": 1045 + }, + { + "epoch": 2.7580751483190507, + "grad_norm": 10.726858139038086, + "learning_rate": 6.459627329192546e-07, + "log_odds_chosen": 0.4407050609588623, + "log_odds_ratio": -0.54912930727005, + "logits/chosen": -1.1333410739898682, + "logits/rejected": -1.0956073999404907, + "logps/chosen": -1.3223137855529785, + "logps/rejected": -1.6678136587142944, + "loss": 2.519, + "nll_loss": 0.5748312473297119, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13223138451576233, + "rewards/margins": 0.03454999625682831, + "rewards/rejected": -0.16678138077259064, + "step": 1046 + }, + { + "epoch": 2.760711931443639, + "grad_norm": 9.329651832580566, + "learning_rate": 6.388642413487134e-07, + "log_odds_chosen": 0.33870670199394226, + "log_odds_ratio": -0.547798752784729, + "logits/chosen": -1.0690044164657593, + "logits/rejected": -1.0301674604415894, + "logps/chosen": -1.1900187730789185, + "logps/rejected": -1.4351589679718018, + "loss": 1.7699, + "nll_loss": 0.3877028226852417, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11900188028812408, + "rewards/margins": 0.02451402135193348, + "rewards/rejected": -0.14351589977741241, + "step": 1047 + }, + { + "epoch": 2.7633487145682265, + "grad_norm": 10.1386137008667, + "learning_rate": 6.317657497781721e-07, + "log_odds_chosen": 0.764558732509613, + "log_odds_ratio": -0.404604971408844, + "logits/chosen": -1.1327184438705444, + "logits/rejected": -1.0394474267959595, + "logps/chosen": -1.3747018575668335, + "logps/rejected": -1.9805411100387573, + "loss": 2.3524, + "nll_loss": 0.5476467609405518, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13747017085552216, + "rewards/margins": 0.0605839341878891, + "rewards/rejected": -0.19805411994457245, + "step": 1048 + }, + { + "epoch": 2.7659854976928147, + "grad_norm": 9.465290069580078, + "learning_rate": 6.246672582076309e-07, + "log_odds_chosen": 0.5590122938156128, + "log_odds_ratio": -0.4640856683254242, + "logits/chosen": -1.127828598022461, + "logits/rejected": -1.0180671215057373, + "logps/chosen": -1.2410472631454468, + "logps/rejected": -1.6706857681274414, + "loss": 2.1327, + "nll_loss": 0.48676055669784546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12410472333431244, + "rewards/margins": 0.04296386241912842, + "rewards/rejected": -0.16706860065460205, + "step": 1049 + }, + { + "epoch": 2.768622280817403, + "grad_norm": 10.116673469543457, + "learning_rate": 6.175687666370896e-07, + "log_odds_chosen": 0.6692229509353638, + "log_odds_ratio": -0.41914093494415283, + "logits/chosen": -1.077138066291809, + "logits/rejected": -0.9766216278076172, + "logps/chosen": -1.297964096069336, + "logps/rejected": -1.8100271224975586, + "loss": 1.9948, + "nll_loss": 0.45677730441093445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12979641556739807, + "rewards/margins": 0.05120629817247391, + "rewards/rejected": -0.18100272119045258, + "step": 1050 + }, + { + "epoch": 2.771259063941991, + "grad_norm": 10.480387687683105, + "learning_rate": 6.104702750665484e-07, + "log_odds_chosen": 0.3245909810066223, + "log_odds_ratio": -0.5469909310340881, + "logits/chosen": -1.1878106594085693, + "logits/rejected": -1.1436951160430908, + "logps/chosen": -1.385441780090332, + "logps/rejected": -1.6416648626327515, + "loss": 2.7696, + "nll_loss": 0.6376917958259583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13854418694972992, + "rewards/margins": 0.025622311979532242, + "rewards/rejected": -0.16416651010513306, + "step": 1051 + }, + { + "epoch": 2.7738958470665787, + "grad_norm": 10.034676551818848, + "learning_rate": 6.033717834960071e-07, + "log_odds_chosen": 0.4665338397026062, + "log_odds_ratio": -0.5162329077720642, + "logits/chosen": -1.1020002365112305, + "logits/rejected": -1.0287086963653564, + "logps/chosen": -1.3640254735946655, + "logps/rejected": -1.7066876888275146, + "loss": 2.4071, + "nll_loss": 0.5501580834388733, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13640254735946655, + "rewards/margins": 0.03426623344421387, + "rewards/rejected": -0.17066878080368042, + "step": 1052 + }, + { + "epoch": 2.776532630191167, + "grad_norm": 9.526391983032227, + "learning_rate": 5.962732919254659e-07, + "log_odds_chosen": 0.6806021332740784, + "log_odds_ratio": -0.48156851530075073, + "logits/chosen": -1.1003026962280273, + "logits/rejected": -1.0384246110916138, + "logps/chosen": -1.1482423543930054, + "logps/rejected": -1.5512956380844116, + "loss": 1.8907, + "nll_loss": 0.42451637983322144, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11482425034046173, + "rewards/margins": 0.04030532017350197, + "rewards/rejected": -0.1551295667886734, + "step": 1053 + }, + { + "epoch": 2.7791694133157545, + "grad_norm": 9.275099754333496, + "learning_rate": 5.891748003549245e-07, + "log_odds_chosen": 0.8472751975059509, + "log_odds_ratio": -0.37624391913414, + "logits/chosen": -1.054128646850586, + "logits/rejected": -0.9725082516670227, + "logps/chosen": -1.2834038734436035, + "logps/rejected": -1.9531402587890625, + "loss": 1.7921, + "nll_loss": 0.41038990020751953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12834039330482483, + "rewards/margins": 0.06697362661361694, + "rewards/rejected": -0.19531401991844177, + "step": 1054 + }, + { + "epoch": 2.7818061964403427, + "grad_norm": 10.470935821533203, + "learning_rate": 5.820763087843832e-07, + "log_odds_chosen": 0.8075612783432007, + "log_odds_ratio": -0.3843352198600769, + "logits/chosen": -1.1219369173049927, + "logits/rejected": -1.0306581258773804, + "logps/chosen": -1.3321828842163086, + "logps/rejected": -1.9868505001068115, + "loss": 2.211, + "nll_loss": 0.5143091082572937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13321830332279205, + "rewards/margins": 0.06546676158905029, + "rewards/rejected": -0.19868505001068115, + "step": 1055 + }, + { + "epoch": 2.784442979564931, + "grad_norm": 9.963431358337402, + "learning_rate": 5.74977817213842e-07, + "log_odds_chosen": 0.533049464225769, + "log_odds_ratio": -0.48167720437049866, + "logits/chosen": -1.0858891010284424, + "logits/rejected": -1.0543181896209717, + "logps/chosen": -1.3295893669128418, + "logps/rejected": -1.7450724840164185, + "loss": 2.1299, + "nll_loss": 0.4843091368675232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13295894861221313, + "rewards/margins": 0.04154830053448677, + "rewards/rejected": -0.1745072603225708, + "step": 1056 + }, + { + "epoch": 2.787079762689519, + "grad_norm": 9.712507247924805, + "learning_rate": 5.678793256433007e-07, + "log_odds_chosen": 0.5920177698135376, + "log_odds_ratio": -0.4669216275215149, + "logits/chosen": -1.1910808086395264, + "logits/rejected": -1.0788345336914062, + "logps/chosen": -1.1530779600143433, + "logps/rejected": -1.581312894821167, + "loss": 2.478, + "nll_loss": 0.5728002786636353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11530779302120209, + "rewards/margins": 0.04282350093126297, + "rewards/rejected": -0.15813130140304565, + "step": 1057 + }, + { + "epoch": 2.7897165458141067, + "grad_norm": 9.908315658569336, + "learning_rate": 5.607808340727595e-07, + "log_odds_chosen": 0.884239673614502, + "log_odds_ratio": -0.3546842038631439, + "logits/chosen": -1.1526079177856445, + "logits/rejected": -1.0145677328109741, + "logps/chosen": -1.288661003112793, + "logps/rejected": -1.9862499237060547, + "loss": 2.0295, + "nll_loss": 0.47191494703292847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12886610627174377, + "rewards/margins": 0.06975888460874557, + "rewards/rejected": -0.19862499833106995, + "step": 1058 + }, + { + "epoch": 2.792353328938695, + "grad_norm": 9.855311393737793, + "learning_rate": 5.536823425022182e-07, + "log_odds_chosen": 0.7123335599899292, + "log_odds_ratio": -0.4272412061691284, + "logits/chosen": -1.1070234775543213, + "logits/rejected": -1.0540480613708496, + "logps/chosen": -1.2021454572677612, + "logps/rejected": -1.7236651182174683, + "loss": 2.0172, + "nll_loss": 0.46157926321029663, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1202145516872406, + "rewards/margins": 0.052151963114738464, + "rewards/rejected": -0.17236651480197906, + "step": 1059 + }, + { + "epoch": 2.794990112063283, + "grad_norm": 10.23788070678711, + "learning_rate": 5.46583850931677e-07, + "log_odds_chosen": 0.5830734372138977, + "log_odds_ratio": -0.45574843883514404, + "logits/chosen": -1.072530746459961, + "logits/rejected": -0.9956455826759338, + "logps/chosen": -1.3934428691864014, + "logps/rejected": -1.8281140327453613, + "loss": 2.2617, + "nll_loss": 0.5198531150817871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13934428989887238, + "rewards/margins": 0.043467119336128235, + "rewards/rejected": -0.1828114092350006, + "step": 1060 + }, + { + "epoch": 2.7976268951878707, + "grad_norm": 9.85499382019043, + "learning_rate": 5.394853593611357e-07, + "log_odds_chosen": 0.8616229295730591, + "log_odds_ratio": -0.3569209575653076, + "logits/chosen": -1.1381977796554565, + "logits/rejected": -1.0240377187728882, + "logps/chosen": -1.2420525550842285, + "logps/rejected": -1.9140177965164185, + "loss": 2.1363, + "nll_loss": 0.49839523434638977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12420526146888733, + "rewards/margins": 0.06719652563333511, + "rewards/rejected": -0.19140177965164185, + "step": 1061 + }, + { + "epoch": 2.800263678312459, + "grad_norm": 10.282552719116211, + "learning_rate": 5.323868677905945e-07, + "log_odds_chosen": 0.3330332934856415, + "log_odds_ratio": -0.5529873371124268, + "logits/chosen": -1.1650663614273071, + "logits/rejected": -1.070693016052246, + "logps/chosen": -1.3323811292648315, + "logps/rejected": -1.5864946842193604, + "loss": 2.4555, + "nll_loss": 0.5585712790489197, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13323810696601868, + "rewards/margins": 0.025411371141672134, + "rewards/rejected": -0.1586494743824005, + "step": 1062 + }, + { + "epoch": 2.802900461437047, + "grad_norm": 10.069133758544922, + "learning_rate": 5.252883762200532e-07, + "log_odds_chosen": 0.7132698893547058, + "log_odds_ratio": -0.4185967445373535, + "logits/chosen": -1.0890593528747559, + "logits/rejected": -1.0079560279846191, + "logps/chosen": -1.2254939079284668, + "logps/rejected": -1.7366520166397095, + "loss": 2.1125, + "nll_loss": 0.48625391721725464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1225493997335434, + "rewards/margins": 0.05111580714583397, + "rewards/rejected": -0.17366519570350647, + "step": 1063 + }, + { + "epoch": 2.8055372445616347, + "grad_norm": 10.150778770446777, + "learning_rate": 5.18189884649512e-07, + "log_odds_chosen": 0.5579012632369995, + "log_odds_ratio": -0.4871813654899597, + "logits/chosen": -1.117282509803772, + "logits/rejected": -1.0329712629318237, + "logps/chosen": -1.3524284362792969, + "logps/rejected": -1.8107136487960815, + "loss": 2.4066, + "nll_loss": 0.5529344081878662, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13524284958839417, + "rewards/margins": 0.04582851752638817, + "rewards/rejected": -0.18107135593891144, + "step": 1064 + }, + { + "epoch": 2.808174027686223, + "grad_norm": 9.787497520446777, + "learning_rate": 5.110913930789707e-07, + "log_odds_chosen": 0.619898796081543, + "log_odds_ratio": -0.4415004551410675, + "logits/chosen": -1.0507391691207886, + "logits/rejected": -0.9704097509384155, + "logps/chosen": -1.3528366088867188, + "logps/rejected": -1.8461878299713135, + "loss": 2.2188, + "nll_loss": 0.5105401277542114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13528364896774292, + "rewards/margins": 0.04933511093258858, + "rewards/rejected": -0.1846187710762024, + "step": 1065 + }, + { + "epoch": 2.810810810810811, + "grad_norm": 10.181516647338867, + "learning_rate": 5.039929015084295e-07, + "log_odds_chosen": 0.4866787791252136, + "log_odds_ratio": -0.5008502006530762, + "logits/chosen": -1.135515570640564, + "logits/rejected": -1.0233913660049438, + "logps/chosen": -1.3594303131103516, + "logps/rejected": -1.7316761016845703, + "loss": 2.1966, + "nll_loss": 0.49907565116882324, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13594304025173187, + "rewards/margins": 0.037224579602479935, + "rewards/rejected": -0.1731676161289215, + "step": 1066 + }, + { + "epoch": 2.8134475939353987, + "grad_norm": 10.298295974731445, + "learning_rate": 4.968944099378881e-07, + "log_odds_chosen": 0.40575116872787476, + "log_odds_ratio": -0.5261607766151428, + "logits/chosen": -1.1635947227478027, + "logits/rejected": -1.082903265953064, + "logps/chosen": -1.376260757446289, + "logps/rejected": -1.6973493099212646, + "loss": 2.7818, + "nll_loss": 0.6428461670875549, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13762608170509338, + "rewards/margins": 0.0321088507771492, + "rewards/rejected": -0.169734925031662, + "step": 1067 + }, + { + "epoch": 2.816084377059987, + "grad_norm": 9.991827964782715, + "learning_rate": 4.897959183673469e-07, + "log_odds_chosen": 0.5437269806861877, + "log_odds_ratio": -0.4750468134880066, + "logits/chosen": -1.203829288482666, + "logits/rejected": -1.0508911609649658, + "logps/chosen": -1.3043091297149658, + "logps/rejected": -1.7314250469207764, + "loss": 2.4969, + "nll_loss": 0.5767324566841125, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1304309070110321, + "rewards/margins": 0.04271160066127777, + "rewards/rejected": -0.17314249277114868, + "step": 1068 + }, + { + "epoch": 2.818721160184575, + "grad_norm": 9.926018714904785, + "learning_rate": 4.826974267968056e-07, + "log_odds_chosen": 0.6212695837020874, + "log_odds_ratio": -0.4382473826408386, + "logits/chosen": -1.1351120471954346, + "logits/rejected": -1.0052011013031006, + "logps/chosen": -1.2674087285995483, + "logps/rejected": -1.7482867240905762, + "loss": 2.0097, + "nll_loss": 0.4585903286933899, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12674087285995483, + "rewards/margins": 0.04808780550956726, + "rewards/rejected": -0.1748286783695221, + "step": 1069 + }, + { + "epoch": 2.8213579433091627, + "grad_norm": 9.707475662231445, + "learning_rate": 4.7559893522626436e-07, + "log_odds_chosen": 0.8172865509986877, + "log_odds_ratio": -0.39224159717559814, + "logits/chosen": -1.1351253986358643, + "logits/rejected": -0.9938197135925293, + "logps/chosen": -1.233187198638916, + "logps/rejected": -1.8230117559432983, + "loss": 2.2785, + "nll_loss": 0.5304071307182312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12331871688365936, + "rewards/margins": 0.05898246169090271, + "rewards/rejected": -0.18230119347572327, + "step": 1070 + }, + { + "epoch": 2.823994726433751, + "grad_norm": 9.760191917419434, + "learning_rate": 4.685004436557231e-07, + "log_odds_chosen": 0.48987436294555664, + "log_odds_ratio": -0.5002853274345398, + "logits/chosen": -1.0603246688842773, + "logits/rejected": -1.0183500051498413, + "logps/chosen": -1.2384836673736572, + "logps/rejected": -1.5949968099594116, + "loss": 2.3909, + "nll_loss": 0.5476902723312378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12384836375713348, + "rewards/margins": 0.03565133363008499, + "rewards/rejected": -0.15949970483779907, + "step": 1071 + }, + { + "epoch": 2.826631509558339, + "grad_norm": 10.26170539855957, + "learning_rate": 4.614019520851819e-07, + "log_odds_chosen": 0.6463963985443115, + "log_odds_ratio": -0.4377222955226898, + "logits/chosen": -1.1595723628997803, + "logits/rejected": -1.0640404224395752, + "logps/chosen": -1.2628695964813232, + "logps/rejected": -1.7612965106964111, + "loss": 2.3593, + "nll_loss": 0.5460578799247742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12628696858882904, + "rewards/margins": 0.04984269291162491, + "rewards/rejected": -0.17612966895103455, + "step": 1072 + }, + { + "epoch": 2.8292682926829267, + "grad_norm": 9.662675857543945, + "learning_rate": 4.5430346051464064e-07, + "log_odds_chosen": 0.44240912795066833, + "log_odds_ratio": -0.5153103470802307, + "logits/chosen": -1.1566162109375, + "logits/rejected": -1.0911320447921753, + "logps/chosen": -1.233819842338562, + "logps/rejected": -1.562687873840332, + "loss": 2.2785, + "nll_loss": 0.51810622215271, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12338199466466904, + "rewards/margins": 0.0328868106007576, + "rewards/rejected": -0.15626880526542664, + "step": 1073 + }, + { + "epoch": 2.831905075807515, + "grad_norm": 10.193406105041504, + "learning_rate": 4.472049689440994e-07, + "log_odds_chosen": 0.8326815962791443, + "log_odds_ratio": -0.38279303908348083, + "logits/chosen": -1.1431697607040405, + "logits/rejected": -1.0575648546218872, + "logps/chosen": -1.3324904441833496, + "logps/rejected": -2.0087924003601074, + "loss": 2.2275, + "nll_loss": 0.5186055898666382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13324904441833496, + "rewards/margins": 0.06763018667697906, + "rewards/rejected": -0.20087924599647522, + "step": 1074 + }, + { + "epoch": 2.834541858932103, + "grad_norm": 10.405094146728516, + "learning_rate": 4.401064773735581e-07, + "log_odds_chosen": 0.7254148721694946, + "log_odds_ratio": -0.4014751613140106, + "logits/chosen": -1.1544923782348633, + "logits/rejected": -0.9924436211585999, + "logps/chosen": -1.3107327222824097, + "logps/rejected": -1.877253770828247, + "loss": 2.3186, + "nll_loss": 0.5394942760467529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13107328116893768, + "rewards/margins": 0.05665209889411926, + "rewards/rejected": -0.18772536516189575, + "step": 1075 + }, + { + "epoch": 2.8371786420566907, + "grad_norm": 9.991864204406738, + "learning_rate": 4.3300798580301686e-07, + "log_odds_chosen": 0.766412615776062, + "log_odds_ratio": -0.40515220165252686, + "logits/chosen": -1.0913034677505493, + "logits/rejected": -1.0271958112716675, + "logps/chosen": -1.3255186080932617, + "logps/rejected": -1.9466503858566284, + "loss": 2.022, + "nll_loss": 0.4649835228919983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1325518637895584, + "rewards/margins": 0.06211317703127861, + "rewards/rejected": -0.19466502964496613, + "step": 1076 + }, + { + "epoch": 2.839815425181279, + "grad_norm": 10.100076675415039, + "learning_rate": 4.2590949423247555e-07, + "log_odds_chosen": 0.6009469032287598, + "log_odds_ratio": -0.45665615797042847, + "logits/chosen": -1.1655510663986206, + "logits/rejected": -1.0256352424621582, + "logps/chosen": -1.3076218366622925, + "logps/rejected": -1.7924292087554932, + "loss": 2.5451, + "nll_loss": 0.5906082987785339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13076218962669373, + "rewards/margins": 0.04848073422908783, + "rewards/rejected": -0.17924290895462036, + "step": 1077 + }, + { + "epoch": 2.842452208305867, + "grad_norm": 9.875173568725586, + "learning_rate": 4.188110026619343e-07, + "log_odds_chosen": 0.7876337766647339, + "log_odds_ratio": -0.42633935809135437, + "logits/chosen": -1.1450986862182617, + "logits/rejected": -1.0313525199890137, + "logps/chosen": -1.335124135017395, + "logps/rejected": -2.00093936920166, + "loss": 2.1638, + "nll_loss": 0.49831515550613403, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13351240754127502, + "rewards/margins": 0.06658150255680084, + "rewards/rejected": -0.20009392499923706, + "step": 1078 + }, + { + "epoch": 2.8450889914304547, + "grad_norm": 9.214183807373047, + "learning_rate": 4.1171251109139303e-07, + "log_odds_chosen": 0.5808621644973755, + "log_odds_ratio": -0.49951937794685364, + "logits/chosen": -1.079819679260254, + "logits/rejected": -1.0531589984893799, + "logps/chosen": -1.077141523361206, + "logps/rejected": -1.4757481813430786, + "loss": 1.7651, + "nll_loss": 0.39131635427474976, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10771416127681732, + "rewards/margins": 0.039860669523477554, + "rewards/rejected": -0.14757482707500458, + "step": 1079 + }, + { + "epoch": 2.847725774555043, + "grad_norm": 10.297467231750488, + "learning_rate": 4.0461401952085177e-07, + "log_odds_chosen": 0.4811196029186249, + "log_odds_ratio": -0.4953339695930481, + "logits/chosen": -1.0673904418945312, + "logits/rejected": -0.9793952703475952, + "logps/chosen": -1.2865219116210938, + "logps/rejected": -1.6545686721801758, + "loss": 2.3899, + "nll_loss": 0.5479344725608826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1286522001028061, + "rewards/margins": 0.03680466115474701, + "rewards/rejected": -0.1654568612575531, + "step": 1080 + }, + { + "epoch": 2.850362557679631, + "grad_norm": 9.687300682067871, + "learning_rate": 3.975155279503105e-07, + "log_odds_chosen": 0.7552241086959839, + "log_odds_ratio": -0.40107399225234985, + "logits/chosen": -1.1260749101638794, + "logits/rejected": -1.0342764854431152, + "logps/chosen": -1.2746638059616089, + "logps/rejected": -1.8723293542861938, + "loss": 2.1534, + "nll_loss": 0.4982360005378723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1274663805961609, + "rewards/margins": 0.05976655334234238, + "rewards/rejected": -0.18723294138908386, + "step": 1081 + }, + { + "epoch": 2.852999340804219, + "grad_norm": 10.732072830200195, + "learning_rate": 3.904170363797693e-07, + "log_odds_chosen": 0.6202609539031982, + "log_odds_ratio": -0.45681479573249817, + "logits/chosen": -1.0848345756530762, + "logits/rejected": -1.0748814344406128, + "logps/chosen": -1.1798266172409058, + "logps/rejected": -1.6549553871154785, + "loss": 1.8035, + "nll_loss": 0.4052017629146576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1179826632142067, + "rewards/margins": 0.047512881457805634, + "rewards/rejected": -0.16549554467201233, + "step": 1082 + }, + { + "epoch": 2.855636123928807, + "grad_norm": 10.704569816589355, + "learning_rate": 3.8331854480922805e-07, + "log_odds_chosen": 0.4765079617500305, + "log_odds_ratio": -0.49119317531585693, + "logits/chosen": -1.2375128269195557, + "logits/rejected": -1.0925644636154175, + "logps/chosen": -1.4868674278259277, + "logps/rejected": -1.8717963695526123, + "loss": 2.8802, + "nll_loss": 0.6709375977516174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1486867368221283, + "rewards/margins": 0.03849291056394577, + "rewards/rejected": -0.18717963993549347, + "step": 1083 + }, + { + "epoch": 2.858272907053395, + "grad_norm": 10.494728088378906, + "learning_rate": 3.762200532386868e-07, + "log_odds_chosen": 0.6826547384262085, + "log_odds_ratio": -0.4175352454185486, + "logits/chosen": -1.1123088598251343, + "logits/rejected": -1.03884756565094, + "logps/chosen": -1.3633010387420654, + "logps/rejected": -1.9031158685684204, + "loss": 2.2859, + "nll_loss": 0.5297098159790039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13633009791374207, + "rewards/margins": 0.053981490433216095, + "rewards/rejected": -0.19031159579753876, + "step": 1084 + }, + { + "epoch": 2.8609096901779827, + "grad_norm": 10.636738777160645, + "learning_rate": 3.6912156166814553e-07, + "log_odds_chosen": 0.46351560950279236, + "log_odds_ratio": -0.49620485305786133, + "logits/chosen": -1.127618432044983, + "logits/rejected": -1.0073745250701904, + "logps/chosen": -1.481499433517456, + "logps/rejected": -1.860403299331665, + "loss": 2.6967, + "nll_loss": 0.6245455741882324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14814993739128113, + "rewards/margins": 0.037890393286943436, + "rewards/rejected": -0.18604034185409546, + "step": 1085 + }, + { + "epoch": 2.863546473302571, + "grad_norm": 10.276351928710938, + "learning_rate": 3.6202307009760427e-07, + "log_odds_chosen": 0.5640963315963745, + "log_odds_ratio": -0.4584580361843109, + "logits/chosen": -1.1923645734786987, + "logits/rejected": -1.0474330186843872, + "logps/chosen": -1.3473620414733887, + "logps/rejected": -1.795386791229248, + "loss": 2.3472, + "nll_loss": 0.540945291519165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13473621010780334, + "rewards/margins": 0.0448024608194828, + "rewards/rejected": -0.17953866720199585, + "step": 1086 + }, + { + "epoch": 2.866183256427159, + "grad_norm": 9.075798034667969, + "learning_rate": 3.5492457852706296e-07, + "log_odds_chosen": 0.740669846534729, + "log_odds_ratio": -0.40852227807044983, + "logits/chosen": -1.0822901725769043, + "logits/rejected": -1.02024245262146, + "logps/chosen": -1.1343278884887695, + "logps/rejected": -1.6959733963012695, + "loss": 1.8156, + "nll_loss": 0.4130437970161438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11343279480934143, + "rewards/margins": 0.05616454780101776, + "rewards/rejected": -0.1695973426103592, + "step": 1087 + }, + { + "epoch": 2.868820039551747, + "grad_norm": 10.139521598815918, + "learning_rate": 3.478260869565217e-07, + "log_odds_chosen": 0.6256661415100098, + "log_odds_ratio": -0.4504834711551666, + "logits/chosen": -1.1899868249893188, + "logits/rejected": -1.0847022533416748, + "logps/chosen": -1.3469818830490112, + "logps/rejected": -1.8414311408996582, + "loss": 2.4276, + "nll_loss": 0.5618480443954468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13469818234443665, + "rewards/margins": 0.04944493994116783, + "rewards/rejected": -0.18414312601089478, + "step": 1088 + }, + { + "epoch": 2.871456822676335, + "grad_norm": 9.940681457519531, + "learning_rate": 3.4072759538598044e-07, + "log_odds_chosen": 0.6833094358444214, + "log_odds_ratio": -0.42807725071907043, + "logits/chosen": -1.134655237197876, + "logits/rejected": -1.0335592031478882, + "logps/chosen": -1.3730700016021729, + "logps/rejected": -1.9044145345687866, + "loss": 2.3156, + "nll_loss": 0.5361028909683228, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13730700314044952, + "rewards/margins": 0.05313445255160332, + "rewards/rejected": -0.19044145941734314, + "step": 1089 + }, + { + "epoch": 2.874093605800923, + "grad_norm": 9.627927780151367, + "learning_rate": 3.336291038154392e-07, + "log_odds_chosen": 0.5087884664535522, + "log_odds_ratio": -0.4863870143890381, + "logits/chosen": -1.1259316205978394, + "logits/rejected": -1.076722502708435, + "logps/chosen": -1.1586689949035645, + "logps/rejected": -1.541443109512329, + "loss": 1.8021, + "nll_loss": 0.40189826488494873, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11586690694093704, + "rewards/margins": 0.03827742487192154, + "rewards/rejected": -0.15414433181285858, + "step": 1090 + }, + { + "epoch": 2.8767303889255107, + "grad_norm": 10.469525337219238, + "learning_rate": 3.265306122448979e-07, + "log_odds_chosen": 0.6726866364479065, + "log_odds_ratio": -0.4167555570602417, + "logits/chosen": -1.1142288446426392, + "logits/rejected": -1.0072951316833496, + "logps/chosen": -1.269399642944336, + "logps/rejected": -1.794497013092041, + "loss": 2.2729, + "nll_loss": 0.5265529751777649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12693995237350464, + "rewards/margins": 0.05250975862145424, + "rewards/rejected": -0.17944972217082977, + "step": 1091 + }, + { + "epoch": 2.879367172050099, + "grad_norm": 10.261611938476562, + "learning_rate": 3.194321206743567e-07, + "log_odds_chosen": 0.3666892647743225, + "log_odds_ratio": -0.5469551086425781, + "logits/chosen": -1.1116154193878174, + "logits/rejected": -1.0619001388549805, + "logps/chosen": -1.2703733444213867, + "logps/rejected": -1.5594353675842285, + "loss": 2.1767, + "nll_loss": 0.4894874095916748, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12703733146190643, + "rewards/margins": 0.028906218707561493, + "rewards/rejected": -0.15594354271888733, + "step": 1092 + }, + { + "epoch": 2.882003955174687, + "grad_norm": 9.441848754882812, + "learning_rate": 3.1233362910381546e-07, + "log_odds_chosen": 0.5594149231910706, + "log_odds_ratio": -0.48090699315071106, + "logits/chosen": -1.0834064483642578, + "logits/rejected": -1.0091333389282227, + "logps/chosen": -1.3397884368896484, + "logps/rejected": -1.7731108665466309, + "loss": 2.0474, + "nll_loss": 0.46376368403434753, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13397884368896484, + "rewards/margins": 0.043332237750291824, + "rewards/rejected": -0.17731109261512756, + "step": 1093 + }, + { + "epoch": 2.884640738299275, + "grad_norm": 10.408712387084961, + "learning_rate": 3.052351375332742e-07, + "log_odds_chosen": 0.42781007289886475, + "log_odds_ratio": -0.5116385817527771, + "logits/chosen": -1.1213773488998413, + "logits/rejected": -1.0789302587509155, + "logps/chosen": -1.3997325897216797, + "logps/rejected": -1.7490270137786865, + "loss": 2.2369, + "nll_loss": 0.5080611705780029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1399732530117035, + "rewards/margins": 0.03492945432662964, + "rewards/rejected": -0.17490270733833313, + "step": 1094 + }, + { + "epoch": 2.887277521423863, + "grad_norm": 9.774060249328613, + "learning_rate": 2.9813664596273294e-07, + "log_odds_chosen": 0.7794356346130371, + "log_odds_ratio": -0.3976297378540039, + "logits/chosen": -1.1235860586166382, + "logits/rejected": -1.064821720123291, + "logps/chosen": -1.1625523567199707, + "logps/rejected": -1.7622184753417969, + "loss": 1.855, + "nll_loss": 0.4239905774593353, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11625523865222931, + "rewards/margins": 0.05996660143136978, + "rewards/rejected": -0.1762218326330185, + "step": 1095 + }, + { + "epoch": 2.889914304548451, + "grad_norm": 10.065715789794922, + "learning_rate": 2.910381543921916e-07, + "log_odds_chosen": 0.6282092928886414, + "log_odds_ratio": -0.4407644271850586, + "logits/chosen": -1.1285327672958374, + "logits/rejected": -1.012041449546814, + "logps/chosen": -1.172711968421936, + "logps/rejected": -1.6262803077697754, + "loss": 2.2328, + "nll_loss": 0.5141250491142273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11727119982242584, + "rewards/margins": 0.04535682499408722, + "rewards/rejected": -0.16262802481651306, + "step": 1096 + }, + { + "epoch": 2.8925510876730387, + "grad_norm": 11.159720420837402, + "learning_rate": 2.8393966282165037e-07, + "log_odds_chosen": 0.35956841707229614, + "log_odds_ratio": -0.5396775007247925, + "logits/chosen": -1.1792635917663574, + "logits/rejected": -1.032005786895752, + "logps/chosen": -1.3383150100708008, + "logps/rejected": -1.6125526428222656, + "loss": 2.9255, + "nll_loss": 0.6774047613143921, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13383150100708008, + "rewards/margins": 0.027423767372965813, + "rewards/rejected": -0.16125527024269104, + "step": 1097 + }, + { + "epoch": 2.895187870797627, + "grad_norm": 11.200017929077148, + "learning_rate": 2.768411712511091e-07, + "log_odds_chosen": 0.47986552119255066, + "log_odds_ratio": -0.49582991003990173, + "logits/chosen": -1.0967844724655151, + "logits/rejected": -1.026170253753662, + "logps/chosen": -1.4546232223510742, + "logps/rejected": -1.823494791984558, + "loss": 2.9436, + "nll_loss": 0.6863148808479309, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14546233415603638, + "rewards/margins": 0.03688715398311615, + "rewards/rejected": -0.18234948813915253, + "step": 1098 + }, + { + "epoch": 2.897824653922215, + "grad_norm": 11.039349555969238, + "learning_rate": 2.6974267968056785e-07, + "log_odds_chosen": 0.4787980020046234, + "log_odds_ratio": -0.4917075037956238, + "logits/chosen": -1.0969046354293823, + "logits/rejected": -1.0258355140686035, + "logps/chosen": -1.362617015838623, + "logps/rejected": -1.7399983406066895, + "loss": 2.0708, + "nll_loss": 0.4685228765010834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1362617015838623, + "rewards/margins": 0.0377381332218647, + "rewards/rejected": -0.1739998459815979, + "step": 1099 + }, + { + "epoch": 2.900461437046803, + "grad_norm": 10.629711151123047, + "learning_rate": 2.626441881100266e-07, + "log_odds_chosen": 0.38331663608551025, + "log_odds_ratio": -0.5311778783798218, + "logits/chosen": -1.2185980081558228, + "logits/rejected": -1.1109384298324585, + "logps/chosen": -1.3727366924285889, + "logps/rejected": -1.677282691001892, + "loss": 2.5856, + "nll_loss": 0.5932785272598267, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1372736692428589, + "rewards/margins": 0.03045460395514965, + "rewards/rejected": -0.1677282750606537, + "step": 1100 + }, + { + "epoch": 2.903098220171391, + "grad_norm": 10.82974910736084, + "learning_rate": 2.5554569653948533e-07, + "log_odds_chosen": 0.7012743353843689, + "log_odds_ratio": -0.48878979682922363, + "logits/chosen": -1.1110811233520508, + "logits/rejected": -1.0399723052978516, + "logps/chosen": -1.218574047088623, + "logps/rejected": -1.8044946193695068, + "loss": 2.571, + "nll_loss": 0.5938761830329895, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1218574047088623, + "rewards/margins": 0.058592043817043304, + "rewards/rejected": -0.180449441075325, + "step": 1101 + }, + { + "epoch": 2.905735003295979, + "grad_norm": 9.74654483795166, + "learning_rate": 2.4844720496894407e-07, + "log_odds_chosen": 0.4575854539871216, + "log_odds_ratio": -0.49557802081108093, + "logits/chosen": -1.0428321361541748, + "logits/rejected": -0.9787229299545288, + "logps/chosen": -1.2805407047271729, + "logps/rejected": -1.61838960647583, + "loss": 2.0251, + "nll_loss": 0.45671436190605164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12805408239364624, + "rewards/margins": 0.033784881234169006, + "rewards/rejected": -0.16183894872665405, + "step": 1102 + }, + { + "epoch": 2.9083717864205667, + "grad_norm": 10.815143585205078, + "learning_rate": 2.413487133984028e-07, + "log_odds_chosen": 0.3740006983280182, + "log_odds_ratio": -0.5430451035499573, + "logits/chosen": -1.1571646928787231, + "logits/rejected": -1.0375688076019287, + "logps/chosen": -1.4973931312561035, + "logps/rejected": -1.8008571863174438, + "loss": 2.8253, + "nll_loss": 0.652025043964386, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1497393250465393, + "rewards/margins": 0.030346402898430824, + "rewards/rejected": -0.18008573353290558, + "step": 1103 + }, + { + "epoch": 2.911008569545155, + "grad_norm": 10.645405769348145, + "learning_rate": 2.3425022182786155e-07, + "log_odds_chosen": 0.49522343277931213, + "log_odds_ratio": -0.4888048470020294, + "logits/chosen": -1.2563395500183105, + "logits/rejected": -1.1153737306594849, + "logps/chosen": -1.2137320041656494, + "logps/rejected": -1.5825142860412598, + "loss": 2.5768, + "nll_loss": 0.5953096747398376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12137319892644882, + "rewards/margins": 0.036878228187561035, + "rewards/rejected": -0.15825143456459045, + "step": 1104 + }, + { + "epoch": 2.913645352669743, + "grad_norm": 9.802302360534668, + "learning_rate": 2.2715173025732032e-07, + "log_odds_chosen": 0.7142266035079956, + "log_odds_ratio": -0.4076310098171234, + "logits/chosen": -1.095991611480713, + "logits/rejected": -0.9405829906463623, + "logps/chosen": -1.2853018045425415, + "logps/rejected": -1.8538234233856201, + "loss": 2.0316, + "nll_loss": 0.46713969111442566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12853017449378967, + "rewards/margins": 0.05685216188430786, + "rewards/rejected": -0.18538233637809753, + "step": 1105 + }, + { + "epoch": 2.916282135794331, + "grad_norm": 10.226502418518066, + "learning_rate": 2.2005323868677906e-07, + "log_odds_chosen": 0.44650131464004517, + "log_odds_ratio": -0.49772655963897705, + "logits/chosen": -1.0860422849655151, + "logits/rejected": -1.0122694969177246, + "logps/chosen": -1.528808832168579, + "logps/rejected": -1.8934898376464844, + "loss": 2.2996, + "nll_loss": 0.5251332521438599, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15288087725639343, + "rewards/margins": 0.03646809235215187, + "rewards/rejected": -0.1893489807844162, + "step": 1106 + }, + { + "epoch": 2.918918918918919, + "grad_norm": 11.037799835205078, + "learning_rate": 2.1295474711623777e-07, + "log_odds_chosen": 0.36173897981643677, + "log_odds_ratio": -0.5456938147544861, + "logits/chosen": -1.2418875694274902, + "logits/rejected": -1.1130543947219849, + "logps/chosen": -1.4015865325927734, + "logps/rejected": -1.6816915273666382, + "loss": 2.9973, + "nll_loss": 0.6947578191757202, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14015865325927734, + "rewards/margins": 0.028010498732328415, + "rewards/rejected": -0.16816917061805725, + "step": 1107 + }, + { + "epoch": 2.921555702043507, + "grad_norm": 9.955268859863281, + "learning_rate": 2.0585625554569652e-07, + "log_odds_chosen": 0.5130655169487, + "log_odds_ratio": -0.4895630180835724, + "logits/chosen": -1.1983622312545776, + "logits/rejected": -1.0804213285446167, + "logps/chosen": -1.347536563873291, + "logps/rejected": -1.7593469619750977, + "loss": 2.4842, + "nll_loss": 0.5720826983451843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13475365936756134, + "rewards/margins": 0.041181016713380814, + "rewards/rejected": -0.17593468725681305, + "step": 1108 + }, + { + "epoch": 2.9241924851680947, + "grad_norm": 11.561691284179688, + "learning_rate": 1.9875776397515526e-07, + "log_odds_chosen": 0.33087798953056335, + "log_odds_ratio": -0.5504778623580933, + "logits/chosen": -1.0546035766601562, + "logits/rejected": -0.987433135509491, + "logps/chosen": -1.4973101615905762, + "logps/rejected": -1.7580746412277222, + "loss": 2.6428, + "nll_loss": 0.6056430339813232, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14973101019859314, + "rewards/margins": 0.026076439768075943, + "rewards/rejected": -0.17580744624137878, + "step": 1109 + }, + { + "epoch": 2.926829268292683, + "grad_norm": 9.803023338317871, + "learning_rate": 1.9165927240461402e-07, + "log_odds_chosen": 0.6108183860778809, + "log_odds_ratio": -0.4591115117073059, + "logits/chosen": -1.1352243423461914, + "logits/rejected": -1.1190838813781738, + "logps/chosen": -1.2892515659332275, + "logps/rejected": -1.7547543048858643, + "loss": 2.2918, + "nll_loss": 0.5270448327064514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.128925159573555, + "rewards/margins": 0.04655028134584427, + "rewards/rejected": -0.17547544836997986, + "step": 1110 + }, + { + "epoch": 2.929466051417271, + "grad_norm": 10.088869094848633, + "learning_rate": 1.8456078083407276e-07, + "log_odds_chosen": 0.6438597440719604, + "log_odds_ratio": -0.44443920254707336, + "logits/chosen": -1.1769981384277344, + "logits/rejected": -0.9903606176376343, + "logps/chosen": -1.3865466117858887, + "logps/rejected": -1.9032771587371826, + "loss": 2.5475, + "nll_loss": 0.592436671257019, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1386546492576599, + "rewards/margins": 0.051673054695129395, + "rewards/rejected": -0.1903277039527893, + "step": 1111 + }, + { + "epoch": 2.932102834541859, + "grad_norm": 10.34731388092041, + "learning_rate": 1.7746228926353148e-07, + "log_odds_chosen": 0.8123482465744019, + "log_odds_ratio": -0.3837769627571106, + "logits/chosen": -1.1974098682403564, + "logits/rejected": -1.0578560829162598, + "logps/chosen": -1.285667061805725, + "logps/rejected": -1.8774243593215942, + "loss": 2.5631, + "nll_loss": 0.6024080514907837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.128566712141037, + "rewards/margins": 0.05917573720216751, + "rewards/rejected": -0.1877424567937851, + "step": 1112 + }, + { + "epoch": 2.934739617666447, + "grad_norm": 9.824599266052246, + "learning_rate": 1.7036379769299022e-07, + "log_odds_chosen": 0.6928430199623108, + "log_odds_ratio": -0.4147317409515381, + "logits/chosen": -1.1532843112945557, + "logits/rejected": -1.0601625442504883, + "logps/chosen": -1.162044644355774, + "logps/rejected": -1.6832411289215088, + "loss": 1.9871, + "nll_loss": 0.455294668674469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11620447039604187, + "rewards/margins": 0.05211963877081871, + "rewards/rejected": -0.16832411289215088, + "step": 1113 + }, + { + "epoch": 2.937376400791035, + "grad_norm": 10.522456169128418, + "learning_rate": 1.6326530612244896e-07, + "log_odds_chosen": 0.3960050940513611, + "log_odds_ratio": -0.5374730229377747, + "logits/chosen": -1.1554762125015259, + "logits/rejected": -1.0569953918457031, + "logps/chosen": -1.5269532203674316, + "logps/rejected": -1.844236135482788, + "loss": 2.5369, + "nll_loss": 0.5804662704467773, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15269532799720764, + "rewards/margins": 0.03172829747200012, + "rewards/rejected": -0.18442362546920776, + "step": 1114 + }, + { + "epoch": 2.9400131839156227, + "grad_norm": 9.986109733581543, + "learning_rate": 1.5616681455190773e-07, + "log_odds_chosen": 0.7218302488327026, + "log_odds_ratio": -0.4211685359477997, + "logits/chosen": -1.066129207611084, + "logits/rejected": -0.9383447766304016, + "logps/chosen": -1.347313642501831, + "logps/rejected": -1.9073268175125122, + "loss": 2.5148, + "nll_loss": 0.5865715742111206, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13473136723041534, + "rewards/margins": 0.05600131303071976, + "rewards/rejected": -0.1907326877117157, + "step": 1115 + }, + { + "epoch": 2.942649967040211, + "grad_norm": 10.249882698059082, + "learning_rate": 1.4906832298136647e-07, + "log_odds_chosen": 0.5400087237358093, + "log_odds_ratio": -0.46870461106300354, + "logits/chosen": -1.1922576427459717, + "logits/rejected": -1.0959630012512207, + "logps/chosen": -1.3472270965576172, + "logps/rejected": -1.7658050060272217, + "loss": 2.8924, + "nll_loss": 0.6762197017669678, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13472270965576172, + "rewards/margins": 0.041857779026031494, + "rewards/rejected": -0.1765804886817932, + "step": 1116 + }, + { + "epoch": 2.945286750164799, + "grad_norm": 10.107986450195312, + "learning_rate": 1.4196983141082518e-07, + "log_odds_chosen": 0.6772925853729248, + "log_odds_ratio": -0.4206685423851013, + "logits/chosen": -1.0564854145050049, + "logits/rejected": -1.0141981840133667, + "logps/chosen": -1.4403352737426758, + "logps/rejected": -1.99358069896698, + "loss": 2.1219, + "nll_loss": 0.4884171485900879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1440335214138031, + "rewards/margins": 0.05532454699277878, + "rewards/rejected": -0.19935807585716248, + "step": 1117 + }, + { + "epoch": 2.947923533289387, + "grad_norm": 10.683755874633789, + "learning_rate": 1.3487133984028392e-07, + "log_odds_chosen": 0.5642454624176025, + "log_odds_ratio": -0.4553692638874054, + "logits/chosen": -1.1627285480499268, + "logits/rejected": -1.0698070526123047, + "logps/chosen": -1.3500248193740845, + "logps/rejected": -1.7949421405792236, + "loss": 2.4335, + "nll_loss": 0.5628418922424316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1350024789571762, + "rewards/margins": 0.044491734355688095, + "rewards/rejected": -0.1794942021369934, + "step": 1118 + }, + { + "epoch": 2.950560316413975, + "grad_norm": 10.497546195983887, + "learning_rate": 1.2777284826974266e-07, + "log_odds_chosen": 0.6890730261802673, + "log_odds_ratio": -0.41999584436416626, + "logits/chosen": -1.1632972955703735, + "logits/rejected": -1.0566202402114868, + "logps/chosen": -1.183455228805542, + "logps/rejected": -1.707749366760254, + "loss": 2.0868, + "nll_loss": 0.47970157861709595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11834551393985748, + "rewards/margins": 0.052429430186748505, + "rewards/rejected": -0.1707749366760254, + "step": 1119 + }, + { + "epoch": 2.953197099538563, + "grad_norm": 9.32829761505127, + "learning_rate": 1.206743566992014e-07, + "log_odds_chosen": 0.49325263500213623, + "log_odds_ratio": -0.4847280979156494, + "logits/chosen": -1.024897575378418, + "logits/rejected": -0.9912652969360352, + "logps/chosen": -1.1244151592254639, + "logps/rejected": -1.4825202226638794, + "loss": 1.5474, + "nll_loss": 0.338373064994812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1124415248632431, + "rewards/margins": 0.03581049665808678, + "rewards/rejected": -0.14825202524662018, + "step": 1120 + }, + { + "epoch": 2.9558338826631507, + "grad_norm": 9.516386985778809, + "learning_rate": 1.1357586512866016e-07, + "log_odds_chosen": 0.7329659461975098, + "log_odds_ratio": -0.41048258543014526, + "logits/chosen": -1.1054126024246216, + "logits/rejected": -1.0205689668655396, + "logps/chosen": -1.1692774295806885, + "logps/rejected": -1.732006549835205, + "loss": 1.8455, + "nll_loss": 0.42032352089881897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11692775040864944, + "rewards/margins": 0.056272901594638824, + "rewards/rejected": -0.17320065200328827, + "step": 1121 + }, + { + "epoch": 2.958470665787739, + "grad_norm": 10.708715438842773, + "learning_rate": 1.0647737355811889e-07, + "log_odds_chosen": 0.40624159574508667, + "log_odds_ratio": -0.5160567760467529, + "logits/chosen": -1.0800803899765015, + "logits/rejected": -1.0116575956344604, + "logps/chosen": -1.4815709590911865, + "logps/rejected": -1.7999603748321533, + "loss": 2.5454, + "nll_loss": 0.5847477912902832, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14815710484981537, + "rewards/margins": 0.031838931143283844, + "rewards/rejected": -0.1799960434436798, + "step": 1122 + }, + { + "epoch": 2.961107448912327, + "grad_norm": 9.641554832458496, + "learning_rate": 9.937888198757763e-08, + "log_odds_chosen": 0.8635714650154114, + "log_odds_ratio": -0.38064178824424744, + "logits/chosen": -1.1759165525436401, + "logits/rejected": -1.0470998287200928, + "logps/chosen": -1.2402002811431885, + "logps/rejected": -1.936885118484497, + "loss": 2.0752, + "nll_loss": 0.48072531819343567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1240200400352478, + "rewards/margins": 0.0696684867143631, + "rewards/rejected": -0.1936885118484497, + "step": 1123 + }, + { + "epoch": 2.963744232036915, + "grad_norm": 10.646029472351074, + "learning_rate": 9.228039041703638e-08, + "log_odds_chosen": 0.4761095345020294, + "log_odds_ratio": -0.499419629573822, + "logits/chosen": -1.171363353729248, + "logits/rejected": -1.0856765508651733, + "logps/chosen": -1.3061269521713257, + "logps/rejected": -1.6765310764312744, + "loss": 2.4182, + "nll_loss": 0.5546119213104248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13061270117759705, + "rewards/margins": 0.03704041987657547, + "rewards/rejected": -0.1676531285047531, + "step": 1124 + }, + { + "epoch": 2.966381015161503, + "grad_norm": 9.832589149475098, + "learning_rate": 8.518189884649511e-08, + "log_odds_chosen": 0.620135486125946, + "log_odds_ratio": -0.44997426867485046, + "logits/chosen": -1.1520901918411255, + "logits/rejected": -1.0546834468841553, + "logps/chosen": -1.2627184391021729, + "logps/rejected": -1.7329437732696533, + "loss": 2.2203, + "nll_loss": 0.5100676417350769, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12627184391021729, + "rewards/margins": 0.04702254384756088, + "rewards/rejected": -0.17329436540603638, + "step": 1125 + }, + { + "epoch": 2.969017798286091, + "grad_norm": 10.106729507446289, + "learning_rate": 7.808340727595386e-08, + "log_odds_chosen": 0.4591771364212036, + "log_odds_ratio": -0.5125027894973755, + "logits/chosen": -1.1476519107818604, + "logits/rejected": -1.0820443630218506, + "logps/chosen": -1.341275691986084, + "logps/rejected": -1.709936499595642, + "loss": 2.5364, + "nll_loss": 0.5828558206558228, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13412757217884064, + "rewards/margins": 0.03686608374118805, + "rewards/rejected": -0.1709936410188675, + "step": 1126 + }, + { + "epoch": 2.9716545814106787, + "grad_norm": 10.069293022155762, + "learning_rate": 7.098491570541259e-08, + "log_odds_chosen": 0.5392491221427917, + "log_odds_ratio": -0.47593629360198975, + "logits/chosen": -1.0493738651275635, + "logits/rejected": -0.9915323257446289, + "logps/chosen": -1.2978014945983887, + "logps/rejected": -1.7215747833251953, + "loss": 2.1687, + "nll_loss": 0.49458765983581543, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12978015840053558, + "rewards/margins": 0.04237733036279678, + "rewards/rejected": -0.17215748131275177, + "step": 1127 + }, + { + "epoch": 2.974291364535267, + "grad_norm": 9.97978687286377, + "learning_rate": 6.388642413487133e-08, + "log_odds_chosen": 0.6298450231552124, + "log_odds_ratio": -0.4396544098854065, + "logits/chosen": -1.1477969884872437, + "logits/rejected": -0.9882994890213013, + "logps/chosen": -1.461350440979004, + "logps/rejected": -1.9838271141052246, + "loss": 2.4329, + "nll_loss": 0.5642586946487427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14613503217697144, + "rewards/margins": 0.052247680723667145, + "rewards/rejected": -0.19838272035121918, + "step": 1128 + }, + { + "epoch": 2.976928147659855, + "grad_norm": 10.508831977844238, + "learning_rate": 5.678793256433008e-08, + "log_odds_chosen": 0.25923722982406616, + "log_odds_ratio": -0.5890034437179565, + "logits/chosen": -1.1681803464889526, + "logits/rejected": -1.0757927894592285, + "logps/chosen": -1.3958204984664917, + "logps/rejected": -1.6009249687194824, + "loss": 2.7438, + "nll_loss": 0.6270577311515808, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1395820528268814, + "rewards/margins": 0.020510446280241013, + "rewards/rejected": -0.16009250283241272, + "step": 1129 + }, + { + "epoch": 2.979564930784443, + "grad_norm": 10.465526580810547, + "learning_rate": 4.9689440993788814e-08, + "log_odds_chosen": 0.502131998538971, + "log_odds_ratio": -0.48989468812942505, + "logits/chosen": -1.0431938171386719, + "logits/rejected": -0.9729862213134766, + "logps/chosen": -1.2752975225448608, + "logps/rejected": -1.6618212461471558, + "loss": 1.6744, + "nll_loss": 0.3696220815181732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12752975523471832, + "rewards/margins": 0.03865238279104233, + "rewards/rejected": -0.16618213057518005, + "step": 1130 + }, + { + "epoch": 2.982201713909031, + "grad_norm": 10.241425514221191, + "learning_rate": 4.2590949423247555e-08, + "log_odds_chosen": 0.5095721483230591, + "log_odds_ratio": -0.4832859933376312, + "logits/chosen": -1.03834867477417, + "logits/rejected": -0.9979722499847412, + "logps/chosen": -1.2013146877288818, + "logps/rejected": -1.5752215385437012, + "loss": 2.0638, + "nll_loss": 0.46762895584106445, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1201314628124237, + "rewards/margins": 0.037390682846307755, + "rewards/rejected": -0.15752214193344116, + "step": 1131 + }, + { + "epoch": 2.984838497033619, + "grad_norm": 9.945733070373535, + "learning_rate": 3.5492457852706296e-08, + "log_odds_chosen": 0.5073897242546082, + "log_odds_ratio": -0.48286592960357666, + "logits/chosen": -1.1669549942016602, + "logits/rejected": -1.0747926235198975, + "logps/chosen": -1.213772177696228, + "logps/rejected": -1.6001689434051514, + "loss": 2.2927, + "nll_loss": 0.5248969197273254, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12137721478939056, + "rewards/margins": 0.03863967955112457, + "rewards/rejected": -0.16001689434051514, + "step": 1132 + }, + { + "epoch": 2.9874752801582067, + "grad_norm": 10.28751277923584, + "learning_rate": 2.839396628216504e-08, + "log_odds_chosen": 0.5915455222129822, + "log_odds_ratio": -0.44667544960975647, + "logits/chosen": -1.124068021774292, + "logits/rejected": -0.9785863161087036, + "logps/chosen": -1.3051058053970337, + "logps/rejected": -1.7666263580322266, + "loss": 2.0903, + "nll_loss": 0.477913498878479, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1305105835199356, + "rewards/margins": 0.046152062714099884, + "rewards/rejected": -0.1766626536846161, + "step": 1133 + }, + { + "epoch": 2.990112063282795, + "grad_norm": 10.091132164001465, + "learning_rate": 2.1295474711623777e-08, + "log_odds_chosen": 0.5201779007911682, + "log_odds_ratio": -0.4749506711959839, + "logits/chosen": -1.118676781654358, + "logits/rejected": -1.0187113285064697, + "logps/chosen": -1.2404669523239136, + "logps/rejected": -1.6390795707702637, + "loss": 2.0141, + "nll_loss": 0.4560197591781616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1240466982126236, + "rewards/margins": 0.03986125811934471, + "rewards/rejected": -0.1639079451560974, + "step": 1134 + }, + { + "epoch": 2.992748846407383, + "grad_norm": 11.591419219970703, + "learning_rate": 1.419698314108252e-08, + "log_odds_chosen": 0.4546339809894562, + "log_odds_ratio": -0.5016353726387024, + "logits/chosen": -1.183355450630188, + "logits/rejected": -1.048933744430542, + "logps/chosen": -1.4572858810424805, + "logps/rejected": -1.8160556554794312, + "loss": 3.2187, + "nll_loss": 0.7545139193534851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14572857320308685, + "rewards/margins": 0.03587697818875313, + "rewards/rejected": -0.18160554766654968, + "step": 1135 + }, + { + "epoch": 2.995385629531971, + "grad_norm": 10.501843452453613, + "learning_rate": 7.09849157054126e-09, + "log_odds_chosen": 0.46021127700805664, + "log_odds_ratio": -0.558140754699707, + "logits/chosen": -1.1137306690216064, + "logits/rejected": -0.9876527190208435, + "logps/chosen": -1.424965500831604, + "logps/rejected": -1.8375980854034424, + "loss": 2.3125, + "nll_loss": 0.5223027467727661, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14249655604362488, + "rewards/margins": 0.041263267397880554, + "rewards/rejected": -0.18375982344150543, + "step": 1136 + }, + { + "epoch": 2.998022412656559, + "grad_norm": 10.061162948608398, + "learning_rate": 0.0, + "log_odds_chosen": 0.4221417009830475, + "log_odds_ratio": -0.5208521485328674, + "logits/chosen": -1.1026612520217896, + "logits/rejected": -1.025718331336975, + "logps/chosen": -1.3111175298690796, + "logps/rejected": -1.620469570159912, + "loss": 2.165, + "nll_loss": 0.48915278911590576, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1311117708683014, + "rewards/margins": 0.030935190618038177, + "rewards/rejected": -0.16204693913459778, + "step": 1137 + } + ], + "logging_steps": 1, + "max_steps": 1137, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}