diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,5487 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.000825763831544, - "eval_steps": 500, - "global_step": 303, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0033030553261767133, - "grad_norm": 2.261504650115967, - "learning_rate": 4e-07, - "log_odds_chosen": -0.2241445928812027, - "log_odds_ratio": -0.8586174249649048, - "logits/chosen": -0.33469390869140625, - "logits/rejected": -0.1143772155046463, - "logps/chosen": -1.5753452777862549, - "logps/rejected": -1.3763688802719116, - "loss": 1.8326, - "nll_loss": 1.7467615604400635, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.15753452479839325, - "rewards/margins": -0.01989762857556343, - "rewards/rejected": -0.13763689994812012, - "step": 1 - }, - { - "epoch": 0.006606110652353427, - "grad_norm": 5.068044662475586, - "learning_rate": 8e-07, - "log_odds_chosen": -0.7606785297393799, - "log_odds_ratio": -1.199891448020935, - "logits/chosen": -0.29058611392974854, - "logits/rejected": -0.07472708821296692, - "logps/chosen": -1.8422915935516357, - "logps/rejected": -1.2578465938568115, - "loss": 2.0929, - "nll_loss": 1.9729175567626953, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.18422916531562805, - "rewards/margins": -0.05844450369477272, - "rewards/rejected": -0.12578466534614563, - "step": 2 - }, - { - "epoch": 0.00990916597853014, - "grad_norm": 2.8977644443511963, - "learning_rate": 1.2e-06, - "log_odds_chosen": -0.36561280488967896, - "log_odds_ratio": -0.8982158899307251, - "logits/chosen": -0.09766542911529541, - "logits/rejected": -0.2578570246696472, - "logps/chosen": -1.6280708312988281, - "logps/rejected": -1.3394700288772583, - "loss": 1.8728, - "nll_loss": 1.7829303741455078, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.16280707716941833, - "rewards/margins": -0.028860075399279594, - "rewards/rejected": -0.13394701480865479, - "step": 3 - }, - { - "epoch": 0.013212221304706853, - "grad_norm": 1.4845504760742188, - "learning_rate": 1.6e-06, - "log_odds_chosen": -0.834815263748169, - "log_odds_ratio": -1.2288198471069336, - "logits/chosen": -0.35591068863868713, - "logits/rejected": 0.0010098591446876526, - "logps/chosen": -1.985548734664917, - "logps/rejected": -1.3153765201568604, - "loss": 2.2439, - "nll_loss": 2.1209845542907715, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.1985548734664917, - "rewards/margins": -0.06701722741127014, - "rewards/rejected": -0.13153764605522156, - "step": 4 - }, - { - "epoch": 0.016515276630883566, - "grad_norm": 1.2717643976211548, - "learning_rate": 2e-06, - "log_odds_chosen": -0.9116457104682922, - "log_odds_ratio": -1.250096321105957, - "logits/chosen": -0.500062108039856, - "logits/rejected": -0.05940089747309685, - "logps/chosen": -2.0961155891418457, - "logps/rejected": -1.3541979789733887, - "loss": 2.3095, - "nll_loss": 2.1844778060913086, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.20961156487464905, - "rewards/margins": -0.07419176399707794, - "rewards/rejected": -0.1354198008775711, - "step": 5 - }, - { - "epoch": 0.01981833195706028, - "grad_norm": 1.7615158557891846, - "learning_rate": 2.4e-06, - "log_odds_chosen": -0.6846175193786621, - "log_odds_ratio": -1.1524499654769897, - "logits/chosen": -0.26127660274505615, - "logits/rejected": 0.0617867074906826, - "logps/chosen": -1.662479043006897, - "logps/rejected": -1.1213412284851074, - "loss": 1.9308, - "nll_loss": 1.8155540227890015, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.1662479043006897, - "rewards/margins": -0.054113782942295074, - "rewards/rejected": -0.11213412135839462, - "step": 6 - }, - { - "epoch": 0.023121387283236993, - "grad_norm": 2.280388593673706, - "learning_rate": 2.8e-06, - "log_odds_chosen": -0.749167799949646, - "log_odds_ratio": -1.1788314580917358, - "logits/chosen": -0.39172622561454773, - "logits/rejected": -0.19565054774284363, - "logps/chosen": -1.8560761213302612, - "logps/rejected": -1.2627700567245483, - "loss": 2.0831, - "nll_loss": 1.9652044773101807, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.18560759723186493, - "rewards/margins": -0.05933060497045517, - "rewards/rejected": -0.12627699971199036, - "step": 7 - }, - { - "epoch": 0.026424442609413706, - "grad_norm": 0.9076488614082336, - "learning_rate": 3.2e-06, - "log_odds_chosen": -0.874955952167511, - "log_odds_ratio": -1.2600207328796387, - "logits/chosen": -0.43234679102897644, - "logits/rejected": -0.20656439661979675, - "logps/chosen": -1.9642348289489746, - "logps/rejected": -1.266099452972412, - "loss": 2.2067, - "nll_loss": 2.080700397491455, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.1964234858751297, - "rewards/margins": -0.06981353461742401, - "rewards/rejected": -0.1266099363565445, - "step": 8 - }, - { - "epoch": 0.02972749793559042, - "grad_norm": 1.871086597442627, - "learning_rate": 3.6e-06, - "log_odds_chosen": -0.9555003046989441, - "log_odds_ratio": -1.2941690683364868, - "logits/chosen": -0.6065811514854431, - "logits/rejected": -0.27269935607910156, - "logps/chosen": -1.9514598846435547, - "logps/rejected": -1.1959837675094604, - "loss": 2.2025, - "nll_loss": 2.073038101196289, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.19514599442481995, - "rewards/margins": -0.07554760575294495, - "rewards/rejected": -0.1195983737707138, - "step": 9 - }, - { - "epoch": 0.03303055326176713, - "grad_norm": 1.770780324935913, - "learning_rate": 4e-06, - "log_odds_chosen": -0.8512029051780701, - "log_odds_ratio": -1.2830898761749268, - "logits/chosen": -0.3718774914741516, - "logits/rejected": 0.0023509846068918705, - "logps/chosen": -1.790738821029663, - "logps/rejected": -1.0968332290649414, - "loss": 2.0389, - "nll_loss": 1.9105803966522217, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.17907389998435974, - "rewards/margins": -0.06939057260751724, - "rewards/rejected": -0.1096833199262619, - "step": 10 - }, - { - "epoch": 0.03633360858794385, - "grad_norm": 1.9129818677902222, - "learning_rate": 3.9863481228668935e-06, - "log_odds_chosen": -0.17513173818588257, - "log_odds_ratio": -0.7939130663871765, - "logits/chosen": -0.3047102391719818, - "logits/rejected": -0.38946613669395447, - "logps/chosen": -1.4973294734954834, - "logps/rejected": -1.371248722076416, - "loss": 1.7686, - "nll_loss": 1.6892013549804688, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.14973296225070953, - "rewards/margins": -0.012608081102371216, - "rewards/rejected": -0.13712486624717712, - "step": 11 - }, - { - "epoch": 0.03963666391412056, - "grad_norm": 1.1834336519241333, - "learning_rate": 3.972696245733788e-06, - "log_odds_chosen": -1.05147123336792, - "log_odds_ratio": -1.3749547004699707, - "logits/chosen": -0.379026859998703, - "logits/rejected": -0.06965988874435425, - "logps/chosen": -2.0190250873565674, - "logps/rejected": -1.1910419464111328, - "loss": 2.2812, - "nll_loss": 2.143681049346924, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.20190252363681793, - "rewards/margins": -0.08279832452535629, - "rewards/rejected": -0.11910419911146164, - "step": 12 - }, - { - "epoch": 0.042939719240297276, - "grad_norm": 6.578203201293945, - "learning_rate": 3.9590443686006824e-06, - "log_odds_chosen": -0.5029235482215881, - "log_odds_ratio": -0.9773486852645874, - "logits/chosen": -0.48467978835105896, - "logits/rejected": 0.33332502841949463, - "logps/chosen": -1.5493595600128174, - "logps/rejected": -1.175106406211853, - "loss": 1.8216, - "nll_loss": 1.723886251449585, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.15493595600128174, - "rewards/margins": -0.037425316870212555, - "rewards/rejected": -0.11751063913106918, - "step": 13 - }, - { - "epoch": 0.046242774566473986, - "grad_norm": 1.820356845855713, - "learning_rate": 3.945392491467577e-06, - "log_odds_chosen": -0.8386282324790955, - "log_odds_ratio": -1.224194049835205, - "logits/chosen": -0.44413310289382935, - "logits/rejected": 0.060963451862335205, - "logps/chosen": -1.854997158050537, - "logps/rejected": -1.2076776027679443, - "loss": 2.1122, - "nll_loss": 1.9897370338439941, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.18549972772598267, - "rewards/margins": -0.06473197042942047, - "rewards/rejected": -0.1207677572965622, - "step": 14 - }, - { - "epoch": 0.0495458298926507, - "grad_norm": 1.8836688995361328, - "learning_rate": 3.931740614334471e-06, - "log_odds_chosen": -0.3730139136314392, - "log_odds_ratio": -0.9139858484268188, - "logits/chosen": -0.3070000410079956, - "logits/rejected": -0.10518673807382584, - "logps/chosen": -1.5751094818115234, - "logps/rejected": -1.2814321517944336, - "loss": 1.8184, - "nll_loss": 1.7270081043243408, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.15751096606254578, - "rewards/margins": -0.029367735609412193, - "rewards/rejected": -0.12814322113990784, - "step": 15 - }, - { - "epoch": 0.05284888521882741, - "grad_norm": 1.6279934644699097, - "learning_rate": 3.918088737201365e-06, - "log_odds_chosen": -0.6225821375846863, - "log_odds_ratio": -1.066037654876709, - "logits/chosen": -0.22608086466789246, - "logits/rejected": -0.11721956729888916, - "logps/chosen": -1.685497760772705, - "logps/rejected": -1.2046910524368286, - "loss": 1.9465, - "nll_loss": 1.8399428129196167, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.1685497760772705, - "rewards/margins": -0.04808066785335541, - "rewards/rejected": -0.1204691082239151, - "step": 16 - }, - { - "epoch": 0.05615194054500413, - "grad_norm": 1.6133079528808594, - "learning_rate": 3.904436860068259e-06, - "log_odds_chosen": -1.0321464538574219, - "log_odds_ratio": -1.3406400680541992, - "logits/chosen": -0.33597350120544434, - "logits/rejected": 0.0039563365280628204, - "logps/chosen": -1.8919739723205566, - "logps/rejected": -1.1098377704620361, - "loss": 2.144, - "nll_loss": 2.0098912715911865, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.18919740617275238, - "rewards/margins": -0.07821360975503922, - "rewards/rejected": -0.11098378896713257, - "step": 17 - }, - { - "epoch": 0.05945499587118084, - "grad_norm": 1.9833312034606934, - "learning_rate": 3.890784982935153e-06, - "log_odds_chosen": -0.4404023289680481, - "log_odds_ratio": -0.9424636363983154, - "logits/chosen": -0.2950669527053833, - "logits/rejected": 0.11747706681489944, - "logps/chosen": -1.6311291456222534, - "logps/rejected": -1.29816472530365, - "loss": 1.8668, - "nll_loss": 1.7725696563720703, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.16311290860176086, - "rewards/margins": -0.033296436071395874, - "rewards/rejected": -0.129816472530365, - "step": 18 - }, - { - "epoch": 0.06275805119735756, - "grad_norm": 1.3093953132629395, - "learning_rate": 3.877133105802048e-06, - "log_odds_chosen": -0.40361231565475464, - "log_odds_ratio": -0.9634995460510254, - "logits/chosen": -0.2628612816333771, - "logits/rejected": -0.24200260639190674, - "logps/chosen": -1.6113650798797607, - "logps/rejected": -1.289243221282959, - "loss": 1.8111, - "nll_loss": 1.7147669792175293, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.16113652288913727, - "rewards/margins": -0.03221219778060913, - "rewards/rejected": -0.12892432510852814, - "step": 19 - }, - { - "epoch": 0.06606110652353427, - "grad_norm": 1.1489125490188599, - "learning_rate": 3.863481228668942e-06, - "log_odds_chosen": -0.6072632670402527, - "log_odds_ratio": -1.099987506866455, - "logits/chosen": -0.3231860399246216, - "logits/rejected": -0.02258267253637314, - "logps/chosen": -1.688952922821045, - "logps/rejected": -1.222412347793579, - "loss": 1.9576, - "nll_loss": 1.8475888967514038, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.16889530420303345, - "rewards/margins": -0.046654053032398224, - "rewards/rejected": -0.12224124372005463, - "step": 20 - }, - { - "epoch": 0.06936416184971098, - "grad_norm": 1.7391713857650757, - "learning_rate": 3.849829351535836e-06, - "log_odds_chosen": -0.37405818700790405, - "log_odds_ratio": -0.9096152782440186, - "logits/chosen": -0.1269126534461975, - "logits/rejected": -0.15477940440177917, - "logps/chosen": -1.494234561920166, - "logps/rejected": -1.2063360214233398, - "loss": 1.7802, - "nll_loss": 1.6892691850662231, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.14942346513271332, - "rewards/margins": -0.028789857402443886, - "rewards/rejected": -0.12063360959291458, - "step": 21 - }, - { - "epoch": 0.0726672171758877, - "grad_norm": 1.5705915689468384, - "learning_rate": 3.83617747440273e-06, - "log_odds_chosen": -0.3138054609298706, - "log_odds_ratio": -0.8670409917831421, - "logits/chosen": 0.01080230064690113, - "logits/rejected": -0.22399091720581055, - "logps/chosen": -1.4993152618408203, - "logps/rejected": -1.2646830081939697, - "loss": 1.7592, - "nll_loss": 1.6724696159362793, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.14993152022361755, - "rewards/margins": -0.023463226854801178, - "rewards/rejected": -0.12646830081939697, - "step": 22 - }, - { - "epoch": 0.07597027250206441, - "grad_norm": 1.604570746421814, - "learning_rate": 3.822525597269625e-06, - "log_odds_chosen": -0.34233999252319336, - "log_odds_ratio": -1.0020124912261963, - "logits/chosen": -0.10082289576530457, - "logits/rejected": -0.17032966017723083, - "logps/chosen": -1.2560243606567383, - "logps/rejected": -1.0354701280593872, - "loss": 1.5196, - "nll_loss": 1.4194191694259644, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.12560243904590607, - "rewards/margins": -0.02205541357398033, - "rewards/rejected": -0.10354701429605484, - "step": 23 - }, - { - "epoch": 0.07927332782824112, - "grad_norm": 2.2825939655303955, - "learning_rate": 3.8088737201365185e-06, - "log_odds_chosen": -0.8352683782577515, - "log_odds_ratio": -1.2052345275878906, - "logits/chosen": -0.34466612339019775, - "logits/rejected": -0.13504016399383545, - "logps/chosen": -1.82598078250885, - "logps/rejected": -1.181395411491394, - "loss": 2.0952, - "nll_loss": 1.974715232849121, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.1825980693101883, - "rewards/margins": -0.06445853412151337, - "rewards/rejected": -0.11813954263925552, - "step": 24 - }, - { - "epoch": 0.08257638315441784, - "grad_norm": 1.9671064615249634, - "learning_rate": 3.795221843003413e-06, - "log_odds_chosen": -0.003498941659927368, - "log_odds_ratio": -0.7276492118835449, - "logits/chosen": 0.016146566718816757, - "logits/rejected": -0.3570386469364166, - "logps/chosen": -1.3244566917419434, - "logps/rejected": -1.3133331537246704, - "loss": 1.5365, - "nll_loss": 1.4637832641601562, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.13244566321372986, - "rewards/margins": -0.0011123623698949814, - "rewards/rejected": -0.13133332133293152, - "step": 25 - }, - { - "epoch": 0.08587943848059455, - "grad_norm": 1.3590399026870728, - "learning_rate": 3.781569965870307e-06, - "log_odds_chosen": -0.19886480271816254, - "log_odds_ratio": -0.8894391059875488, - "logits/chosen": -0.09169130027294159, - "logits/rejected": -0.14744126796722412, - "logps/chosen": -1.5443627834320068, - "logps/rejected": -1.3618804216384888, - "loss": 1.7938, - "nll_loss": 1.7048976421356201, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.15443627536296844, - "rewards/margins": -0.01824822835624218, - "rewards/rejected": -0.13618804514408112, - "step": 26 - }, - { - "epoch": 0.08918249380677126, - "grad_norm": 2.1883881092071533, - "learning_rate": 3.7679180887372015e-06, - "log_odds_chosen": -0.5189663171768188, - "log_odds_ratio": -1.0485285520553589, - "logits/chosen": -0.3165411055088043, - "logits/rejected": -0.0027812160551548004, - "logps/chosen": -1.689698576927185, - "logps/rejected": -1.2608164548873901, - "loss": 1.9414, - "nll_loss": 1.8365681171417236, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.16896985471248627, - "rewards/margins": -0.042888201773166656, - "rewards/rejected": -0.1260816603899002, - "step": 27 - }, - { - "epoch": 0.09248554913294797, - "grad_norm": 1.3411200046539307, - "learning_rate": 3.754266211604095e-06, - "log_odds_chosen": -1.0685815811157227, - "log_odds_ratio": -1.3774856328964233, - "logits/chosen": -0.3910760283470154, - "logits/rejected": 0.19439667463302612, - "logps/chosen": -1.792892336845398, - "logps/rejected": -0.9963111877441406, - "loss": 2.0545, - "nll_loss": 1.9167814254760742, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.17928922176361084, - "rewards/margins": -0.07965810596942902, - "rewards/rejected": -0.09963111579418182, - "step": 28 - }, - { - "epoch": 0.0957886044591247, - "grad_norm": 1.5361754894256592, - "learning_rate": 3.7406143344709896e-06, - "log_odds_chosen": 0.6445412635803223, - "log_odds_ratio": -0.46553248167037964, - "logits/chosen": -0.04740822687745094, - "logits/rejected": -0.24291253089904785, - "logps/chosen": -0.9259082674980164, - "logps/rejected": -1.3082175254821777, - "loss": 1.2241, - "nll_loss": 1.1775027513504028, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0925908237695694, - "rewards/margins": 0.03823093697428703, - "rewards/rejected": -0.13082176446914673, - "step": 29 - }, - { - "epoch": 0.0990916597853014, - "grad_norm": 2.747184991836548, - "learning_rate": 3.7269624573378837e-06, - "log_odds_chosen": -0.21407535672187805, - "log_odds_ratio": -0.8265931010246277, - "logits/chosen": -0.06337215006351471, - "logits/rejected": 0.05660734325647354, - "logps/chosen": -1.5289347171783447, - "logps/rejected": -1.3582427501678467, - "loss": 1.7827, - "nll_loss": 1.7000188827514648, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.15289348363876343, - "rewards/margins": -0.017069198191165924, - "rewards/rejected": -0.1358242779970169, - "step": 30 - }, - { - "epoch": 0.10239471511147812, - "grad_norm": 1.6208003759384155, - "learning_rate": 3.713310580204778e-06, - "log_odds_chosen": -0.27356743812561035, - "log_odds_ratio": -0.8871296644210815, - "logits/chosen": -0.0922427847981453, - "logits/rejected": 0.20727695524692535, - "logps/chosen": -1.3880324363708496, - "logps/rejected": -1.156721591949463, - "loss": 1.6756, - "nll_loss": 1.5869338512420654, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.13880324363708496, - "rewards/margins": -0.023131079971790314, - "rewards/rejected": -0.11567215621471405, - "step": 31 - }, - { - "epoch": 0.10569777043765483, - "grad_norm": 3.222088575363159, - "learning_rate": 3.6996587030716723e-06, - "log_odds_chosen": -0.00714544951915741, - "log_odds_ratio": -0.7292194366455078, - "logits/chosen": -0.01911495253443718, - "logits/rejected": -0.07217317074537277, - "logps/chosen": -1.2674195766448975, - "logps/rejected": -1.240881085395813, - "loss": 1.5203, - "nll_loss": 1.4474025964736938, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.12674197554588318, - "rewards/margins": -0.002653861418366432, - "rewards/rejected": -0.1240881085395813, - "step": 32 - }, - { - "epoch": 0.10900082576383155, - "grad_norm": 2.328028440475464, - "learning_rate": 3.6860068259385667e-06, - "log_odds_chosen": -0.3341265916824341, - "log_odds_ratio": -0.9037232398986816, - "logits/chosen": -0.00612066313624382, - "logits/rejected": 0.02111620083451271, - "logps/chosen": -1.3508132696151733, - "logps/rejected": -1.1107620000839233, - "loss": 1.6615, - "nll_loss": 1.571164846420288, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.13508132100105286, - "rewards/margins": -0.02400512620806694, - "rewards/rejected": -0.11107620596885681, - "step": 33 - }, - { - "epoch": 0.11230388109000826, - "grad_norm": 2.603714942932129, - "learning_rate": 3.6723549488054604e-06, - "log_odds_chosen": -0.5846607089042664, - "log_odds_ratio": -1.0745846033096313, - "logits/chosen": -0.09262420237064362, - "logits/rejected": -0.22659257054328918, - "logps/chosen": -1.6576080322265625, - "logps/rejected": -1.2060959339141846, - "loss": 1.9077, - "nll_loss": 1.8002398014068604, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.165760800242424, - "rewards/margins": -0.045151207596063614, - "rewards/rejected": -0.1206095963716507, - "step": 34 - }, - { - "epoch": 0.11560693641618497, - "grad_norm": 1.6697063446044922, - "learning_rate": 3.6587030716723545e-06, - "log_odds_chosen": 0.030604317784309387, - "log_odds_ratio": -0.7238311767578125, - "logits/chosen": -0.021191485226154327, - "logits/rejected": -0.028810784220695496, - "logps/chosen": -1.2691432237625122, - "logps/rejected": -1.289353847503662, - "loss": 1.5125, - "nll_loss": 1.4401429891586304, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.12691432237625122, - "rewards/margins": 0.002021070569753647, - "rewards/rejected": -0.12893539667129517, - "step": 35 - }, - { - "epoch": 0.11890999174236168, - "grad_norm": 1.4854084253311157, - "learning_rate": 3.645051194539249e-06, - "log_odds_chosen": -0.24459637701511383, - "log_odds_ratio": -0.8275293707847595, - "logits/chosen": -0.007174644619226456, - "logits/rejected": 0.323642373085022, - "logps/chosen": -1.2843918800354004, - "logps/rejected": -1.1100748777389526, - "loss": 1.6011, - "nll_loss": 1.51833176612854, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.12843920290470123, - "rewards/margins": -0.017431715503335, - "rewards/rejected": -0.11100748181343079, - "step": 36 - }, - { - "epoch": 0.1222130470685384, - "grad_norm": 1.3110160827636719, - "learning_rate": 3.631399317406143e-06, - "log_odds_chosen": 0.2147403061389923, - "log_odds_ratio": -0.6777112483978271, - "logits/chosen": 0.14219588041305542, - "logits/rejected": -0.04788453131914139, - "logps/chosen": -1.011021375656128, - "logps/rejected": -1.0979650020599365, - "loss": 1.2625, - "nll_loss": 1.1946935653686523, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.10110214352607727, - "rewards/margins": 0.008694373071193695, - "rewards/rejected": -0.10979650914669037, - "step": 37 - }, - { - "epoch": 0.1255161023947151, - "grad_norm": 2.543696641921997, - "learning_rate": 3.6177474402730375e-06, - "log_odds_chosen": -0.46568354964256287, - "log_odds_ratio": -1.0046223402023315, - "logits/chosen": 0.057684965431690216, - "logits/rejected": 0.019088633358478546, - "logps/chosen": -1.4504691362380981, - "logps/rejected": -1.0863721370697021, - "loss": 1.7947, - "nll_loss": 1.694282054901123, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.1450469046831131, - "rewards/margins": -0.03640969842672348, - "rewards/rejected": -0.10863721370697021, - "step": 38 - }, - { - "epoch": 0.12881915772089184, - "grad_norm": 1.487676739692688, - "learning_rate": 3.6040955631399316e-06, - "log_odds_chosen": -0.2106037437915802, - "log_odds_ratio": -0.9186347126960754, - "logits/chosen": -0.0638580173254013, - "logits/rejected": 0.16561198234558105, - "logps/chosen": -1.3079239130020142, - "logps/rejected": -1.1577972173690796, - "loss": 1.6198, - "nll_loss": 1.5279350280761719, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.13079239428043365, - "rewards/margins": -0.015012674033641815, - "rewards/rejected": -0.11577971279621124, - "step": 39 - }, - { - "epoch": 0.13212221304706853, - "grad_norm": 1.6674723625183105, - "learning_rate": 3.590443686006826e-06, - "log_odds_chosen": -0.02980533242225647, - "log_odds_ratio": -0.7470666170120239, - "logits/chosen": -0.22099313139915466, - "logits/rejected": 0.24727749824523926, - "logps/chosen": -1.1870090961456299, - "logps/rejected": -1.1325592994689941, - "loss": 1.4636, - "nll_loss": 1.3888877630233765, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.11870090663433075, - "rewards/margins": -0.005444982089102268, - "rewards/rejected": -0.11325592547655106, - "step": 40 - }, - { - "epoch": 0.13542526837324526, - "grad_norm": 0.9602457880973816, - "learning_rate": 3.5767918088737197e-06, - "log_odds_chosen": 0.15464849770069122, - "log_odds_ratio": -0.6338274478912354, - "logits/chosen": 0.08133335411548615, - "logits/rejected": -0.15244567394256592, - "logps/chosen": -1.187782883644104, - "logps/rejected": -1.2900636196136475, - "loss": 1.4829, - "nll_loss": 1.4195501804351807, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.1187782883644104, - "rewards/margins": 0.010228084400296211, - "rewards/rejected": -0.12900637090206146, - "step": 41 - }, - { - "epoch": 0.13872832369942195, - "grad_norm": 1.298644781112671, - "learning_rate": 3.563139931740614e-06, - "log_odds_chosen": 0.37367990612983704, - "log_odds_ratio": -0.5938945412635803, - "logits/chosen": 0.1457584649324417, - "logits/rejected": -0.12019777297973633, - "logps/chosen": -0.9941174983978271, - "logps/rejected": -1.217413067817688, - "loss": 1.3211, - "nll_loss": 1.2616686820983887, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.09941175580024719, - "rewards/margins": 0.022329553961753845, - "rewards/rejected": -0.12174130976200104, - "step": 42 - }, - { - "epoch": 0.14203137902559868, - "grad_norm": 2.239881753921509, - "learning_rate": 3.5494880546075083e-06, - "log_odds_chosen": 0.016376741230487823, - "log_odds_ratio": -0.6877104043960571, - "logits/chosen": -0.08168008178472519, - "logits/rejected": 0.37156015634536743, - "logps/chosen": -1.2163668870925903, - "logps/rejected": -1.2259128093719482, - "loss": 1.6268, - "nll_loss": 1.5580016374588013, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.12163668870925903, - "rewards/margins": 0.0009545907378196716, - "rewards/rejected": -0.1225912794470787, - "step": 43 - }, - { - "epoch": 0.1453344343517754, - "grad_norm": 1.5388121604919434, - "learning_rate": 3.5358361774744028e-06, - "log_odds_chosen": 0.21293184161186218, - "log_odds_ratio": -0.6478387117385864, - "logits/chosen": 0.13047321140766144, - "logits/rejected": -0.20692084729671478, - "logps/chosen": -1.218898892402649, - "logps/rejected": -1.3630651235580444, - "loss": 1.4907, - "nll_loss": 1.425947666168213, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.12188988924026489, - "rewards/margins": 0.014416629448533058, - "rewards/rejected": -0.1363065242767334, - "step": 44 - }, - { - "epoch": 0.1486374896779521, - "grad_norm": 1.5294818878173828, - "learning_rate": 3.522184300341297e-06, - "log_odds_chosen": -0.7696317434310913, - "log_odds_ratio": -1.1844816207885742, - "logits/chosen": -0.11731045693159103, - "logits/rejected": 0.03397466614842415, - "logps/chosen": -1.7747812271118164, - "logps/rejected": -1.1698225736618042, - "loss": 2.0263, - "nll_loss": 1.9078510999679565, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.1774781346321106, - "rewards/margins": -0.06049586087465286, - "rewards/rejected": -0.11698225885629654, - "step": 45 - }, - { - "epoch": 0.15194054500412882, - "grad_norm": 1.614054560661316, - "learning_rate": 3.508532423208191e-06, - "log_odds_chosen": -0.1655791997909546, - "log_odds_ratio": -0.7934306859970093, - "logits/chosen": 0.10322938859462738, - "logits/rejected": 0.1284104287624359, - "logps/chosen": -1.3065404891967773, - "logps/rejected": -1.1828889846801758, - "loss": 1.6249, - "nll_loss": 1.5455988645553589, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.13065403699874878, - "rewards/margins": -0.012365149334073067, - "rewards/rejected": -0.11828890442848206, - "step": 46 - }, - { - "epoch": 0.15524360033030554, - "grad_norm": 1.1637400388717651, - "learning_rate": 3.4948805460750854e-06, - "log_odds_chosen": -0.19159258902072906, - "log_odds_ratio": -0.8494549989700317, - "logits/chosen": 0.202142596244812, - "logits/rejected": 0.3397216796875, - "logps/chosen": -1.171367883682251, - "logps/rejected": -1.0078306198120117, - "loss": 1.4706, - "nll_loss": 1.3856534957885742, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.11713679134845734, - "rewards/margins": -0.016353726387023926, - "rewards/rejected": -0.10078307241201401, - "step": 47 - }, - { - "epoch": 0.15854665565648224, - "grad_norm": 2.3482017517089844, - "learning_rate": 3.481228668941979e-06, - "log_odds_chosen": 0.18030907213687897, - "log_odds_ratio": -0.7781872153282166, - "logits/chosen": 0.08281873166561127, - "logits/rejected": -0.1745750606060028, - "logps/chosen": -1.2742396593093872, - "logps/rejected": -1.2624688148498535, - "loss": 1.559, - "nll_loss": 1.4812242984771729, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.1274239718914032, - "rewards/margins": -0.0011770911514759064, - "rewards/rejected": -0.1262468695640564, - "step": 48 - }, - { - "epoch": 0.16184971098265896, - "grad_norm": 1.387762188911438, - "learning_rate": 3.4675767918088735e-06, - "log_odds_chosen": 0.3673129081726074, - "log_odds_ratio": -0.6305878162384033, - "logits/chosen": 0.2012062668800354, - "logits/rejected": -0.013118837028741837, - "logps/chosen": -0.9903988242149353, - "logps/rejected": -1.1660479307174683, - "loss": 1.2736, - "nll_loss": 1.2104918956756592, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.09903987497091293, - "rewards/margins": 0.017564916983246803, - "rewards/rejected": -0.11660479754209518, - "step": 49 - }, - { - "epoch": 0.16515276630883569, - "grad_norm": 1.5532907247543335, - "learning_rate": 3.4539249146757676e-06, - "log_odds_chosen": -0.0854019895195961, - "log_odds_ratio": -0.7372137308120728, - "logits/chosen": 0.2503063678741455, - "logits/rejected": 0.18286439776420593, - "logps/chosen": -1.0614218711853027, - "logps/rejected": -1.0076258182525635, - "loss": 1.3841, - "nll_loss": 1.3103489875793457, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.10614218562841415, - "rewards/margins": -0.005379604175686836, - "rewards/rejected": -0.10076258331537247, - "step": 50 - }, - { - "epoch": 0.16845582163501238, - "grad_norm": 1.0156859159469604, - "learning_rate": 3.440273037542662e-06, - "log_odds_chosen": 0.15012328326702118, - "log_odds_ratio": -0.6443244218826294, - "logits/chosen": 0.40128853917121887, - "logits/rejected": 0.11220703274011612, - "logps/chosen": -0.9645686149597168, - "logps/rejected": -1.0516890287399292, - "loss": 1.3232, - "nll_loss": 1.258718729019165, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.0964568629860878, - "rewards/margins": 0.008712041191756725, - "rewards/rejected": -0.1051689013838768, - "step": 51 - }, - { - "epoch": 0.1717588769611891, - "grad_norm": 2.3944637775421143, - "learning_rate": 3.426621160409556e-06, - "log_odds_chosen": -0.45090052485466003, - "log_odds_ratio": -0.9613892436027527, - "logits/chosen": 0.09633634984493256, - "logits/rejected": 0.0037075355648994446, - "logps/chosen": -1.3988513946533203, - "logps/rejected": -1.072731852531433, - "loss": 1.6967, - "nll_loss": 1.6005160808563232, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.13988515734672546, - "rewards/margins": -0.03261195123195648, - "rewards/rejected": -0.10727319121360779, - "step": 52 - }, - { - "epoch": 0.1750619322873658, - "grad_norm": 1.168386697769165, - "learning_rate": 3.4129692832764506e-06, - "log_odds_chosen": 0.5482217669487, - "log_odds_ratio": -0.4816707670688629, - "logits/chosen": 0.2505857050418854, - "logits/rejected": 0.11532285809516907, - "logps/chosen": -0.9029267430305481, - "logps/rejected": -1.2398170232772827, - "loss": 1.2415, - "nll_loss": 1.1933367252349854, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.09029267728328705, - "rewards/margins": 0.033689022064208984, - "rewards/rejected": -0.12398170679807663, - "step": 53 - }, - { - "epoch": 0.17836498761354252, - "grad_norm": 1.3717758655548096, - "learning_rate": 3.3993174061433447e-06, - "log_odds_chosen": 0.4121208190917969, - "log_odds_ratio": -0.5163588523864746, - "logits/chosen": 0.24769145250320435, - "logits/rejected": -0.020016469061374664, - "logps/chosen": -0.9984882473945618, - "logps/rejected": -1.2739909887313843, - "loss": 1.2964, - "nll_loss": 1.2447164058685303, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.09984882175922394, - "rewards/margins": 0.027550268918275833, - "rewards/rejected": -0.12739908695220947, - "step": 54 - }, - { - "epoch": 0.18166804293971925, - "grad_norm": 1.3395096063613892, - "learning_rate": 3.3856655290102388e-06, - "log_odds_chosen": 0.4934118986129761, - "log_odds_ratio": -0.5204886794090271, - "logits/chosen": 0.33327436447143555, - "logits/rejected": 0.06457202881574631, - "logps/chosen": -0.8350604772567749, - "logps/rejected": -1.127985954284668, - "loss": 1.2058, - "nll_loss": 1.1537377834320068, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08350604772567749, - "rewards/margins": 0.029292549937963486, - "rewards/rejected": -0.11279859393835068, - "step": 55 - }, - { - "epoch": 0.18497109826589594, - "grad_norm": 0.9853953123092651, - "learning_rate": 3.372013651877133e-06, - "log_odds_chosen": 0.678905189037323, - "log_odds_ratio": -0.510511040687561, - "logits/chosen": 0.30492129921913147, - "logits/rejected": -0.18459868431091309, - "logps/chosen": -0.9296345710754395, - "logps/rejected": -1.2819163799285889, - "loss": 1.2198, - "nll_loss": 1.1687099933624268, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.09296346455812454, - "rewards/margins": 0.0352281779050827, - "rewards/rejected": -0.12819163501262665, - "step": 56 - }, - { - "epoch": 0.18827415359207267, - "grad_norm": 1.602625846862793, - "learning_rate": 3.358361774744027e-06, - "log_odds_chosen": -0.20913547277450562, - "log_odds_ratio": -0.8255297541618347, - "logits/chosen": 0.24480052292346954, - "logits/rejected": 0.023934409022331238, - "logps/chosen": -1.2878193855285645, - "logps/rejected": -1.1472501754760742, - "loss": 1.6427, - "nll_loss": 1.5601569414138794, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.12878192961215973, - "rewards/margins": -0.014056919142603874, - "rewards/rejected": -0.1147250160574913, - "step": 57 - }, - { - "epoch": 0.1915772089182494, - "grad_norm": 1.5717257261276245, - "learning_rate": 3.3447098976109214e-06, - "log_odds_chosen": 0.31682324409484863, - "log_odds_ratio": -0.5778417587280273, - "logits/chosen": 0.29345977306365967, - "logits/rejected": -0.06646635383367538, - "logps/chosen": -0.8851097226142883, - "logps/rejected": -1.0440860986709595, - "loss": 1.1914, - "nll_loss": 1.1336610317230225, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08851097524166107, - "rewards/margins": 0.015897639095783234, - "rewards/rejected": -0.10440860688686371, - "step": 58 - }, - { - "epoch": 0.1948802642444261, - "grad_norm": 1.311073660850525, - "learning_rate": 3.3310580204778155e-06, - "log_odds_chosen": 0.3530902564525604, - "log_odds_ratio": -0.558120846748352, - "logits/chosen": 0.3703135848045349, - "logits/rejected": 0.11563435941934586, - "logps/chosen": -0.8639000058174133, - "logps/rejected": -1.0713303089141846, - "loss": 1.187, - "nll_loss": 1.131213665008545, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08639000356197357, - "rewards/margins": 0.020743027329444885, - "rewards/rejected": -0.10713303089141846, - "step": 59 - }, - { - "epoch": 0.1981833195706028, - "grad_norm": 1.425840973854065, - "learning_rate": 3.31740614334471e-06, - "log_odds_chosen": -0.09280906617641449, - "log_odds_ratio": -0.7923089265823364, - "logits/chosen": 0.1388135701417923, - "logits/rejected": 0.006213553249835968, - "logps/chosen": -1.249396562576294, - "logps/rejected": -1.1770614385604858, - "loss": 1.6631, - "nll_loss": 1.5838831663131714, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.12493965029716492, - "rewards/margins": -0.007233509793877602, - "rewards/rejected": -0.11770614236593246, - "step": 60 - }, - { - "epoch": 0.2014863748967795, - "grad_norm": 1.3393603563308716, - "learning_rate": 3.3037542662116036e-06, - "log_odds_chosen": 0.013612426817417145, - "log_odds_ratio": -0.7091065645217896, - "logits/chosen": 0.22753876447677612, - "logits/rejected": 0.11159134656190872, - "logps/chosen": -1.1189181804656982, - "logps/rejected": -1.1187776327133179, - "loss": 1.4101, - "nll_loss": 1.339219331741333, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.11189180612564087, - "rewards/margins": -1.4049932360649109e-05, - "rewards/rejected": -0.11187776178121567, - "step": 61 - }, - { - "epoch": 0.20478943022295623, - "grad_norm": 1.471448540687561, - "learning_rate": 3.290102389078498e-06, - "log_odds_chosen": -0.23059682548046112, - "log_odds_ratio": -0.8621479272842407, - "logits/chosen": 0.3145146369934082, - "logits/rejected": -0.17347364127635956, - "logps/chosen": -1.1732659339904785, - "logps/rejected": -0.9960653781890869, - "loss": 1.4586, - "nll_loss": 1.3724157810211182, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.11732660233974457, - "rewards/margins": -0.017720066010951996, - "rewards/rejected": -0.09960653632879257, - "step": 62 - }, - { - "epoch": 0.20809248554913296, - "grad_norm": 1.3989598751068115, - "learning_rate": 3.276450511945392e-06, - "log_odds_chosen": 0.44106149673461914, - "log_odds_ratio": -0.5329040288925171, - "logits/chosen": 0.4486686885356903, - "logits/rejected": -0.04529336839914322, - "logps/chosen": -1.0107343196868896, - "logps/rejected": -1.314638614654541, - "loss": 1.3858, - "nll_loss": 1.3324730396270752, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.10107343643903732, - "rewards/margins": 0.030390417203307152, - "rewards/rejected": -0.13146385550498962, - "step": 63 - }, - { - "epoch": 0.21139554087530965, - "grad_norm": 1.1700671911239624, - "learning_rate": 3.2627986348122866e-06, - "log_odds_chosen": 0.6254209876060486, - "log_odds_ratio": -0.4567137062549591, - "logits/chosen": 0.3980596661567688, - "logits/rejected": 0.06197816878557205, - "logps/chosen": -0.7298104763031006, - "logps/rejected": -1.082977294921875, - "loss": 1.1067, - "nll_loss": 1.0610079765319824, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07298105210065842, - "rewards/margins": 0.03531668335199356, - "rewards/rejected": -0.10829772800207138, - "step": 64 - }, - { - "epoch": 0.21469859620148637, - "grad_norm": 0.9773806929588318, - "learning_rate": 3.2491467576791807e-06, - "log_odds_chosen": 0.2239941507577896, - "log_odds_ratio": -0.6214407682418823, - "logits/chosen": 0.27695244550704956, - "logits/rejected": -0.10752972215414047, - "logps/chosen": -1.0057926177978516, - "logps/rejected": -1.1450005769729614, - "loss": 1.2884, - "nll_loss": 1.2262191772460938, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.10057926923036575, - "rewards/margins": 0.013920795172452927, - "rewards/rejected": -0.11450006067752838, - "step": 65 - }, - { - "epoch": 0.2180016515276631, - "grad_norm": 1.2681691646575928, - "learning_rate": 3.235494880546075e-06, - "log_odds_chosen": -0.03643546998500824, - "log_odds_ratio": -0.7387456893920898, - "logits/chosen": 0.3773193955421448, - "logits/rejected": 0.13032642006874084, - "logps/chosen": -1.0140798091888428, - "logps/rejected": -0.9628314971923828, - "loss": 1.3563, - "nll_loss": 1.28244948387146, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.1014079824090004, - "rewards/margins": -0.005124838091433048, - "rewards/rejected": -0.09628315269947052, - "step": 66 - }, - { - "epoch": 0.2213047068538398, - "grad_norm": 1.3886656761169434, - "learning_rate": 3.2218430034129693e-06, - "log_odds_chosen": 0.4924757182598114, - "log_odds_ratio": -0.4842131733894348, - "logits/chosen": 0.2824667990207672, - "logits/rejected": 0.19784978032112122, - "logps/chosen": -0.858705461025238, - "logps/rejected": -1.1593170166015625, - "loss": 1.2296, - "nll_loss": 1.181227445602417, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.08587054908275604, - "rewards/margins": 0.030061157420277596, - "rewards/rejected": -0.11593170464038849, - "step": 67 - }, - { - "epoch": 0.22460776218001652, - "grad_norm": 1.2738990783691406, - "learning_rate": 3.2081911262798638e-06, - "log_odds_chosen": 0.40338289737701416, - "log_odds_ratio": -0.5863211750984192, - "logits/chosen": 0.5026859045028687, - "logits/rejected": 0.13951237499713898, - "logps/chosen": -0.8762617111206055, - "logps/rejected": -1.1232064962387085, - "loss": 1.1882, - "nll_loss": 1.129590392112732, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08762617409229279, - "rewards/margins": 0.02469448931515217, - "rewards/rejected": -0.1123206615447998, - "step": 68 - }, - { - "epoch": 0.22791081750619324, - "grad_norm": 1.3676021099090576, - "learning_rate": 3.1945392491467574e-06, - "log_odds_chosen": 0.48370876908302307, - "log_odds_ratio": -0.5044476389884949, - "logits/chosen": 0.4093031883239746, - "logits/rejected": -0.1596689522266388, - "logps/chosen": -0.8799825310707092, - "logps/rejected": -1.2003448009490967, - "loss": 1.1814, - "nll_loss": 1.130968451499939, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08799825608730316, - "rewards/margins": 0.032036229968070984, - "rewards/rejected": -0.12003448605537415, - "step": 69 - }, - { - "epoch": 0.23121387283236994, - "grad_norm": 1.2153899669647217, - "learning_rate": 3.1808873720136515e-06, - "log_odds_chosen": 0.22648712992668152, - "log_odds_ratio": -0.7219368815422058, - "logits/chosen": 0.3737250864505768, - "logits/rejected": 0.04505196586251259, - "logps/chosen": -0.8673282861709595, - "logps/rejected": -1.02438223361969, - "loss": 1.1814, - "nll_loss": 1.10923171043396, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08673283457756042, - "rewards/margins": 0.01570538990199566, - "rewards/rejected": -0.10243822634220123, - "step": 70 - }, - { - "epoch": 0.23451692815854666, - "grad_norm": 0.6360374689102173, - "learning_rate": 3.167235494880546e-06, - "log_odds_chosen": 1.2540688514709473, - "log_odds_ratio": -0.303214967250824, - "logits/chosen": 0.525089681148529, - "logits/rejected": -0.017755810171365738, - "logps/chosen": -0.5769282579421997, - "logps/rejected": -1.2423179149627686, - "loss": 0.8803, - "nll_loss": 0.8500240445137024, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05769282951951027, - "rewards/margins": 0.06653895974159241, - "rewards/rejected": -0.12423178553581238, - "step": 71 - }, - { - "epoch": 0.23781998348472336, - "grad_norm": 0.8371473550796509, - "learning_rate": 3.15358361774744e-06, - "log_odds_chosen": 1.0189234018325806, - "log_odds_ratio": -0.3158828616142273, - "logits/chosen": 0.4066252112388611, - "logits/rejected": -0.12710878252983093, - "logps/chosen": -0.6726198792457581, - "logps/rejected": -1.2981152534484863, - "loss": 1.0054, - "nll_loss": 0.973814845085144, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06726198643445969, - "rewards/margins": 0.06254953145980835, - "rewards/rejected": -0.12981152534484863, - "step": 72 - }, - { - "epoch": 0.24112303881090008, - "grad_norm": 1.2844346761703491, - "learning_rate": 3.1399317406143345e-06, - "log_odds_chosen": 0.2540326416492462, - "log_odds_ratio": -0.5854588747024536, - "logits/chosen": 0.35007244348526, - "logits/rejected": 0.2108241319656372, - "logps/chosen": -0.8633500933647156, - "logps/rejected": -1.0183669328689575, - "loss": 1.2038, - "nll_loss": 1.1452605724334717, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08633501082658768, - "rewards/margins": 0.015501681715250015, - "rewards/rejected": -0.10183669626712799, - "step": 73 - }, - { - "epoch": 0.2444260941370768, - "grad_norm": 0.7940986156463623, - "learning_rate": 3.1262798634812286e-06, - "log_odds_chosen": 1.001118779182434, - "log_odds_ratio": -0.3670634925365448, - "logits/chosen": 0.4843432903289795, - "logits/rejected": 0.06729845702648163, - "logps/chosen": -0.6586004495620728, - "logps/rejected": -1.1920722723007202, - "loss": 1.036, - "nll_loss": 0.9993428587913513, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06586004793643951, - "rewards/margins": 0.053347185254096985, - "rewards/rejected": -0.1192072331905365, - "step": 74 - }, - { - "epoch": 0.2477291494632535, - "grad_norm": 1.2507742643356323, - "learning_rate": 3.1126279863481226e-06, - "log_odds_chosen": 0.12684106826782227, - "log_odds_ratio": -0.656596839427948, - "logits/chosen": 0.3413503170013428, - "logits/rejected": 0.18167835474014282, - "logps/chosen": -1.048732042312622, - "logps/rejected": -1.1256974935531616, - "loss": 1.3611, - "nll_loss": 1.2954506874084473, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.10487319529056549, - "rewards/margins": 0.007696554064750671, - "rewards/rejected": -0.11256975680589676, - "step": 75 - }, - { - "epoch": 0.2510322047894302, - "grad_norm": 1.23664391040802, - "learning_rate": 3.0989761092150167e-06, - "log_odds_chosen": 0.006515480577945709, - "log_odds_ratio": -0.7416260242462158, - "logits/chosen": 0.34474775195121765, - "logits/rejected": 0.15199719369411469, - "logps/chosen": -1.0519925355911255, - "logps/rejected": -1.0338687896728516, - "loss": 1.3727, - "nll_loss": 1.2985050678253174, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.10519925504922867, - "rewards/margins": -0.001812376081943512, - "rewards/rejected": -0.10338687896728516, - "step": 76 - }, - { - "epoch": 0.2543352601156069, - "grad_norm": 1.4781817197799683, - "learning_rate": 3.085324232081911e-06, - "log_odds_chosen": 0.5096539855003357, - "log_odds_ratio": -0.5132606029510498, - "logits/chosen": 0.26825007796287537, - "logits/rejected": -0.017844028770923615, - "logps/chosen": -0.9370253086090088, - "logps/rejected": -1.255677342414856, - "loss": 1.2938, - "nll_loss": 1.2424519062042236, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.093702532351017, - "rewards/margins": 0.031865209341049194, - "rewards/rejected": -0.1255677342414856, - "step": 77 - }, - { - "epoch": 0.25763831544178367, - "grad_norm": 1.016451120376587, - "learning_rate": 3.0716723549488053e-06, - "log_odds_chosen": 0.668674647808075, - "log_odds_ratio": -0.4263528883457184, - "logits/chosen": 0.33459198474884033, - "logits/rejected": 0.013969972729682922, - "logps/chosen": -0.7937272787094116, - "logps/rejected": -1.180464267730713, - "loss": 1.1436, - "nll_loss": 1.100918173789978, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07937273383140564, - "rewards/margins": 0.03867369890213013, - "rewards/rejected": -0.11804643273353577, - "step": 78 - }, - { - "epoch": 0.26094137076796037, - "grad_norm": 0.8388687968254089, - "learning_rate": 3.0580204778156998e-06, - "log_odds_chosen": 0.5917883515357971, - "log_odds_ratio": -0.4903685450553894, - "logits/chosen": 0.4077008366584778, - "logits/rejected": 0.1414472609758377, - "logps/chosen": -0.8088873028755188, - "logps/rejected": -1.131054401397705, - "loss": 1.1003, - "nll_loss": 1.0512502193450928, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08088873326778412, - "rewards/margins": 0.03221671283245087, - "rewards/rejected": -0.11310544610023499, - "step": 79 - }, - { - "epoch": 0.26424442609413706, - "grad_norm": 0.8946303129196167, - "learning_rate": 3.044368600682594e-06, - "log_odds_chosen": 0.4237121641635895, - "log_odds_ratio": -0.5523289442062378, - "logits/chosen": 0.4203716516494751, - "logits/rejected": 0.0446341335773468, - "logps/chosen": -0.932843804359436, - "logps/rejected": -1.1971027851104736, - "loss": 1.239, - "nll_loss": 1.1837332248687744, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.09328439086675644, - "rewards/margins": 0.026425888761878014, - "rewards/rejected": -0.1197102814912796, - "step": 80 - }, - { - "epoch": 0.2675474814203138, - "grad_norm": 1.2848907709121704, - "learning_rate": 3.030716723549488e-06, - "log_odds_chosen": 0.12938204407691956, - "log_odds_ratio": -0.6383733749389648, - "logits/chosen": 0.45814579725265503, - "logits/rejected": 0.007169784978032112, - "logps/chosen": -0.943265438079834, - "logps/rejected": -1.0218572616577148, - "loss": 1.2972, - "nll_loss": 1.2333593368530273, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.09432655572891235, - "rewards/margins": 0.007859177887439728, - "rewards/rejected": -0.10218573361635208, - "step": 81 - }, - { - "epoch": 0.2708505367464905, - "grad_norm": 1.1255342960357666, - "learning_rate": 3.017064846416382e-06, - "log_odds_chosen": 0.12974008917808533, - "log_odds_ratio": -0.6517421007156372, - "logits/chosen": 0.2315722107887268, - "logits/rejected": 0.1167377233505249, - "logps/chosen": -1.0166023969650269, - "logps/rejected": -1.1003549098968506, - "loss": 1.3907, - "nll_loss": 1.325517177581787, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.10166024416685104, - "rewards/margins": 0.008375251665711403, - "rewards/rejected": -0.1100354939699173, - "step": 82 - }, - { - "epoch": 0.2741535920726672, - "grad_norm": 1.1137655973434448, - "learning_rate": 3.003412969283276e-06, - "log_odds_chosen": -0.6862425804138184, - "log_odds_ratio": -1.2488682270050049, - "logits/chosen": 0.33115825057029724, - "logits/rejected": 0.03100370615720749, - "logps/chosen": -1.0604417324066162, - "logps/rejected": -0.7394245266914368, - "loss": 1.4168, - "nll_loss": 1.29189133644104, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.10604417324066162, - "rewards/margins": -0.03210172802209854, - "rewards/rejected": -0.07394245266914368, - "step": 83 - }, - { - "epoch": 0.2774566473988439, - "grad_norm": 1.1524357795715332, - "learning_rate": 2.9897610921501705e-06, - "log_odds_chosen": 0.5086053609848022, - "log_odds_ratio": -0.49087783694267273, - "logits/chosen": 0.5789463520050049, - "logits/rejected": -0.05085635557770729, - "logps/chosen": -0.7870698571205139, - "logps/rejected": -1.11079740524292, - "loss": 1.1111, - "nll_loss": 1.0620100498199463, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07870698720216751, - "rewards/margins": 0.03237275034189224, - "rewards/rejected": -0.11107973754405975, - "step": 84 - }, - { - "epoch": 0.28075970272502065, - "grad_norm": 0.8835132718086243, - "learning_rate": 2.9761092150170646e-06, - "log_odds_chosen": 0.7111002206802368, - "log_odds_ratio": -0.4590810537338257, - "logits/chosen": 0.3972591161727905, - "logits/rejected": 0.19307100772857666, - "logps/chosen": -0.7318575978279114, - "logps/rejected": -1.1309598684310913, - "loss": 1.075, - "nll_loss": 1.029056429862976, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0731857568025589, - "rewards/margins": 0.03991022706031799, - "rewards/rejected": -0.11309598386287689, - "step": 85 - }, - { - "epoch": 0.28406275805119735, - "grad_norm": 1.279604196548462, - "learning_rate": 2.962457337883959e-06, - "log_odds_chosen": 0.08707094192504883, - "log_odds_ratio": -0.6983841061592102, - "logits/chosen": 0.32801157236099243, - "logits/rejected": 0.11828978359699249, - "logps/chosen": -0.9827994704246521, - "logps/rejected": -1.000153660774231, - "loss": 1.313, - "nll_loss": 1.2431302070617676, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.09827995300292969, - "rewards/margins": 0.0017354153096675873, - "rewards/rejected": -0.10001536458730698, - "step": 86 - }, - { - "epoch": 0.28736581337737405, - "grad_norm": 1.1214733123779297, - "learning_rate": 2.948805460750853e-06, - "log_odds_chosen": 0.18517102301120758, - "log_odds_ratio": -0.646797239780426, - "logits/chosen": 0.43594810366630554, - "logits/rejected": -0.06761805713176727, - "logps/chosen": -1.0289931297302246, - "logps/rejected": -1.1406700611114502, - "loss": 1.331, - "nll_loss": 1.2662867307662964, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.10289931297302246, - "rewards/margins": 0.01116769015789032, - "rewards/rejected": -0.11406701058149338, - "step": 87 - }, - { - "epoch": 0.2906688687035508, - "grad_norm": 0.7855659127235413, - "learning_rate": 2.9351535836177476e-06, - "log_odds_chosen": 0.5958869457244873, - "log_odds_ratio": -0.4478607177734375, - "logits/chosen": 0.682604193687439, - "logits/rejected": 0.3858082890510559, - "logps/chosen": -0.7105749249458313, - "logps/rejected": -1.044779658317566, - "loss": 1.0961, - "nll_loss": 1.0513228178024292, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07105749100446701, - "rewards/margins": 0.03342047706246376, - "rewards/rejected": -0.10447797179222107, - "step": 88 - }, - { - "epoch": 0.2939719240297275, - "grad_norm": 0.7172762155532837, - "learning_rate": 2.9215017064846413e-06, - "log_odds_chosen": -0.02115604281425476, - "log_odds_ratio": -0.757754921913147, - "logits/chosen": 0.5184760093688965, - "logits/rejected": 0.06454671919345856, - "logps/chosen": -0.7995362281799316, - "logps/rejected": -0.8837530016899109, - "loss": 1.1953, - "nll_loss": 1.1195471286773682, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.0799536257982254, - "rewards/margins": 0.008421673439443111, - "rewards/rejected": -0.08837530016899109, - "step": 89 - }, - { - "epoch": 0.2972749793559042, - "grad_norm": 0.9205667972564697, - "learning_rate": 2.9078498293515358e-06, - "log_odds_chosen": 0.335008442401886, - "log_odds_ratio": -0.5804576277732849, - "logits/chosen": 0.30128976702690125, - "logits/rejected": 0.25877803564071655, - "logps/chosen": -0.860328733921051, - "logps/rejected": -1.056859016418457, - "loss": 1.221, - "nll_loss": 1.1629877090454102, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08603286743164062, - "rewards/margins": 0.01965303160250187, - "rewards/rejected": -0.10568590462207794, - "step": 90 - }, - { - "epoch": 0.30057803468208094, - "grad_norm": 1.1853687763214111, - "learning_rate": 2.89419795221843e-06, - "log_odds_chosen": 0.07484885305166245, - "log_odds_ratio": -0.7150307893753052, - "logits/chosen": 0.5426656603813171, - "logits/rejected": 0.2499132603406906, - "logps/chosen": -1.0105516910552979, - "logps/rejected": -1.0506744384765625, - "loss": 1.3738, - "nll_loss": 1.3022829294204712, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.10105516016483307, - "rewards/margins": 0.004012288525700569, - "rewards/rejected": -0.10506745427846909, - "step": 91 - }, - { - "epoch": 0.30388109000825764, - "grad_norm": 0.8514940142631531, - "learning_rate": 2.8805460750853243e-06, - "log_odds_chosen": 0.5499486327171326, - "log_odds_ratio": -0.4569864273071289, - "logits/chosen": 0.6064792275428772, - "logits/rejected": 0.08982174098491669, - "logps/chosen": -0.7721064686775208, - "logps/rejected": -1.1043424606323242, - "loss": 1.1563, - "nll_loss": 1.1105719804763794, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07721064984798431, - "rewards/margins": 0.03322359174489975, - "rewards/rejected": -0.11043424159288406, - "step": 92 - }, - { - "epoch": 0.30718414533443433, - "grad_norm": 0.8174107670783997, - "learning_rate": 2.8668941979522184e-06, - "log_odds_chosen": -0.19816720485687256, - "log_odds_ratio": -0.8099085092544556, - "logits/chosen": 0.5801234245300293, - "logits/rejected": 0.35534408688545227, - "logps/chosen": -1.0743424892425537, - "logps/rejected": -0.9395486116409302, - "loss": 1.3831, - "nll_loss": 1.302116870880127, - "rewards/accuracies": 0.25, - "rewards/chosen": -0.10743425786495209, - "rewards/margins": -0.013479400426149368, - "rewards/rejected": -0.09395486116409302, - "step": 93 - }, - { - "epoch": 0.3104872006606111, - "grad_norm": 0.8138275742530823, - "learning_rate": 2.8532423208191125e-06, - "log_odds_chosen": 0.7817922830581665, - "log_odds_ratio": -0.4440228044986725, - "logits/chosen": 0.4808114767074585, - "logits/rejected": 0.15742731094360352, - "logps/chosen": -0.7326540946960449, - "logps/rejected": -1.1574732065200806, - "loss": 1.0265, - "nll_loss": 0.9820656180381775, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07326541095972061, - "rewards/margins": 0.0424819141626358, - "rewards/rejected": -0.11574732512235641, - "step": 94 - }, - { - "epoch": 0.3137902559867878, - "grad_norm": 0.8577831387519836, - "learning_rate": 2.839590443686007e-06, - "log_odds_chosen": 0.4487288296222687, - "log_odds_ratio": -0.495725154876709, - "logits/chosen": 0.39521241188049316, - "logits/rejected": -0.06009801849722862, - "logps/chosen": -0.9034541845321655, - "logps/rejected": -1.1937315464019775, - "loss": 1.2667, - "nll_loss": 1.2171058654785156, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.09034541994333267, - "rewards/margins": 0.029027728363871574, - "rewards/rejected": -0.1193731427192688, - "step": 95 - }, - { - "epoch": 0.3170933113129645, - "grad_norm": 0.7311953902244568, - "learning_rate": 2.8259385665529006e-06, - "log_odds_chosen": 0.40546202659606934, - "log_odds_ratio": -0.5469982028007507, - "logits/chosen": 0.5457305312156677, - "logits/rejected": 0.2208119034767151, - "logps/chosen": -0.7815107703208923, - "logps/rejected": -0.9772369265556335, - "loss": 1.1189, - "nll_loss": 1.0642268657684326, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07815107703208923, - "rewards/margins": 0.019572610035538673, - "rewards/rejected": -0.09772369265556335, - "step": 96 - }, - { - "epoch": 0.32039636663914123, - "grad_norm": 0.8934459686279297, - "learning_rate": 2.812286689419795e-06, - "log_odds_chosen": 0.5334745645523071, - "log_odds_ratio": -0.47480255365371704, - "logits/chosen": 0.5572829246520996, - "logits/rejected": 0.33124563097953796, - "logps/chosen": -0.7753319144248962, - "logps/rejected": -1.1118357181549072, - "loss": 1.1156, - "nll_loss": 1.068089485168457, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07753318548202515, - "rewards/margins": 0.03365037217736244, - "rewards/rejected": -0.11118356883525848, - "step": 97 - }, - { - "epoch": 0.3236994219653179, - "grad_norm": 0.7352209091186523, - "learning_rate": 2.798634812286689e-06, - "log_odds_chosen": 0.9403247833251953, - "log_odds_ratio": -0.33582523465156555, - "logits/chosen": 0.6253050565719604, - "logits/rejected": -0.1108190268278122, - "logps/chosen": -0.6143428683280945, - "logps/rejected": -1.1553306579589844, - "loss": 0.9359, - "nll_loss": 0.9023321866989136, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06143428385257721, - "rewards/margins": 0.05409877747297287, - "rewards/rejected": -0.11553306877613068, - "step": 98 - }, - { - "epoch": 0.3270024772914946, - "grad_norm": 0.895953357219696, - "learning_rate": 2.7849829351535836e-06, - "log_odds_chosen": 1.2954692840576172, - "log_odds_ratio": -0.3093319535255432, - "logits/chosen": 0.49900615215301514, - "logits/rejected": -0.18668656051158905, - "logps/chosen": -0.5909051895141602, - "logps/rejected": -1.213058590888977, - "loss": 0.9142, - "nll_loss": 0.883228063583374, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.059090521186590195, - "rewards/margins": 0.06221533566713333, - "rewards/rejected": -0.12130585312843323, - "step": 99 - }, - { - "epoch": 0.33030553261767137, - "grad_norm": 0.6220764517784119, - "learning_rate": 2.7713310580204777e-06, - "log_odds_chosen": 0.5481612682342529, - "log_odds_ratio": -0.48265036940574646, - "logits/chosen": 0.431689977645874, - "logits/rejected": 0.4104776978492737, - "logps/chosen": -0.7251861691474915, - "logps/rejected": -1.0164110660552979, - "loss": 1.0365, - "nll_loss": 0.9882212281227112, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07251861691474915, - "rewards/margins": 0.02912249229848385, - "rewards/rejected": -0.10164111852645874, - "step": 100 - }, - { - "epoch": 0.33360858794384807, - "grad_norm": 0.6763557195663452, - "learning_rate": 2.757679180887372e-06, - "log_odds_chosen": 0.4268907904624939, - "log_odds_ratio": -0.5276908874511719, - "logits/chosen": 0.530775249004364, - "logits/rejected": -0.13289478421211243, - "logps/chosen": -0.7357723116874695, - "logps/rejected": -0.977557897567749, - "loss": 1.0852, - "nll_loss": 1.0323927402496338, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07357723265886307, - "rewards/margins": 0.024178560823202133, - "rewards/rejected": -0.0977557897567749, - "step": 101 - }, - { - "epoch": 0.33691164327002476, - "grad_norm": 0.7945391535758972, - "learning_rate": 2.744027303754266e-06, - "log_odds_chosen": 0.9158729910850525, - "log_odds_ratio": -0.3650895357131958, - "logits/chosen": 0.6995807886123657, - "logits/rejected": -0.15730887651443481, - "logps/chosen": -0.6724094748497009, - "logps/rejected": -1.2214854955673218, - "loss": 0.9728, - "nll_loss": 0.9362743496894836, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06724094599485397, - "rewards/margins": 0.054907605051994324, - "rewards/rejected": -0.1221485510468483, - "step": 102 - }, - { - "epoch": 0.34021469859620146, - "grad_norm": 1.2288607358932495, - "learning_rate": 2.7303754266211603e-06, - "log_odds_chosen": 0.5417200326919556, - "log_odds_ratio": -0.4730820655822754, - "logits/chosen": 0.6150412559509277, - "logits/rejected": -0.1422542780637741, - "logps/chosen": -0.7383185625076294, - "logps/rejected": -1.0598341226577759, - "loss": 1.048, - "nll_loss": 1.0007333755493164, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07383185625076294, - "rewards/margins": 0.032151561230421066, - "rewards/rejected": -0.10598340630531311, - "step": 103 - }, - { - "epoch": 0.3435177539223782, - "grad_norm": 0.7345607280731201, - "learning_rate": 2.7167235494880544e-06, - "log_odds_chosen": 0.6553727388381958, - "log_odds_ratio": -0.470994234085083, - "logits/chosen": 0.6395617723464966, - "logits/rejected": 0.14781363308429718, - "logps/chosen": -0.7219505310058594, - "logps/rejected": -1.085981845855713, - "loss": 1.0708, - "nll_loss": 1.0237462520599365, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07219505310058594, - "rewards/margins": 0.03640313819050789, - "rewards/rejected": -0.10859820246696472, - "step": 104 - }, - { - "epoch": 0.3468208092485549, - "grad_norm": 0.8081974983215332, - "learning_rate": 2.7030716723549485e-06, - "log_odds_chosen": 0.734655499458313, - "log_odds_ratio": -0.3969581425189972, - "logits/chosen": 0.7286203503608704, - "logits/rejected": 0.28220099210739136, - "logps/chosen": -0.6707303524017334, - "logps/rejected": -1.1003150939941406, - "loss": 0.9537, - "nll_loss": 0.9139864444732666, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0670730397105217, - "rewards/margins": 0.042958468198776245, - "rewards/rejected": -0.11003150790929794, - "step": 105 - }, - { - "epoch": 0.3501238645747316, - "grad_norm": 0.8724524974822998, - "learning_rate": 2.689419795221843e-06, - "log_odds_chosen": 0.5849220752716064, - "log_odds_ratio": -0.4571787118911743, - "logits/chosen": 0.6666327714920044, - "logits/rejected": -0.026708556339144707, - "logps/chosen": -0.6546144485473633, - "logps/rejected": -0.963706374168396, - "loss": 1.0169, - "nll_loss": 0.9711433053016663, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06546144187450409, - "rewards/margins": 0.03090919926762581, - "rewards/rejected": -0.0963706448674202, - "step": 106 - }, - { - "epoch": 0.35342691990090835, - "grad_norm": 0.8738275170326233, - "learning_rate": 2.675767918088737e-06, - "log_odds_chosen": 0.5298919081687927, - "log_odds_ratio": -0.4781203269958496, - "logits/chosen": 0.682755708694458, - "logits/rejected": 0.33328163623809814, - "logps/chosen": -0.7246373295783997, - "logps/rejected": -1.0272271633148193, - "loss": 1.1165, - "nll_loss": 1.0687222480773926, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0724637359380722, - "rewards/margins": 0.03025898151099682, - "rewards/rejected": -0.10272271186113358, - "step": 107 - }, - { - "epoch": 0.35672997522708505, - "grad_norm": 0.7264381051063538, - "learning_rate": 2.6621160409556315e-06, - "log_odds_chosen": 0.8004027009010315, - "log_odds_ratio": -0.43748778104782104, - "logits/chosen": 0.5338699221611023, - "logits/rejected": 0.06740663200616837, - "logps/chosen": -0.7055500745773315, - "logps/rejected": -1.0877232551574707, - "loss": 1.0526, - "nll_loss": 1.0088627338409424, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07055500894784927, - "rewards/margins": 0.03821731358766556, - "rewards/rejected": -0.10877232253551483, - "step": 108 - }, - { - "epoch": 0.36003303055326175, - "grad_norm": 1.0142654180526733, - "learning_rate": 2.648464163822525e-06, - "log_odds_chosen": -0.2071620225906372, - "log_odds_ratio": -0.9656941890716553, - "logits/chosen": 0.46369075775146484, - "logits/rejected": 0.13346746563911438, - "logps/chosen": -0.7003957629203796, - "logps/rejected": -0.7551041841506958, - "loss": 1.0429, - "nll_loss": 0.9463555812835693, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07003957778215408, - "rewards/margins": 0.005470845848321915, - "rewards/rejected": -0.0755104273557663, - "step": 109 - }, - { - "epoch": 0.3633360858794385, - "grad_norm": 1.0476593971252441, - "learning_rate": 2.6348122866894197e-06, - "log_odds_chosen": 0.7398121953010559, - "log_odds_ratio": -0.42863211035728455, - "logits/chosen": 0.5278955101966858, - "logits/rejected": 0.4660162925720215, - "logps/chosen": -0.5549638271331787, - "logps/rejected": -0.9216379523277283, - "loss": 0.9288, - "nll_loss": 0.8859040141105652, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05549638718366623, - "rewards/margins": 0.036667414009571075, - "rewards/rejected": -0.0921638011932373, - "step": 110 - }, - { - "epoch": 0.3666391412056152, - "grad_norm": 0.7171781063079834, - "learning_rate": 2.6211604095563137e-06, - "log_odds_chosen": -0.017989829182624817, - "log_odds_ratio": -0.7669973373413086, - "logits/chosen": 0.4505044221878052, - "logits/rejected": 0.2871386408805847, - "logps/chosen": -0.9541218876838684, - "logps/rejected": -0.9366425275802612, - "loss": 1.3288, - "nll_loss": 1.2520873546600342, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.09541219472885132, - "rewards/margins": -0.0017479397356510162, - "rewards/rejected": -0.09366425126791, - "step": 111 - }, - { - "epoch": 0.3699421965317919, - "grad_norm": 1.002966284751892, - "learning_rate": 2.607508532423208e-06, - "log_odds_chosen": 0.627768337726593, - "log_odds_ratio": -0.43289071321487427, - "logits/chosen": 0.6650068759918213, - "logits/rejected": 0.513642430305481, - "logps/chosen": -0.6991225481033325, - "logps/rejected": -1.0533337593078613, - "loss": 1.0365, - "nll_loss": 0.9932342171669006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06991225481033325, - "rewards/margins": 0.03542111814022064, - "rewards/rejected": -0.10533338040113449, - "step": 112 - }, - { - "epoch": 0.37324525185796864, - "grad_norm": 0.7886227965354919, - "learning_rate": 2.5938566552901023e-06, - "log_odds_chosen": 0.4568031132221222, - "log_odds_ratio": -0.523139238357544, - "logits/chosen": 0.44230565428733826, - "logits/rejected": 0.5951439142227173, - "logps/chosen": -0.741425633430481, - "logps/rejected": -0.969498336315155, - "loss": 1.1481, - "nll_loss": 1.0958025455474854, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07414256781339645, - "rewards/margins": 0.02280726656317711, - "rewards/rejected": -0.09694983065128326, - "step": 113 - }, - { - "epoch": 0.37654830718414534, - "grad_norm": 0.6207331418991089, - "learning_rate": 2.5802047781569968e-06, - "log_odds_chosen": 0.6316211819648743, - "log_odds_ratio": -0.43251290917396545, - "logits/chosen": 0.6266273260116577, - "logits/rejected": 0.16535808145999908, - "logps/chosen": -0.6410987973213196, - "logps/rejected": -0.9948447942733765, - "loss": 1.0384, - "nll_loss": 0.9951103925704956, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06410987675189972, - "rewards/margins": 0.03537460416555405, - "rewards/rejected": -0.09948448836803436, - "step": 114 - }, - { - "epoch": 0.37985136251032203, - "grad_norm": 0.8144450783729553, - "learning_rate": 2.566552901023891e-06, - "log_odds_chosen": 0.5685396194458008, - "log_odds_ratio": -0.4503624141216278, - "logits/chosen": 0.7210501432418823, - "logits/rejected": -0.08553921431303024, - "logps/chosen": -0.8061234951019287, - "logps/rejected": -1.1550283432006836, - "loss": 1.0947, - "nll_loss": 1.049682378768921, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.08061234652996063, - "rewards/margins": 0.03489048779010773, - "rewards/rejected": -0.11550284177064896, - "step": 115 - }, - { - "epoch": 0.3831544178364988, - "grad_norm": 0.734238862991333, - "learning_rate": 2.5529010238907845e-06, - "log_odds_chosen": 0.47525152564048767, - "log_odds_ratio": -0.4975365102291107, - "logits/chosen": 0.6590664982795715, - "logits/rejected": 0.3357282876968384, - "logps/chosen": -0.751646101474762, - "logps/rejected": -1.030682921409607, - "loss": 1.1038, - "nll_loss": 1.0540626049041748, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07516461610794067, - "rewards/margins": 0.027903679758310318, - "rewards/rejected": -0.1030682921409607, - "step": 116 - }, - { - "epoch": 0.3864574731626755, - "grad_norm": 0.7743604779243469, - "learning_rate": 2.539249146757679e-06, - "log_odds_chosen": 0.4323543906211853, - "log_odds_ratio": -0.5651507377624512, - "logits/chosen": 0.6451403498649597, - "logits/rejected": 0.21245254576206207, - "logps/chosen": -0.8353888988494873, - "logps/rejected": -1.0568689107894897, - "loss": 1.1548, - "nll_loss": 1.0982999801635742, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08353888988494873, - "rewards/margins": 0.022148000076413155, - "rewards/rejected": -0.10568689554929733, - "step": 117 - }, - { - "epoch": 0.3897605284888522, - "grad_norm": 0.6527377367019653, - "learning_rate": 2.525597269624573e-06, - "log_odds_chosen": 0.38210755586624146, - "log_odds_ratio": -0.5950049161911011, - "logits/chosen": 0.6632624268531799, - "logits/rejected": 0.2996896803379059, - "logps/chosen": -0.8620520830154419, - "logps/rejected": -1.0867505073547363, - "loss": 1.1638, - "nll_loss": 1.1043139696121216, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08620521426200867, - "rewards/margins": 0.022469839081168175, - "rewards/rejected": -0.1086750477552414, - "step": 118 - }, - { - "epoch": 0.3930635838150289, - "grad_norm": 0.8690906763076782, - "learning_rate": 2.5119453924914675e-06, - "log_odds_chosen": 0.48861873149871826, - "log_odds_ratio": -0.5146056413650513, - "logits/chosen": 0.6304864883422852, - "logits/rejected": 0.32620173692703247, - "logps/chosen": -0.7583458423614502, - "logps/rejected": -1.0321035385131836, - "loss": 1.1091, - "nll_loss": 1.0576279163360596, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07583457976579666, - "rewards/margins": 0.027375776320695877, - "rewards/rejected": -0.10321035236120224, - "step": 119 - }, - { - "epoch": 0.3963666391412056, - "grad_norm": 0.5061293244361877, - "learning_rate": 2.4982935153583616e-06, - "log_odds_chosen": 0.6727900505065918, - "log_odds_ratio": -0.44206640124320984, - "logits/chosen": 0.6417202949523926, - "logits/rejected": -0.03143083304166794, - "logps/chosen": -0.6724810600280762, - "logps/rejected": -1.0444282293319702, - "loss": 0.9949, - "nll_loss": 0.9507288336753845, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06724810600280762, - "rewards/margins": 0.037194713950157166, - "rewards/rejected": -0.10444281995296478, - "step": 120 - }, - { - "epoch": 0.3996696944673823, - "grad_norm": 0.6905210614204407, - "learning_rate": 2.484641638225256e-06, - "log_odds_chosen": 1.2802870273590088, - "log_odds_ratio": -0.28402161598205566, - "logits/chosen": 0.5914779901504517, - "logits/rejected": -0.08999274671077728, - "logps/chosen": -0.5331635475158691, - "logps/rejected": -1.246229887008667, - "loss": 0.7956, - "nll_loss": 0.7672004103660583, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.053316354751586914, - "rewards/margins": 0.07130663096904755, - "rewards/rejected": -0.12462298572063446, - "step": 121 - }, - { - "epoch": 0.402972749793559, - "grad_norm": 0.831581711769104, - "learning_rate": 2.47098976109215e-06, - "log_odds_chosen": 0.2573418617248535, - "log_odds_ratio": -0.5937476754188538, - "logits/chosen": 0.664549708366394, - "logits/rejected": 0.28668779134750366, - "logps/chosen": -0.8047072887420654, - "logps/rejected": -0.9520777463912964, - "loss": 1.138, - "nll_loss": 1.0785871744155884, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0804707258939743, - "rewards/margins": 0.014737047255039215, - "rewards/rejected": -0.09520778059959412, - "step": 122 - }, - { - "epoch": 0.40627580511973577, - "grad_norm": 0.6514219045639038, - "learning_rate": 2.4573378839590442e-06, - "log_odds_chosen": 1.022546410560608, - "log_odds_ratio": -0.31934893131256104, - "logits/chosen": 0.6199050545692444, - "logits/rejected": 0.2979932725429535, - "logps/chosen": -0.5263546109199524, - "logps/rejected": -1.0373154878616333, - "loss": 0.904, - "nll_loss": 0.8720792531967163, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05263546109199524, - "rewards/margins": 0.05109608918428421, - "rewards/rejected": -0.10373155772686005, - "step": 123 - }, - { - "epoch": 0.40957886044591246, - "grad_norm": 0.6361023783683777, - "learning_rate": 2.4436860068259383e-06, - "log_odds_chosen": 0.26351723074913025, - "log_odds_ratio": -0.6138995289802551, - "logits/chosen": 0.6541872024536133, - "logits/rejected": 0.40502843260765076, - "logps/chosen": -0.8180014491081238, - "logps/rejected": -0.9363760948181152, - "loss": 1.2027, - "nll_loss": 1.1413019895553589, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08180014789104462, - "rewards/margins": 0.011837461963295937, - "rewards/rejected": -0.0936376079916954, - "step": 124 - }, - { - "epoch": 0.41288191577208916, - "grad_norm": 0.8529154062271118, - "learning_rate": 2.4300341296928328e-06, - "log_odds_chosen": 0.867598295211792, - "log_odds_ratio": -0.3573214113712311, - "logits/chosen": 0.5902390480041504, - "logits/rejected": 0.05114760622382164, - "logps/chosen": -0.5824288129806519, - "logps/rejected": -1.0604617595672607, - "loss": 0.9525, - "nll_loss": 0.9167202115058899, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.058242879807949066, - "rewards/margins": 0.04780329018831253, - "rewards/rejected": -0.1060461774468422, - "step": 125 - }, - { - "epoch": 0.4161849710982659, - "grad_norm": 0.9999712109565735, - "learning_rate": 2.416382252559727e-06, - "log_odds_chosen": 0.0918276458978653, - "log_odds_ratio": -0.6688194870948792, - "logits/chosen": 0.6384463906288147, - "logits/rejected": 0.21827232837677002, - "logps/chosen": -0.943254828453064, - "logps/rejected": -0.9869657754898071, - "loss": 1.2607, - "nll_loss": 1.19384765625, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0943254828453064, - "rewards/margins": 0.004371089860796928, - "rewards/rejected": -0.09869657456874847, - "step": 126 - }, - { - "epoch": 0.4194880264244426, - "grad_norm": 0.7320705652236938, - "learning_rate": 2.4027303754266213e-06, - "log_odds_chosen": 0.5666298866271973, - "log_odds_ratio": -0.4734857678413391, - "logits/chosen": 0.6386197805404663, - "logits/rejected": 0.09126316010951996, - "logps/chosen": -0.6534008383750916, - "logps/rejected": -0.9641337990760803, - "loss": 1.0196, - "nll_loss": 0.972294270992279, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0653400793671608, - "rewards/margins": 0.031073302030563354, - "rewards/rejected": -0.09641338139772415, - "step": 127 - }, - { - "epoch": 0.4227910817506193, - "grad_norm": 0.9497851729393005, - "learning_rate": 2.3890784982935154e-06, - "log_odds_chosen": 0.27134937047958374, - "log_odds_ratio": -0.6309201717376709, - "logits/chosen": 0.5959702730178833, - "logits/rejected": 0.1148364469408989, - "logps/chosen": -0.9391365051269531, - "logps/rejected": -1.085074543952942, - "loss": 1.2355, - "nll_loss": 1.1724547147750854, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.09391365200281143, - "rewards/margins": 0.014593806117773056, - "rewards/rejected": -0.10850745439529419, - "step": 128 - }, - { - "epoch": 0.42609413707679605, - "grad_norm": 0.7547330856323242, - "learning_rate": 2.3754266211604095e-06, - "log_odds_chosen": 0.8483239412307739, - "log_odds_ratio": -0.3575970530509949, - "logits/chosen": 0.6268570423126221, - "logits/rejected": -0.00109090656042099, - "logps/chosen": -0.6226826906204224, - "logps/rejected": -1.1026033163070679, - "loss": 0.9846, - "nll_loss": 0.9488646984100342, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.062268272042274475, - "rewards/margins": 0.04799206554889679, - "rewards/rejected": -0.11026033759117126, - "step": 129 - }, - { - "epoch": 0.42939719240297275, - "grad_norm": 0.6572033166885376, - "learning_rate": 2.3617747440273035e-06, - "log_odds_chosen": 0.5940951108932495, - "log_odds_ratio": -0.4972843527793884, - "logits/chosen": 0.6453897953033447, - "logits/rejected": 0.04134559631347656, - "logps/chosen": -0.7034021615982056, - "logps/rejected": -1.0336124897003174, - "loss": 1.0994, - "nll_loss": 1.0497130155563354, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07034021615982056, - "rewards/margins": 0.03302102908492088, - "rewards/rejected": -0.10336124897003174, - "step": 130 - }, - { - "epoch": 0.43270024772914945, - "grad_norm": 0.5858656764030457, - "learning_rate": 2.3481228668941976e-06, - "log_odds_chosen": 0.8026277422904968, - "log_odds_ratio": -0.3926485478878021, - "logits/chosen": 0.6088622212409973, - "logits/rejected": -0.12077679485082626, - "logps/chosen": -0.6523738503456116, - "logps/rejected": -1.066943645477295, - "loss": 0.9985, - "nll_loss": 0.9592774510383606, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0652373880147934, - "rewards/margins": 0.04145697131752968, - "rewards/rejected": -0.10669435560703278, - "step": 131 - }, - { - "epoch": 0.4360033030553262, - "grad_norm": 0.5979248881340027, - "learning_rate": 2.334470989761092e-06, - "log_odds_chosen": 0.49501678347587585, - "log_odds_ratio": -0.5009665489196777, - "logits/chosen": 0.5630398988723755, - "logits/rejected": 0.3187583386898041, - "logps/chosen": -0.7123638391494751, - "logps/rejected": -0.9846349954605103, - "loss": 1.1451, - "nll_loss": 1.095015048980713, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07123638689517975, - "rewards/margins": 0.027227114886045456, - "rewards/rejected": -0.0984634980559349, - "step": 132 - }, - { - "epoch": 0.4393063583815029, - "grad_norm": 0.7982103824615479, - "learning_rate": 2.320819112627986e-06, - "log_odds_chosen": 0.4743131995201111, - "log_odds_ratio": -0.5220986604690552, - "logits/chosen": 0.5729357004165649, - "logits/rejected": 0.3596652150154114, - "logps/chosen": -0.7776125073432922, - "logps/rejected": -1.0502963066101074, - "loss": 1.1329, - "nll_loss": 1.0807304382324219, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07776124775409698, - "rewards/margins": 0.027268387377262115, - "rewards/rejected": -0.1050296351313591, - "step": 133 - }, - { - "epoch": 0.4426094137076796, - "grad_norm": 0.6886408925056458, - "learning_rate": 2.3071672354948806e-06, - "log_odds_chosen": 0.3579927682876587, - "log_odds_ratio": -0.6149266362190247, - "logits/chosen": 0.5614507794380188, - "logits/rejected": 0.19894365966320038, - "logps/chosen": -0.8480393886566162, - "logps/rejected": -1.0118858814239502, - "loss": 1.2431, - "nll_loss": 1.1816036701202393, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08480393886566162, - "rewards/margins": 0.016384650021791458, - "rewards/rejected": -0.10118859261274338, - "step": 134 - }, - { - "epoch": 0.44591246903385634, - "grad_norm": 0.558472752571106, - "learning_rate": 2.2935153583617747e-06, - "log_odds_chosen": 0.4736022353172302, - "log_odds_ratio": -0.4898481070995331, - "logits/chosen": 0.6630843281745911, - "logits/rejected": 0.22687160968780518, - "logps/chosen": -0.634993314743042, - "logps/rejected": -0.8869645595550537, - "loss": 0.9952, - "nll_loss": 0.9462650418281555, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0634993314743042, - "rewards/margins": 0.025197122246026993, - "rewards/rejected": -0.08869645744562149, - "step": 135 - }, - { - "epoch": 0.44921552436003304, - "grad_norm": 0.7527422904968262, - "learning_rate": 2.279863481228669e-06, - "log_odds_chosen": 0.5318613052368164, - "log_odds_ratio": -0.5287194848060608, - "logits/chosen": 0.5180320143699646, - "logits/rejected": 0.2663363218307495, - "logps/chosen": -0.6989588141441345, - "logps/rejected": -1.0014413595199585, - "loss": 1.1069, - "nll_loss": 1.0539921522140503, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06989588588476181, - "rewards/margins": 0.030248260125517845, - "rewards/rejected": -0.1001441478729248, - "step": 136 - }, - { - "epoch": 0.45251857968620973, - "grad_norm": 0.76942378282547, - "learning_rate": 2.266211604095563e-06, - "log_odds_chosen": 0.4214478135108948, - "log_odds_ratio": -0.554980456829071, - "logits/chosen": 0.6156356334686279, - "logits/rejected": 0.23730260133743286, - "logps/chosen": -0.6653836965560913, - "logps/rejected": -0.8582514524459839, - "loss": 1.029, - "nll_loss": 0.97350013256073, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06653837859630585, - "rewards/margins": 0.019286775961518288, - "rewards/rejected": -0.08582515269517899, - "step": 137 - }, - { - "epoch": 0.4558216350123865, - "grad_norm": 0.7126485109329224, - "learning_rate": 2.2525597269624573e-06, - "log_odds_chosen": 0.7915516495704651, - "log_odds_ratio": -0.38648173213005066, - "logits/chosen": 0.6451093554496765, - "logits/rejected": 0.35608258843421936, - "logps/chosen": -0.5877947807312012, - "logps/rejected": -1.0178375244140625, - "loss": 0.9498, - "nll_loss": 0.9111069440841675, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05877947434782982, - "rewards/margins": 0.04300428554415703, - "rewards/rejected": -0.10178375989198685, - "step": 138 - }, - { - "epoch": 0.4591246903385632, - "grad_norm": 0.4039614796638489, - "learning_rate": 2.2389078498293514e-06, - "log_odds_chosen": 1.1989668607711792, - "log_odds_ratio": -0.28698238730430603, - "logits/chosen": 0.446550190448761, - "logits/rejected": 0.07493677735328674, - "logps/chosen": -0.48791831731796265, - "logps/rejected": -1.1377228498458862, - "loss": 1.0169, - "nll_loss": 0.9882245063781738, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.048791833221912384, - "rewards/margins": 0.06498045474290848, - "rewards/rejected": -0.11377228796482086, - "step": 139 - }, - { - "epoch": 0.4624277456647399, - "grad_norm": 0.7523724436759949, - "learning_rate": 2.2252559726962455e-06, - "log_odds_chosen": 0.6140097975730896, - "log_odds_ratio": -0.45013606548309326, - "logits/chosen": 0.6864692568778992, - "logits/rejected": 0.15220129489898682, - "logps/chosen": -0.7216159105300903, - "logps/rejected": -1.0684229135513306, - "loss": 1.0717, - "nll_loss": 1.0266821384429932, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07216159254312515, - "rewards/margins": 0.03468070924282074, - "rewards/rejected": -0.10684230178594589, - "step": 140 - }, - { - "epoch": 0.46573080099091657, - "grad_norm": 0.8801148533821106, - "learning_rate": 2.21160409556314e-06, - "log_odds_chosen": 0.5207540392875671, - "log_odds_ratio": -0.5139726996421814, - "logits/chosen": 0.697577714920044, - "logits/rejected": 0.3358006477355957, - "logps/chosen": -0.6891791820526123, - "logps/rejected": -0.9573766589164734, - "loss": 1.0011, - "nll_loss": 0.9496613144874573, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06891792267560959, - "rewards/margins": 0.026819750666618347, - "rewards/rejected": -0.09573768079280853, - "step": 141 - }, - { - "epoch": 0.4690338563170933, - "grad_norm": 0.38305214047431946, - "learning_rate": 2.197952218430034e-06, - "log_odds_chosen": 1.0533406734466553, - "log_odds_ratio": -0.30386924743652344, - "logits/chosen": 0.5734614133834839, - "logits/rejected": 0.19060435891151428, - "logps/chosen": -0.516394317150116, - "logps/rejected": -1.0770691633224487, - "loss": 0.8964, - "nll_loss": 0.866052508354187, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05163943022489548, - "rewards/margins": 0.056067485362291336, - "rewards/rejected": -0.10770691931247711, - "step": 142 - }, - { - "epoch": 0.47233691164327, - "grad_norm": 0.5582275986671448, - "learning_rate": 2.184300341296928e-06, - "log_odds_chosen": 0.2306412160396576, - "log_odds_ratio": -0.6293576955795288, - "logits/chosen": 0.5739461183547974, - "logits/rejected": 0.31204113364219666, - "logps/chosen": -0.8788301944732666, - "logps/rejected": -0.9704146981239319, - "loss": 1.1996, - "nll_loss": 1.1366775035858154, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.08788301795721054, - "rewards/margins": 0.009158452972769737, - "rewards/rejected": -0.09704147279262543, - "step": 143 - }, - { - "epoch": 0.4756399669694467, - "grad_norm": 0.790081262588501, - "learning_rate": 2.170648464163822e-06, - "log_odds_chosen": 0.5002575516700745, - "log_odds_ratio": -0.5458371639251709, - "logits/chosen": 0.4853112995624542, - "logits/rejected": 0.18581928312778473, - "logps/chosen": -0.7242958545684814, - "logps/rejected": -0.9451141357421875, - "loss": 1.0944, - "nll_loss": 1.0398327112197876, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0724295899271965, - "rewards/margins": 0.022081827744841576, - "rewards/rejected": -0.09451141208410263, - "step": 144 - }, - { - "epoch": 0.47894302229562347, - "grad_norm": 0.9157156348228455, - "learning_rate": 2.1569965870307167e-06, - "log_odds_chosen": 0.14076676964759827, - "log_odds_ratio": -0.6626269817352295, - "logits/chosen": 0.5069540143013, - "logits/rejected": 0.34302204847335815, - "logps/chosen": -0.8916156888008118, - "logps/rejected": -0.9255995750427246, - "loss": 1.2562, - "nll_loss": 1.1899263858795166, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08916157484054565, - "rewards/margins": 0.003398386761546135, - "rewards/rejected": -0.09255996346473694, - "step": 145 - }, - { - "epoch": 0.48224607762180016, - "grad_norm": 0.5149905681610107, - "learning_rate": 2.1433447098976107e-06, - "log_odds_chosen": 0.6664887070655823, - "log_odds_ratio": -0.4260491728782654, - "logits/chosen": 0.6849744319915771, - "logits/rejected": 0.11967531591653824, - "logps/chosen": -0.6410220861434937, - "logps/rejected": -0.9887681007385254, - "loss": 0.9552, - "nll_loss": 0.9125782251358032, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06410221010446548, - "rewards/margins": 0.034774597734212875, - "rewards/rejected": -0.09887681156396866, - "step": 146 - }, - { - "epoch": 0.48554913294797686, - "grad_norm": 0.6020240783691406, - "learning_rate": 2.1296928327645052e-06, - "log_odds_chosen": 1.3857988119125366, - "log_odds_ratio": -0.2468542903661728, - "logits/chosen": 0.5456637144088745, - "logits/rejected": 0.09806689620018005, - "logps/chosen": -0.499085396528244, - "logps/rejected": -1.2230888605117798, - "loss": 0.8467, - "nll_loss": 0.8220336437225342, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04990854114294052, - "rewards/margins": 0.07240035384893417, - "rewards/rejected": -0.1223088949918747, - "step": 147 - }, - { - "epoch": 0.4888521882741536, - "grad_norm": 0.5704212784767151, - "learning_rate": 2.1160409556313993e-06, - "log_odds_chosen": 0.6280186176300049, - "log_odds_ratio": -0.447559654712677, - "logits/chosen": 0.7699200510978699, - "logits/rejected": 0.05550285428762436, - "logps/chosen": -0.682567834854126, - "logps/rejected": -1.0361101627349854, - "loss": 0.9727, - "nll_loss": 0.9278945922851562, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06825678050518036, - "rewards/margins": 0.035354238003492355, - "rewards/rejected": -0.10361102223396301, - "step": 148 - }, - { - "epoch": 0.4921552436003303, - "grad_norm": 0.5886642932891846, - "learning_rate": 2.1023890784982938e-06, - "log_odds_chosen": 0.48575472831726074, - "log_odds_ratio": -0.5011197924613953, - "logits/chosen": 0.5786157250404358, - "logits/rejected": -0.1017485111951828, - "logps/chosen": -0.7173441648483276, - "logps/rejected": -1.003722906112671, - "loss": 1.0521, - "nll_loss": 1.0019950866699219, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07173442095518112, - "rewards/margins": 0.028637876734137535, - "rewards/rejected": -0.1003722995519638, - "step": 149 - }, - { - "epoch": 0.495458298926507, - "grad_norm": 0.5460280179977417, - "learning_rate": 2.0887372013651874e-06, - "log_odds_chosen": 0.1086944043636322, - "log_odds_ratio": -0.6550334095954895, - "logits/chosen": 0.5752851963043213, - "logits/rejected": 0.496643990278244, - "logps/chosen": -0.8731619715690613, - "logps/rejected": -0.9213255643844604, - "loss": 1.1939, - "nll_loss": 1.1283609867095947, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08731620013713837, - "rewards/margins": 0.0048163533210754395, - "rewards/rejected": -0.0921325534582138, - "step": 150 - }, - { - "epoch": 0.49876135425268375, - "grad_norm": 1.2331262826919556, - "learning_rate": 2.0750853242320815e-06, - "log_odds_chosen": 0.41480472683906555, - "log_odds_ratio": -0.5144199728965759, - "logits/chosen": 0.6647451519966125, - "logits/rejected": -0.05466258153319359, - "logps/chosen": -0.7060014009475708, - "logps/rejected": -0.9509197473526001, - "loss": 1.0621, - "nll_loss": 1.010627031326294, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07060013711452484, - "rewards/margins": 0.024491840973496437, - "rewards/rejected": -0.09509197622537613, - "step": 151 - }, - { - "epoch": 0.5020644095788604, - "grad_norm": 0.703823983669281, - "learning_rate": 2.061433447098976e-06, - "log_odds_chosen": -0.047725826501846313, - "log_odds_ratio": -0.7521950006484985, - "logits/chosen": 0.5732386112213135, - "logits/rejected": 0.3454573154449463, - "logps/chosen": -0.942474901676178, - "logps/rejected": -0.9117279052734375, - "loss": 1.2609, - "nll_loss": 1.1856637001037598, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.0942474976181984, - "rewards/margins": -0.0030747056007385254, - "rewards/rejected": -0.09117279201745987, - "step": 152 - }, - { - "epoch": 0.5053674649050371, - "grad_norm": 0.6298907995223999, - "learning_rate": 2.04778156996587e-06, - "log_odds_chosen": 0.8662782311439514, - "log_odds_ratio": -0.36877936124801636, - "logits/chosen": 0.6611707806587219, - "logits/rejected": 0.06113681197166443, - "logps/chosen": -0.5339194536209106, - "logps/rejected": -0.9736974239349365, - "loss": 0.9338, - "nll_loss": 0.8969273567199707, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0533919483423233, - "rewards/margins": 0.04397778958082199, - "rewards/rejected": -0.0973697379231453, - "step": 153 - }, - { - "epoch": 0.5086705202312138, - "grad_norm": 0.4954488277435303, - "learning_rate": 2.0341296928327645e-06, - "log_odds_chosen": 1.206967830657959, - "log_odds_ratio": -0.2750350534915924, - "logits/chosen": 0.7229337692260742, - "logits/rejected": -0.12202668190002441, - "logps/chosen": -0.525684118270874, - "logps/rejected": -1.1982414722442627, - "loss": 0.8414, - "nll_loss": 0.8139394521713257, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05256841331720352, - "rewards/margins": 0.06725572049617767, - "rewards/rejected": -0.11982414126396179, - "step": 154 - }, - { - "epoch": 0.5119735755573905, - "grad_norm": 0.6650794148445129, - "learning_rate": 2.0204778156996586e-06, - "log_odds_chosen": 0.8394446969032288, - "log_odds_ratio": -0.388714998960495, - "logits/chosen": 0.6057007908821106, - "logits/rejected": 0.17184212803840637, - "logps/chosen": -0.5483078956604004, - "logps/rejected": -1.016586422920227, - "loss": 0.8773, - "nll_loss": 0.8384051322937012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05483078956604004, - "rewards/margins": 0.046827856451272964, - "rewards/rejected": -0.1016586422920227, - "step": 155 - }, - { - "epoch": 0.5152766308835673, - "grad_norm": 0.775217592716217, - "learning_rate": 2.006825938566553e-06, - "log_odds_chosen": 0.12826915085315704, - "log_odds_ratio": -0.6622228622436523, - "logits/chosen": 0.6797441840171814, - "logits/rejected": 0.3772622346878052, - "logps/chosen": -0.8059318661689758, - "logps/rejected": -0.8711554408073425, - "loss": 1.166, - "nll_loss": 1.0997518301010132, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08059319108724594, - "rewards/margins": 0.0065223537385463715, - "rewards/rejected": -0.08711554110050201, - "step": 156 - }, - { - "epoch": 0.518579686209744, - "grad_norm": 0.42605775594711304, - "learning_rate": 1.9931740614334467e-06, - "log_odds_chosen": 0.8663737773895264, - "log_odds_ratio": -0.39424198865890503, - "logits/chosen": 0.5165660381317139, - "logits/rejected": -0.19137074053287506, - "logps/chosen": -0.7119449377059937, - "logps/rejected": -1.1797124147415161, - "loss": 0.986, - "nll_loss": 0.9465779066085815, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07119449973106384, - "rewards/margins": 0.046776749193668365, - "rewards/rejected": -0.11797124147415161, - "step": 157 - }, - { - "epoch": 0.5218827415359207, - "grad_norm": 0.5299607515335083, - "learning_rate": 1.9795221843003412e-06, - "log_odds_chosen": 0.6188454627990723, - "log_odds_ratio": -0.482281893491745, - "logits/chosen": 0.4113537073135376, - "logits/rejected": 0.10315307974815369, - "logps/chosen": -0.6799213886260986, - "logps/rejected": -0.9837106466293335, - "loss": 0.9986, - "nll_loss": 0.9504036903381348, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06799213588237762, - "rewards/margins": 0.030378922820091248, - "rewards/rejected": -0.09837105870246887, - "step": 158 - }, - { - "epoch": 0.5251857968620974, - "grad_norm": 0.7565478086471558, - "learning_rate": 1.9658703071672353e-06, - "log_odds_chosen": 0.8473410606384277, - "log_odds_ratio": -0.3682926893234253, - "logits/chosen": 0.5713667869567871, - "logits/rejected": -0.0424153134226799, - "logps/chosen": -0.6636235117912292, - "logps/rejected": -1.170135736465454, - "loss": 1.0202, - "nll_loss": 0.9833855032920837, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06636235117912292, - "rewards/margins": 0.050651226192712784, - "rewards/rejected": -0.11701358109712601, - "step": 159 - }, - { - "epoch": 0.5284888521882741, - "grad_norm": 0.5173296332359314, - "learning_rate": 1.9522184300341294e-06, - "log_odds_chosen": 0.5269410610198975, - "log_odds_ratio": -0.5084446668624878, - "logits/chosen": 0.6337931752204895, - "logits/rejected": 0.09304094314575195, - "logps/chosen": -0.7049773931503296, - "logps/rejected": -0.9590980410575867, - "loss": 0.9939, - "nll_loss": 0.9430906772613525, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07049773633480072, - "rewards/margins": 0.025412069633603096, - "rewards/rejected": -0.09590981155633926, - "step": 160 - }, - { - "epoch": 0.5317919075144508, - "grad_norm": 0.5421537160873413, - "learning_rate": 1.938566552901024e-06, - "log_odds_chosen": 0.9586564302444458, - "log_odds_ratio": -0.3345176875591278, - "logits/chosen": 0.6567977666854858, - "logits/rejected": 0.1456572562456131, - "logps/chosen": -0.5851159691810608, - "logps/rejected": -1.1247538328170776, - "loss": 0.9176, - "nll_loss": 0.8841060400009155, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05851159989833832, - "rewards/margins": 0.053963787853717804, - "rewards/rejected": -0.11247538775205612, - "step": 161 - }, - { - "epoch": 0.5350949628406276, - "grad_norm": 0.6951601505279541, - "learning_rate": 1.924914675767918e-06, - "log_odds_chosen": 0.3711157739162445, - "log_odds_ratio": -0.531294584274292, - "logits/chosen": 0.5802691578865051, - "logits/rejected": 0.33292317390441895, - "logps/chosen": -0.770725667476654, - "logps/rejected": -0.9837435483932495, - "loss": 1.1056, - "nll_loss": 1.0524581670761108, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07707256823778152, - "rewards/margins": 0.021301789209246635, - "rewards/rejected": -0.09837435930967331, - "step": 162 - }, - { - "epoch": 0.5383980181668043, - "grad_norm": 0.5734198093414307, - "learning_rate": 1.9112627986348124e-06, - "log_odds_chosen": 0.5242481231689453, - "log_odds_ratio": -0.47526150941848755, - "logits/chosen": 0.4572170078754425, - "logits/rejected": 0.12766218185424805, - "logps/chosen": -0.7241344451904297, - "logps/rejected": -1.0218114852905273, - "loss": 1.2036, - "nll_loss": 1.1560336351394653, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07241344451904297, - "rewards/margins": 0.029767706990242004, - "rewards/rejected": -0.10218115150928497, - "step": 163 - }, - { - "epoch": 0.541701073492981, - "grad_norm": 0.6838796138763428, - "learning_rate": 1.8976109215017065e-06, - "log_odds_chosen": 0.661807656288147, - "log_odds_ratio": -0.46860355138778687, - "logits/chosen": 0.5418363213539124, - "logits/rejected": 0.2826703190803528, - "logps/chosen": -0.6394753456115723, - "logps/rejected": -0.9902697801589966, - "loss": 0.9783, - "nll_loss": 0.9314714670181274, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06394752860069275, - "rewards/margins": 0.03507945314049721, - "rewards/rejected": -0.09902698546648026, - "step": 164 - }, - { - "epoch": 0.5450041288191577, - "grad_norm": 0.4158156216144562, - "learning_rate": 1.8839590443686007e-06, - "log_odds_chosen": 0.4126843214035034, - "log_odds_ratio": -0.6025024652481079, - "logits/chosen": 0.5845680236816406, - "logits/rejected": 0.18366920948028564, - "logps/chosen": -0.6494317650794983, - "logps/rejected": -0.7823015451431274, - "loss": 1.0186, - "nll_loss": 0.9583237171173096, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.06494317948818207, - "rewards/margins": 0.01328697893768549, - "rewards/rejected": -0.07823015749454498, - "step": 165 - }, - { - "epoch": 0.5483071841453344, - "grad_norm": 0.5687664747238159, - "learning_rate": 1.8703071672354948e-06, - "log_odds_chosen": 0.689004123210907, - "log_odds_ratio": -0.45171013474464417, - "logits/chosen": 0.6241335868835449, - "logits/rejected": -0.053701307624578476, - "logps/chosen": -0.7073884010314941, - "logps/rejected": -1.1072502136230469, - "loss": 1.0736, - "nll_loss": 1.0284786224365234, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07073883712291718, - "rewards/margins": 0.03998619318008423, - "rewards/rejected": -0.1107250303030014, - "step": 166 - }, - { - "epoch": 0.5516102394715111, - "grad_norm": 0.4808405637741089, - "learning_rate": 1.856655290102389e-06, - "log_odds_chosen": 0.4109569787979126, - "log_odds_ratio": -0.5352988243103027, - "logits/chosen": 0.6777184009552002, - "logits/rejected": 0.3114359974861145, - "logps/chosen": -0.6541029214859009, - "logps/rejected": -0.8703718185424805, - "loss": 1.0064, - "nll_loss": 0.9528589248657227, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06541029363870621, - "rewards/margins": 0.02162688784301281, - "rewards/rejected": -0.08703717589378357, - "step": 167 - }, - { - "epoch": 0.5549132947976878, - "grad_norm": 0.44688713550567627, - "learning_rate": 1.8430034129692834e-06, - "log_odds_chosen": 0.9721356630325317, - "log_odds_ratio": -0.3439674973487854, - "logits/chosen": 0.6718886494636536, - "logits/rejected": 0.17465560138225555, - "logps/chosen": -0.5059576034545898, - "logps/rejected": -0.9842190146446228, - "loss": 0.8844, - "nll_loss": 0.8500003814697266, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.050595760345458984, - "rewards/margins": 0.047826141119003296, - "rewards/rejected": -0.09842190146446228, - "step": 168 - }, - { - "epoch": 0.5582163501238646, - "grad_norm": 0.6755116581916809, - "learning_rate": 1.8293515358361772e-06, - "log_odds_chosen": 1.0459821224212646, - "log_odds_ratio": -0.3862837851047516, - "logits/chosen": 0.5197728872299194, - "logits/rejected": -0.0458134301006794, - "logps/chosen": -0.5989990830421448, - "logps/rejected": -1.1014612913131714, - "loss": 0.8631, - "nll_loss": 0.8244497776031494, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.059899911284446716, - "rewards/margins": 0.050246212631464005, - "rewards/rejected": -0.11014612764120102, - "step": 169 - }, - { - "epoch": 0.5615194054500413, - "grad_norm": 0.3848477900028229, - "learning_rate": 1.8156996587030715e-06, - "log_odds_chosen": 1.4589920043945312, - "log_odds_ratio": -0.27062079310417175, - "logits/chosen": 0.6395197510719299, - "logits/rejected": 0.09559444338083267, - "logps/chosen": -0.3987687826156616, - "logps/rejected": -1.0001200437545776, - "loss": 0.7565, - "nll_loss": 0.7294365167617798, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03987687826156616, - "rewards/margins": 0.0601351261138916, - "rewards/rejected": -0.10001200437545776, - "step": 170 - }, - { - "epoch": 0.564822460776218, - "grad_norm": 0.5204459428787231, - "learning_rate": 1.8020477815699658e-06, - "log_odds_chosen": 1.196869134902954, - "log_odds_ratio": -0.3024407625198364, - "logits/chosen": 0.5893268585205078, - "logits/rejected": -0.19948892295360565, - "logps/chosen": -0.5559639930725098, - "logps/rejected": -1.1712052822113037, - "loss": 0.9205, - "nll_loss": 0.890262246131897, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05559639632701874, - "rewards/margins": 0.061524130403995514, - "rewards/rejected": -0.11712052673101425, - "step": 171 - }, - { - "epoch": 0.5681255161023947, - "grad_norm": 0.483640193939209, - "learning_rate": 1.7883959044368599e-06, - "log_odds_chosen": 0.41925936937332153, - "log_odds_ratio": -0.5154713988304138, - "logits/chosen": 0.772943913936615, - "logits/rejected": 0.2698158323764801, - "logps/chosen": -0.6530203223228455, - "logps/rejected": -0.8679060935974121, - "loss": 0.9981, - "nll_loss": 0.9465101957321167, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0653020367026329, - "rewards/margins": 0.021488577127456665, - "rewards/rejected": -0.08679061383008957, - "step": 172 - }, - { - "epoch": 0.5714285714285714, - "grad_norm": 0.48385122418403625, - "learning_rate": 1.7747440273037541e-06, - "log_odds_chosen": 0.8258459568023682, - "log_odds_ratio": -0.3774183392524719, - "logits/chosen": 0.7176973819732666, - "logits/rejected": 0.18524163961410522, - "logps/chosen": -0.5668818950653076, - "logps/rejected": -0.9850445985794067, - "loss": 0.8846, - "nll_loss": 0.8468207120895386, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05668818578124046, - "rewards/margins": 0.04181627184152603, - "rewards/rejected": -0.0985044613480568, - "step": 173 - }, - { - "epoch": 0.5747316267547481, - "grad_norm": 0.5723414421081543, - "learning_rate": 1.7610921501706484e-06, - "log_odds_chosen": 1.1594488620758057, - "log_odds_ratio": -0.2846815288066864, - "logits/chosen": 0.6966798901557922, - "logits/rejected": -0.03907991573214531, - "logps/chosen": -0.5779567360877991, - "logps/rejected": -1.2599116563796997, - "loss": 0.9544, - "nll_loss": 0.9259694814682007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.057795681059360504, - "rewards/margins": 0.06819549202919006, - "rewards/rejected": -0.12599116563796997, - "step": 174 - }, - { - "epoch": 0.5780346820809249, - "grad_norm": 0.5580859184265137, - "learning_rate": 1.7474402730375427e-06, - "log_odds_chosen": 0.8451789617538452, - "log_odds_ratio": -0.36418604850769043, - "logits/chosen": 0.5299327969551086, - "logits/rejected": 0.20314545929431915, - "logps/chosen": -0.49265965819358826, - "logps/rejected": -0.9087985157966614, - "loss": 0.8843, - "nll_loss": 0.8479278683662415, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.049265965819358826, - "rewards/margins": 0.04161389172077179, - "rewards/rejected": -0.09087985754013062, - "step": 175 - }, - { - "epoch": 0.5813377374071016, - "grad_norm": 0.7369362711906433, - "learning_rate": 1.7337883959044368e-06, - "log_odds_chosen": 0.45730042457580566, - "log_odds_ratio": -0.5694980025291443, - "logits/chosen": 0.5594283938407898, - "logits/rejected": 0.29504209756851196, - "logps/chosen": -0.7547248005867004, - "logps/rejected": -0.9544341564178467, - "loss": 1.0879, - "nll_loss": 1.0309605598449707, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.07547248154878616, - "rewards/margins": 0.019970936700701714, - "rewards/rejected": -0.09544342011213303, - "step": 176 - }, - { - "epoch": 0.5846407927332783, - "grad_norm": 0.5703591108322144, - "learning_rate": 1.720136518771331e-06, - "log_odds_chosen": 0.48781818151474, - "log_odds_ratio": -0.48935312032699585, - "logits/chosen": 0.4472290277481079, - "logits/rejected": 0.23646193742752075, - "logps/chosen": -0.6610206365585327, - "logps/rejected": -0.9084389209747314, - "loss": 1.1469, - "nll_loss": 1.097981572151184, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06610207259654999, - "rewards/margins": 0.024741819128394127, - "rewards/rejected": -0.09084389358758926, - "step": 177 - }, - { - "epoch": 0.587943848059455, - "grad_norm": 0.4832722246646881, - "learning_rate": 1.7064846416382253e-06, - "log_odds_chosen": 0.9262427687644958, - "log_odds_ratio": -0.3858339488506317, - "logits/chosen": 0.6600658893585205, - "logits/rejected": 0.13712306320667267, - "logps/chosen": -0.6037144660949707, - "logps/rejected": -1.077640414237976, - "loss": 0.9159, - "nll_loss": 0.8772901296615601, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06037144735455513, - "rewards/margins": 0.047392599284648895, - "rewards/rejected": -0.10776405036449432, - "step": 178 - }, - { - "epoch": 0.5912469033856317, - "grad_norm": 0.4645325243473053, - "learning_rate": 1.6928327645051194e-06, - "log_odds_chosen": 0.8836040496826172, - "log_odds_ratio": -0.3974376618862152, - "logits/chosen": 0.6722155809402466, - "logits/rejected": 0.14169231057167053, - "logps/chosen": -0.5342135429382324, - "logps/rejected": -0.9420076012611389, - "loss": 0.851, - "nll_loss": 0.8112754225730896, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.05342135205864906, - "rewards/margins": 0.04077940434217453, - "rewards/rejected": -0.0942007526755333, - "step": 179 - }, - { - "epoch": 0.5945499587118084, - "grad_norm": 0.49239474534988403, - "learning_rate": 1.6791808873720134e-06, - "log_odds_chosen": 0.8059144020080566, - "log_odds_ratio": -0.37965869903564453, - "logits/chosen": 0.6557098031044006, - "logits/rejected": 0.161850243806839, - "logps/chosen": -0.5719705820083618, - "logps/rejected": -0.9992337226867676, - "loss": 0.9478, - "nll_loss": 0.9097999334335327, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05719705671072006, - "rewards/margins": 0.0427263081073761, - "rewards/rejected": -0.09992337226867676, - "step": 180 - }, - { - "epoch": 0.5978530140379852, - "grad_norm": 0.5725061297416687, - "learning_rate": 1.6655290102389077e-06, - "log_odds_chosen": 0.8833220601081848, - "log_odds_ratio": -0.3773239254951477, - "logits/chosen": 0.6191695928573608, - "logits/rejected": 0.5692196488380432, - "logps/chosen": -0.5647393465042114, - "logps/rejected": -1.0044803619384766, - "loss": 0.9381, - "nll_loss": 0.9003627300262451, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05647393688559532, - "rewards/margins": 0.04397409036755562, - "rewards/rejected": -0.10044802725315094, - "step": 181 - }, - { - "epoch": 0.6011560693641619, - "grad_norm": 0.6636156439781189, - "learning_rate": 1.6518771331058018e-06, - "log_odds_chosen": 0.4345851242542267, - "log_odds_ratio": -0.5814716815948486, - "logits/chosen": 0.5617907643318176, - "logits/rejected": 0.22097325325012207, - "logps/chosen": -0.8166406154632568, - "logps/rejected": -1.0496996641159058, - "loss": 1.1205, - "nll_loss": 1.062380075454712, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.0816640630364418, - "rewards/margins": 0.02330590784549713, - "rewards/rejected": -0.10496997833251953, - "step": 182 - }, - { - "epoch": 0.6044591246903386, - "grad_norm": 0.5797082781791687, - "learning_rate": 1.638225255972696e-06, - "log_odds_chosen": 0.8162098526954651, - "log_odds_ratio": -0.37049978971481323, - "logits/chosen": 0.5102230310440063, - "logits/rejected": 0.2050783634185791, - "logps/chosen": -0.5252861976623535, - "logps/rejected": -0.9432477951049805, - "loss": 0.9841, - "nll_loss": 0.9470739364624023, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05252861976623535, - "rewards/margins": 0.041796162724494934, - "rewards/rejected": -0.09432478994131088, - "step": 183 - }, - { - "epoch": 0.6077621800165153, - "grad_norm": 0.5683942437171936, - "learning_rate": 1.6245733788395904e-06, - "log_odds_chosen": 0.5739707946777344, - "log_odds_ratio": -0.44951844215393066, - "logits/chosen": 0.6993671655654907, - "logits/rejected": 0.20960506796836853, - "logps/chosen": -0.5878472328186035, - "logps/rejected": -0.8822935223579407, - "loss": 0.9794, - "nll_loss": 0.9344406723976135, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05878472700715065, - "rewards/margins": 0.029444627463817596, - "rewards/rejected": -0.08822935074567795, - "step": 184 - }, - { - "epoch": 0.611065235342692, - "grad_norm": 0.5559807419776917, - "learning_rate": 1.6109215017064846e-06, - "log_odds_chosen": 0.9749253988265991, - "log_odds_ratio": -0.33763015270233154, - "logits/chosen": 0.6716713905334473, - "logits/rejected": 0.16542254388332367, - "logps/chosen": -0.5712106823921204, - "logps/rejected": -1.0929224491119385, - "loss": 0.929, - "nll_loss": 0.8952525854110718, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.057121068239212036, - "rewards/margins": 0.052171193063259125, - "rewards/rejected": -0.10929225385189056, - "step": 185 - }, - { - "epoch": 0.6143682906688687, - "grad_norm": 0.569916307926178, - "learning_rate": 1.5972696245733787e-06, - "log_odds_chosen": 0.5634703636169434, - "log_odds_ratio": -0.47419047355651855, - "logits/chosen": 0.5817551612854004, - "logits/rejected": 0.07774267345666885, - "logps/chosen": -0.6836932301521301, - "logps/rejected": -0.9906957149505615, - "loss": 1.1306, - "nll_loss": 1.0832195281982422, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0683693215250969, - "rewards/margins": 0.03070024773478508, - "rewards/rejected": -0.09906957298517227, - "step": 186 - }, - { - "epoch": 0.6176713459950454, - "grad_norm": 0.5632830262184143, - "learning_rate": 1.583617747440273e-06, - "log_odds_chosen": 1.340898036956787, - "log_odds_ratio": -0.2395646572113037, - "logits/chosen": 0.44392338395118713, - "logits/rejected": -0.01059018075466156, - "logps/chosen": -0.43889978528022766, - "logps/rejected": -1.1459397077560425, - "loss": 0.8227, - "nll_loss": 0.7987263202667236, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.043889980763196945, - "rewards/margins": 0.07070398330688477, - "rewards/rejected": -0.11459396779537201, - "step": 187 - }, - { - "epoch": 0.6209744013212222, - "grad_norm": 0.48556017875671387, - "learning_rate": 1.5699658703071673e-06, - "log_odds_chosen": 0.9501125812530518, - "log_odds_ratio": -0.3319471776485443, - "logits/chosen": 0.7496635913848877, - "logits/rejected": -0.03804942965507507, - "logps/chosen": -0.5996420979499817, - "logps/rejected": -1.1412016153335571, - "loss": 0.9775, - "nll_loss": 0.9442574381828308, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05996420979499817, - "rewards/margins": 0.054155949503183365, - "rewards/rejected": -0.11412016302347183, - "step": 188 - }, - { - "epoch": 0.6242774566473989, - "grad_norm": 0.53284752368927, - "learning_rate": 1.5563139931740613e-06, - "log_odds_chosen": 0.42260289192199707, - "log_odds_ratio": -0.5618629455566406, - "logits/chosen": 0.6096940636634827, - "logits/rejected": 0.08672965317964554, - "logps/chosen": -0.7070387601852417, - "logps/rejected": -0.919546365737915, - "loss": 1.0416, - "nll_loss": 0.9853861331939697, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.07070387899875641, - "rewards/margins": 0.021250750869512558, - "rewards/rejected": -0.09195462614297867, - "step": 189 - }, - { - "epoch": 0.6275805119735756, - "grad_norm": 0.6383630037307739, - "learning_rate": 1.5426621160409556e-06, - "log_odds_chosen": 0.5654281377792358, - "log_odds_ratio": -0.4598838686943054, - "logits/chosen": 0.5957477688789368, - "logits/rejected": 0.45544517040252686, - "logps/chosen": -0.6129018068313599, - "logps/rejected": -0.9107569456100464, - "loss": 1.1075, - "nll_loss": 1.061496615409851, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.061290185898542404, - "rewards/margins": 0.029785512015223503, - "rewards/rejected": -0.09107570350170135, - "step": 190 - }, - { - "epoch": 0.6308835672997523, - "grad_norm": 0.7678268551826477, - "learning_rate": 1.5290102389078499e-06, - "log_odds_chosen": 0.7110785245895386, - "log_odds_ratio": -0.422348290681839, - "logits/chosen": 0.5309222936630249, - "logits/rejected": 0.4590129256248474, - "logps/chosen": -0.609477162361145, - "logps/rejected": -0.988329291343689, - "loss": 0.9806, - "nll_loss": 0.9383970499038696, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0609477236866951, - "rewards/margins": 0.037885215133428574, - "rewards/rejected": -0.09883293509483337, - "step": 191 - }, - { - "epoch": 0.634186622625929, - "grad_norm": 0.5356475710868835, - "learning_rate": 1.515358361774744e-06, - "log_odds_chosen": 1.094473958015442, - "log_odds_ratio": -0.2936018407344818, - "logits/chosen": 0.6191961169242859, - "logits/rejected": -0.09346647560596466, - "logps/chosen": -0.5865436792373657, - "logps/rejected": -1.2043328285217285, - "loss": 0.8683, - "nll_loss": 0.8389862179756165, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05865437164902687, - "rewards/margins": 0.06177891045808792, - "rewards/rejected": -0.1204332709312439, - "step": 192 - }, - { - "epoch": 0.6374896779521056, - "grad_norm": 0.3720441460609436, - "learning_rate": 1.501706484641638e-06, - "log_odds_chosen": 0.8363051414489746, - "log_odds_ratio": -0.39341020584106445, - "logits/chosen": 0.5970199704170227, - "logits/rejected": 0.3498647212982178, - "logps/chosen": -0.473673939704895, - "logps/rejected": -0.8736249804496765, - "loss": 0.8973, - "nll_loss": 0.8579722046852112, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.047367390245199203, - "rewards/margins": 0.03999510779976845, - "rewards/rejected": -0.08736249804496765, - "step": 193 - }, - { - "epoch": 0.6407927332782825, - "grad_norm": 0.4866119921207428, - "learning_rate": 1.4880546075085323e-06, - "log_odds_chosen": 1.3016141653060913, - "log_odds_ratio": -0.3650715947151184, - "logits/chosen": 0.5466253757476807, - "logits/rejected": -0.010667689144611359, - "logps/chosen": -0.6213196516036987, - "logps/rejected": -1.2685027122497559, - "loss": 0.9628, - "nll_loss": 0.926327645778656, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06213196739554405, - "rewards/margins": 0.06471830606460571, - "rewards/rejected": -0.12685027718544006, - "step": 194 - }, - { - "epoch": 0.6440957886044592, - "grad_norm": 0.5375180244445801, - "learning_rate": 1.4744027303754266e-06, - "log_odds_chosen": 1.0410693883895874, - "log_odds_ratio": -0.3096345365047455, - "logits/chosen": 0.7241842746734619, - "logits/rejected": 0.25105175375938416, - "logps/chosen": -0.5415096879005432, - "logps/rejected": -1.106784462928772, - "loss": 0.8546, - "nll_loss": 0.8236784338951111, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05415096879005432, - "rewards/margins": 0.056527480483055115, - "rewards/rejected": -0.11067844927310944, - "step": 195 - }, - { - "epoch": 0.6473988439306358, - "grad_norm": 0.3262706696987152, - "learning_rate": 1.4607508532423206e-06, - "log_odds_chosen": 0.8872663378715515, - "log_odds_ratio": -0.3577076196670532, - "logits/chosen": 0.69605952501297, - "logits/rejected": 0.03770583122968674, - "logps/chosen": -0.4998326301574707, - "logps/rejected": -0.9416731595993042, - "loss": 0.8441, - "nll_loss": 0.8083184361457825, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04998326674103737, - "rewards/margins": 0.04418404400348663, - "rewards/rejected": -0.0941673070192337, - "step": 196 - }, - { - "epoch": 0.6507018992568125, - "grad_norm": 0.8134672045707703, - "learning_rate": 1.447098976109215e-06, - "log_odds_chosen": 0.6546661853790283, - "log_odds_ratio": -0.44986793398857117, - "logits/chosen": 0.5431863069534302, - "logits/rejected": 0.2773181200027466, - "logps/chosen": -0.7196569442749023, - "logps/rejected": -1.0657694339752197, - "loss": 1.0268, - "nll_loss": 0.9817945957183838, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07196569442749023, - "rewards/margins": 0.034611254930496216, - "rewards/rejected": -0.10657694935798645, - "step": 197 - }, - { - "epoch": 0.6540049545829892, - "grad_norm": 0.6602051854133606, - "learning_rate": 1.4334470989761092e-06, - "log_odds_chosen": 0.9596164226531982, - "log_odds_ratio": -0.3326166570186615, - "logits/chosen": 0.57683926820755, - "logits/rejected": 0.2176540195941925, - "logps/chosen": -0.4638594388961792, - "logps/rejected": -0.9207676649093628, - "loss": 0.8753, - "nll_loss": 0.8420792818069458, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04638594016432762, - "rewards/margins": 0.04569082334637642, - "rewards/rejected": -0.09207677096128464, - "step": 198 - }, - { - "epoch": 0.6573080099091659, - "grad_norm": 0.8099370002746582, - "learning_rate": 1.4197952218430035e-06, - "log_odds_chosen": 1.158339500427246, - "log_odds_ratio": -0.2753346264362335, - "logits/chosen": 0.7921282649040222, - "logits/rejected": -0.07282574474811554, - "logps/chosen": -0.5225726366043091, - "logps/rejected": -1.1488879919052124, - "loss": 0.7989, - "nll_loss": 0.7713789939880371, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05225726217031479, - "rewards/margins": 0.0626315325498581, - "rewards/rejected": -0.11488879472017288, - "step": 199 - }, - { - "epoch": 0.6606110652353427, - "grad_norm": 0.6994848251342773, - "learning_rate": 1.4061433447098975e-06, - "log_odds_chosen": 0.5506600141525269, - "log_odds_ratio": -0.49804046750068665, - "logits/chosen": 0.49773287773132324, - "logits/rejected": 0.06167655810713768, - "logps/chosen": -0.6549858450889587, - "logps/rejected": -0.9094030857086182, - "loss": 0.9921, - "nll_loss": 0.9422547817230225, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06549859046936035, - "rewards/margins": 0.025441719219088554, - "rewards/rejected": -0.09094031155109406, - "step": 200 - }, - { - "epoch": 0.6639141205615194, - "grad_norm": 0.39140769839286804, - "learning_rate": 1.3924914675767918e-06, - "log_odds_chosen": 0.8358291387557983, - "log_odds_ratio": -0.4507751762866974, - "logits/chosen": 0.6371957659721375, - "logits/rejected": 0.1328539103269577, - "logps/chosen": -0.5850399732589722, - "logps/rejected": -0.9630100727081299, - "loss": 0.9389, - "nll_loss": 0.8937881588935852, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.058504000306129456, - "rewards/margins": 0.03779701143503189, - "rewards/rejected": -0.09630100429058075, - "step": 201 - }, - { - "epoch": 0.6672171758876961, - "grad_norm": 0.38152557611465454, - "learning_rate": 1.378839590443686e-06, - "log_odds_chosen": 0.442609965801239, - "log_odds_ratio": -0.5644446015357971, - "logits/chosen": 0.609871506690979, - "logits/rejected": 0.3032620847225189, - "logps/chosen": -0.716311514377594, - "logps/rejected": -0.9218910932540894, - "loss": 1.0791, - "nll_loss": 1.0226891040802002, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07163114845752716, - "rewards/margins": 0.020557956770062447, - "rewards/rejected": -0.09218911081552505, - "step": 202 - }, - { - "epoch": 0.6705202312138728, - "grad_norm": 0.45840156078338623, - "learning_rate": 1.3651877133105802e-06, - "log_odds_chosen": 0.7839701175689697, - "log_odds_ratio": -0.4063185155391693, - "logits/chosen": 0.6470541954040527, - "logits/rejected": -0.07762344181537628, - "logps/chosen": -0.642772376537323, - "logps/rejected": -1.072174310684204, - "loss": 0.9381, - "nll_loss": 0.8974647521972656, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06427723914384842, - "rewards/margins": 0.04294019564986229, - "rewards/rejected": -0.10721743106842041, - "step": 203 - }, - { - "epoch": 0.6738232865400495, - "grad_norm": 0.706840455532074, - "learning_rate": 1.3515358361774742e-06, - "log_odds_chosen": 0.692832350730896, - "log_odds_ratio": -0.4300873875617981, - "logits/chosen": 0.7253742218017578, - "logits/rejected": 0.09278953075408936, - "logps/chosen": -0.6329224109649658, - "logps/rejected": -1.012474536895752, - "loss": 0.9517, - "nll_loss": 0.9087225198745728, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.0632922425866127, - "rewards/margins": 0.037955205887556076, - "rewards/rejected": -0.10124745219945908, - "step": 204 - }, - { - "epoch": 0.6771263418662262, - "grad_norm": 0.8941081762313843, - "learning_rate": 1.3378839590443685e-06, - "log_odds_chosen": 0.48629921674728394, - "log_odds_ratio": -0.5280619263648987, - "logits/chosen": 0.6185728907585144, - "logits/rejected": 0.27626100182533264, - "logps/chosen": -0.7393509149551392, - "logps/rejected": -0.9961026906967163, - "loss": 1.0557, - "nll_loss": 1.0028753280639648, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07393509149551392, - "rewards/margins": 0.025675185024738312, - "rewards/rejected": -0.09961026906967163, - "step": 205 - }, - { - "epoch": 0.6804293971924029, - "grad_norm": 0.4302248954772949, - "learning_rate": 1.3242320819112626e-06, - "log_odds_chosen": 0.7560518980026245, - "log_odds_ratio": -0.43573513627052307, - "logits/chosen": 0.6392366290092468, - "logits/rejected": 0.40856805443763733, - "logps/chosen": -0.6119944453239441, - "logps/rejected": -1.0334619283676147, - "loss": 0.9814, - "nll_loss": 0.9378758668899536, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06119944900274277, - "rewards/margins": 0.042146749794483185, - "rewards/rejected": -0.10334619879722595, - "step": 206 - }, - { - "epoch": 0.6837324525185797, - "grad_norm": 0.41963085532188416, - "learning_rate": 1.3105802047781569e-06, - "log_odds_chosen": 0.6142325401306152, - "log_odds_ratio": -0.48490890860557556, - "logits/chosen": 0.8145309686660767, - "logits/rejected": 0.19186165928840637, - "logps/chosen": -0.5725026726722717, - "logps/rejected": -0.8954076766967773, - "loss": 0.8733, - "nll_loss": 0.8248522281646729, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.05725026875734329, - "rewards/margins": 0.0322904996573925, - "rewards/rejected": -0.0895407646894455, - "step": 207 - }, - { - "epoch": 0.6870355078447564, - "grad_norm": 0.4841734766960144, - "learning_rate": 1.2969283276450511e-06, - "log_odds_chosen": 0.6993224620819092, - "log_odds_ratio": -0.47004106640815735, - "logits/chosen": 0.7209720015525818, - "logits/rejected": 0.10421188175678253, - "logps/chosen": -0.5845094919204712, - "logps/rejected": -0.9023677110671997, - "loss": 0.912, - "nll_loss": 0.8650003671646118, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.05845094844698906, - "rewards/margins": 0.03178582713007927, - "rewards/rejected": -0.09023678302764893, - "step": 208 - }, - { - "epoch": 0.6903385631709331, - "grad_norm": 0.4647423326969147, - "learning_rate": 1.2832764505119454e-06, - "log_odds_chosen": 0.9020592570304871, - "log_odds_ratio": -0.3538837432861328, - "logits/chosen": 0.6181030869483948, - "logits/rejected": 0.2527831792831421, - "logps/chosen": -0.5897952318191528, - "logps/rejected": -1.0826042890548706, - "loss": 0.9361, - "nll_loss": 0.9007019996643066, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05897952616214752, - "rewards/margins": 0.04928090423345566, - "rewards/rejected": -0.10826043039560318, - "step": 209 - }, - { - "epoch": 0.6936416184971098, - "grad_norm": 0.4706217646598816, - "learning_rate": 1.2696245733788395e-06, - "log_odds_chosen": 0.6770548820495605, - "log_odds_ratio": -0.42429640889167786, - "logits/chosen": 0.5560393929481506, - "logits/rejected": 0.0986425057053566, - "logps/chosen": -0.6921908259391785, - "logps/rejected": -1.0451680421829224, - "loss": 1.0407, - "nll_loss": 0.9982774257659912, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06921908259391785, - "rewards/margins": 0.03529772162437439, - "rewards/rejected": -0.10451680421829224, - "step": 210 - }, - { - "epoch": 0.6969446738232865, - "grad_norm": 0.6115151643753052, - "learning_rate": 1.2559726962457338e-06, - "log_odds_chosen": 1.3318259716033936, - "log_odds_ratio": -0.2648318409919739, - "logits/chosen": 0.6291557550430298, - "logits/rejected": 0.22125570476055145, - "logps/chosen": -0.4556346535682678, - "logps/rejected": -1.132562518119812, - "loss": 0.915, - "nll_loss": 0.8885064125061035, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0455634668469429, - "rewards/margins": 0.06769278645515442, - "rewards/rejected": -0.11325625330209732, - "step": 211 - }, - { - "epoch": 0.7002477291494632, - "grad_norm": 0.6136762499809265, - "learning_rate": 1.242320819112628e-06, - "log_odds_chosen": -0.17508749663829803, - "log_odds_ratio": -0.7857800126075745, - "logits/chosen": 0.6159629821777344, - "logits/rejected": 0.3486577272415161, - "logps/chosen": -0.8720688819885254, - "logps/rejected": -0.7840614914894104, - "loss": 1.2131, - "nll_loss": 1.1345105171203613, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.0872068852186203, - "rewards/margins": -0.008800733834505081, - "rewards/rejected": -0.07840615510940552, - "step": 212 - }, - { - "epoch": 0.70355078447564, - "grad_norm": 0.5083693861961365, - "learning_rate": 1.2286689419795221e-06, - "log_odds_chosen": 0.5884926319122314, - "log_odds_ratio": -0.497115820646286, - "logits/chosen": 0.6771700382232666, - "logits/rejected": 0.1044008880853653, - "logps/chosen": -0.7319629788398743, - "logps/rejected": -1.0562744140625, - "loss": 1.1531, - "nll_loss": 1.1033875942230225, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07319629937410355, - "rewards/margins": 0.032431140542030334, - "rewards/rejected": -0.10562743991613388, - "step": 213 - }, - { - "epoch": 0.7068538398018167, - "grad_norm": 0.4392530918121338, - "learning_rate": 1.2150170648464164e-06, - "log_odds_chosen": 0.8395816683769226, - "log_odds_ratio": -0.37265893816947937, - "logits/chosen": 0.6045567989349365, - "logits/rejected": 0.23375551402568817, - "logps/chosen": -0.5433681011199951, - "logps/rejected": -0.955073356628418, - "loss": 0.904, - "nll_loss": 0.866721510887146, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05433680862188339, - "rewards/margins": 0.04117053002119064, - "rewards/rejected": -0.09550733864307404, - "step": 214 - }, - { - "epoch": 0.7101568951279934, - "grad_norm": 0.7264850735664368, - "learning_rate": 1.2013651877133107e-06, - "log_odds_chosen": 0.21701355278491974, - "log_odds_ratio": -0.605089545249939, - "logits/chosen": 0.49419716000556946, - "logits/rejected": 0.2875365912914276, - "logps/chosen": -0.7674678564071655, - "logps/rejected": -0.8802107572555542, - "loss": 1.1222, - "nll_loss": 1.0616798400878906, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.07674678415060043, - "rewards/margins": 0.011274297721683979, - "rewards/rejected": -0.08802108466625214, - "step": 215 - }, - { - "epoch": 0.7134599504541701, - "grad_norm": 0.5379980802536011, - "learning_rate": 1.1877133105802047e-06, - "log_odds_chosen": 0.8040729761123657, - "log_odds_ratio": -0.3935239315032959, - "logits/chosen": 0.6190573573112488, - "logits/rejected": 0.16559018194675446, - "logps/chosen": -0.5957576036453247, - "logps/rejected": -1.0329526662826538, - "loss": 0.9571, - "nll_loss": 0.9177417755126953, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05957575887441635, - "rewards/margins": 0.043719515204429626, - "rewards/rejected": -0.10329527407884598, - "step": 216 - }, - { - "epoch": 0.7167630057803468, - "grad_norm": 0.4827168881893158, - "learning_rate": 1.1740614334470988e-06, - "log_odds_chosen": 1.278826355934143, - "log_odds_ratio": -0.254116028547287, - "logits/chosen": 0.5965265035629272, - "logits/rejected": -0.0749644786119461, - "logps/chosen": -0.5376965999603271, - "logps/rejected": -1.2622201442718506, - "loss": 0.9018, - "nll_loss": 0.876410722732544, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.053769662976264954, - "rewards/margins": 0.0724523514509201, - "rewards/rejected": -0.12622201442718506, - "step": 217 - }, - { - "epoch": 0.7200660611065235, - "grad_norm": 0.45818495750427246, - "learning_rate": 1.160409556313993e-06, - "log_odds_chosen": 0.9951336979866028, - "log_odds_ratio": -0.31905636191368103, - "logits/chosen": 0.5958857536315918, - "logits/rejected": 0.1625841110944748, - "logps/chosen": -0.5735612511634827, - "logps/rejected": -1.132023811340332, - "loss": 0.8808, - "nll_loss": 0.848849356174469, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.057356130331754684, - "rewards/margins": 0.055846262723207474, - "rewards/rejected": -0.11320239305496216, - "step": 218 - }, - { - "epoch": 0.7233691164327003, - "grad_norm": 0.3392535448074341, - "learning_rate": 1.1467576791808874e-06, - "log_odds_chosen": 1.391084909439087, - "log_odds_ratio": -0.28087764978408813, - "logits/chosen": 0.8243410587310791, - "logits/rejected": -0.08359797298908234, - "logps/chosen": -0.48731422424316406, - "logps/rejected": -1.135962963104248, - "loss": 0.8242, - "nll_loss": 0.7961446046829224, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.048731423914432526, - "rewards/margins": 0.0648648738861084, - "rewards/rejected": -0.11359630525112152, - "step": 219 - }, - { - "epoch": 0.726672171758877, - "grad_norm": 0.6839402318000793, - "learning_rate": 1.1331058020477814e-06, - "log_odds_chosen": 0.12872269749641418, - "log_odds_ratio": -0.6741952300071716, - "logits/chosen": 0.5966477990150452, - "logits/rejected": 0.15847577154636383, - "logps/chosen": -0.718238890171051, - "logps/rejected": -0.7941676378250122, - "loss": 1.1105, - "nll_loss": 1.0430629253387451, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.07182389497756958, - "rewards/margins": 0.0075928689911961555, - "rewards/rejected": -0.07941675931215286, - "step": 220 - }, - { - "epoch": 0.7299752270850537, - "grad_norm": 0.5558754205703735, - "learning_rate": 1.1194539249146757e-06, - "log_odds_chosen": 0.8164756298065186, - "log_odds_ratio": -0.3905107378959656, - "logits/chosen": 0.7649264335632324, - "logits/rejected": 0.011369393207132816, - "logps/chosen": -0.6230915784835815, - "logps/rejected": -1.0843837261199951, - "loss": 0.9897, - "nll_loss": 0.9506224393844604, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06230916082859039, - "rewards/margins": 0.04612921178340912, - "rewards/rejected": -0.10843837261199951, - "step": 221 - }, - { - "epoch": 0.7332782824112304, - "grad_norm": 0.7961308360099792, - "learning_rate": 1.10580204778157e-06, - "log_odds_chosen": 0.2830323576927185, - "log_odds_ratio": -0.5751523971557617, - "logits/chosen": 0.4822857677936554, - "logits/rejected": 0.2046804279088974, - "logps/chosen": -0.7521154284477234, - "logps/rejected": -0.8943564891815186, - "loss": 1.1026, - "nll_loss": 1.0451338291168213, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.0752115473151207, - "rewards/margins": 0.014224106445908546, - "rewards/rejected": -0.0894356518983841, - "step": 222 - }, - { - "epoch": 0.7365813377374071, - "grad_norm": 0.592881977558136, - "learning_rate": 1.092150170648464e-06, - "log_odds_chosen": 0.9480487704277039, - "log_odds_ratio": -0.33181825280189514, - "logits/chosen": 0.6319090127944946, - "logits/rejected": 0.15143048763275146, - "logps/chosen": -0.5448603630065918, - "logps/rejected": -1.053378939628601, - "loss": 0.9177, - "nll_loss": 0.8845430612564087, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05448604002594948, - "rewards/margins": 0.050851866602897644, - "rewards/rejected": -0.10533790290355682, - "step": 223 - }, - { - "epoch": 0.7398843930635838, - "grad_norm": 0.6401156783103943, - "learning_rate": 1.0784982935153583e-06, - "log_odds_chosen": 0.927370548248291, - "log_odds_ratio": -0.35446882247924805, - "logits/chosen": 0.6470073461532593, - "logits/rejected": -0.09239303320646286, - "logps/chosen": -0.5870106816291809, - "logps/rejected": -1.110879898071289, - "loss": 0.9081, - "nll_loss": 0.8726779222488403, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05870106816291809, - "rewards/margins": 0.05238693952560425, - "rewards/rejected": -0.11108800768852234, - "step": 224 - }, - { - "epoch": 0.7431874483897605, - "grad_norm": 0.7251186370849609, - "learning_rate": 1.0648464163822526e-06, - "log_odds_chosen": 1.1196869611740112, - "log_odds_ratio": -0.34080377221107483, - "logits/chosen": 0.5632862448692322, - "logits/rejected": 0.22562305629253387, - "logps/chosen": -0.4798103868961334, - "logps/rejected": -0.974930465221405, - "loss": 0.7876, - "nll_loss": 0.7535117268562317, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04798104241490364, - "rewards/margins": 0.04951200634241104, - "rewards/rejected": -0.09749304503202438, - "step": 225 - }, - { - "epoch": 0.7464905037159373, - "grad_norm": 0.6834261417388916, - "learning_rate": 1.0511945392491469e-06, - "log_odds_chosen": 1.0690453052520752, - "log_odds_ratio": -0.2990497648715973, - "logits/chosen": 0.70441734790802, - "logits/rejected": -0.13668978214263916, - "logps/chosen": -0.5938550233840942, - "logps/rejected": -1.208716869354248, - "loss": 0.9048, - "nll_loss": 0.8748910427093506, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.059385500848293304, - "rewards/margins": 0.061486195772886276, - "rewards/rejected": -0.12087170034646988, - "step": 226 - }, - { - "epoch": 0.749793559042114, - "grad_norm": 0.592964231967926, - "learning_rate": 1.0375426621160407e-06, - "log_odds_chosen": 0.6088603734970093, - "log_odds_ratio": -0.4602106213569641, - "logits/chosen": 0.6256715655326843, - "logits/rejected": 0.10552433133125305, - "logps/chosen": -0.6842691898345947, - "logps/rejected": -1.0225801467895508, - "loss": 1.0159, - "nll_loss": 0.9698607325553894, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06842692196369171, - "rewards/margins": 0.033831093460321426, - "rewards/rejected": -0.10225801169872284, - "step": 227 - }, - { - "epoch": 0.7530966143682907, - "grad_norm": 0.6449509859085083, - "learning_rate": 1.023890784982935e-06, - "log_odds_chosen": 1.2049510478973389, - "log_odds_ratio": -0.27126815915107727, - "logits/chosen": 0.7337619662284851, - "logits/rejected": 0.03950557857751846, - "logps/chosen": -0.5456995964050293, - "logps/rejected": -1.2218623161315918, - "loss": 0.8766, - "nll_loss": 0.8494260907173157, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05456996336579323, - "rewards/margins": 0.06761626899242401, - "rewards/rejected": -0.12218622863292694, - "step": 228 - }, - { - "epoch": 0.7563996696944674, - "grad_norm": 0.4573817849159241, - "learning_rate": 1.0102389078498293e-06, - "log_odds_chosen": 1.0245609283447266, - "log_odds_ratio": -0.3258206844329834, - "logits/chosen": 0.7052476406097412, - "logits/rejected": 0.0735245943069458, - "logps/chosen": -0.500216543674469, - "logps/rejected": -1.0354862213134766, - "loss": 0.8074, - "nll_loss": 0.774804949760437, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05002165585756302, - "rewards/margins": 0.053526971489191055, - "rewards/rejected": -0.10354862362146378, - "step": 229 - }, - { - "epoch": 0.7597027250206441, - "grad_norm": 0.542776882648468, - "learning_rate": 9.965870307167234e-07, - "log_odds_chosen": 0.4714962840080261, - "log_odds_ratio": -0.5570093989372253, - "logits/chosen": 0.6502556800842285, - "logits/rejected": 0.2839999496936798, - "logps/chosen": -0.6580934524536133, - "logps/rejected": -0.9146501421928406, - "loss": 1.0756, - "nll_loss": 1.0199486017227173, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06580934673547745, - "rewards/margins": 0.025655671954154968, - "rewards/rejected": -0.09146501868963242, - "step": 230 - }, - { - "epoch": 0.7630057803468208, - "grad_norm": 0.6820701956748962, - "learning_rate": 9.829351535836176e-07, - "log_odds_chosen": 0.5042517781257629, - "log_odds_ratio": -0.5763360857963562, - "logits/chosen": 0.5764023661613464, - "logits/rejected": 0.0021821577101945877, - "logps/chosen": -0.7690185904502869, - "logps/rejected": -1.022222876548767, - "loss": 1.0778, - "nll_loss": 1.0201334953308105, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.0769018605351448, - "rewards/margins": 0.02532043494284153, - "rewards/rejected": -0.10222229361534119, - "step": 231 - }, - { - "epoch": 0.7663088356729976, - "grad_norm": 0.32886484265327454, - "learning_rate": 9.69283276450512e-07, - "log_odds_chosen": 1.264896273612976, - "log_odds_ratio": -0.2536144554615021, - "logits/chosen": 0.7534321546554565, - "logits/rejected": -0.016296900808811188, - "logps/chosen": -0.3933512568473816, - "logps/rejected": -0.9950474500656128, - "loss": 0.754, - "nll_loss": 0.7286770939826965, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03933513164520264, - "rewards/margins": 0.060169607400894165, - "rewards/rejected": -0.0995047390460968, - "step": 232 - }, - { - "epoch": 0.7696118909991743, - "grad_norm": 0.48609060049057007, - "learning_rate": 9.556313993174062e-07, - "log_odds_chosen": 0.9991622567176819, - "log_odds_ratio": -0.3160445988178253, - "logits/chosen": 0.5852617621421814, - "logits/rejected": 0.3490922451019287, - "logps/chosen": -0.5229590535163879, - "logps/rejected": -1.0472159385681152, - "loss": 0.93, - "nll_loss": 0.8984131813049316, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.052295904606580734, - "rewards/margins": 0.05242568999528885, - "rewards/rejected": -0.10472159087657928, - "step": 233 - }, - { - "epoch": 0.772914946325351, - "grad_norm": 0.433671772480011, - "learning_rate": 9.419795221843004e-07, - "log_odds_chosen": 1.0517669916152954, - "log_odds_ratio": -0.31660887598991394, - "logits/chosen": 0.7648612260818481, - "logits/rejected": 0.31643152236938477, - "logps/chosen": -0.47595569491386414, - "logps/rejected": -0.9926103353500366, - "loss": 0.9179, - "nll_loss": 0.8862584233283997, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.047595568001270294, - "rewards/margins": 0.05166546627879143, - "rewards/rejected": -0.09926103800535202, - "step": 234 - }, - { - "epoch": 0.7762180016515277, - "grad_norm": 0.46932855248451233, - "learning_rate": 9.283276450511945e-07, - "log_odds_chosen": 0.6030049920082092, - "log_odds_ratio": -0.4544612765312195, - "logits/chosen": 0.6714116930961609, - "logits/rejected": 0.5839782953262329, - "logps/chosen": -0.5929805636405945, - "logps/rejected": -0.8773745894432068, - "loss": 0.9569, - "nll_loss": 0.9114702939987183, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05929805338382721, - "rewards/margins": 0.028439410030841827, - "rewards/rejected": -0.08773746341466904, - "step": 235 - }, - { - "epoch": 0.7795210569777044, - "grad_norm": 0.5499120950698853, - "learning_rate": 9.146757679180886e-07, - "log_odds_chosen": 1.0900614261627197, - "log_odds_ratio": -0.33833110332489014, - "logits/chosen": 0.6132259368896484, - "logits/rejected": 0.09309284389019012, - "logps/chosen": -0.5943810343742371, - "logps/rejected": -1.1237746477127075, - "loss": 0.9162, - "nll_loss": 0.8824090957641602, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.059438105672597885, - "rewards/margins": 0.05293935164809227, - "rewards/rejected": -0.11237745732069016, - "step": 236 - }, - { - "epoch": 0.782824112303881, - "grad_norm": 0.4562113285064697, - "learning_rate": 9.010238907849829e-07, - "log_odds_chosen": 0.5497158765792847, - "log_odds_ratio": -0.4572409391403198, - "logits/chosen": 0.6519767045974731, - "logits/rejected": 0.22898326814174652, - "logps/chosen": -0.7068825960159302, - "logps/rejected": -1.0260599851608276, - "loss": 1.0745, - "nll_loss": 1.0288114547729492, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07068825513124466, - "rewards/margins": 0.031917739659547806, - "rewards/rejected": -0.10260599851608276, - "step": 237 - }, - { - "epoch": 0.7861271676300579, - "grad_norm": 0.9395473599433899, - "learning_rate": 8.873720136518771e-07, - "log_odds_chosen": 0.6087286472320557, - "log_odds_ratio": -0.44022753834724426, - "logits/chosen": 0.8036108613014221, - "logits/rejected": 0.5548425316810608, - "logps/chosen": -0.6478132605552673, - "logps/rejected": -0.9784640073776245, - "loss": 1.0537, - "nll_loss": 1.0097031593322754, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0647813230752945, - "rewards/margins": 0.033065080642700195, - "rewards/rejected": -0.09784640371799469, - "step": 238 - }, - { - "epoch": 0.7894302229562346, - "grad_norm": 0.7380329370498657, - "learning_rate": 8.737201365187713e-07, - "log_odds_chosen": 1.2496747970581055, - "log_odds_ratio": -0.2653827667236328, - "logits/chosen": 0.745449423789978, - "logits/rejected": -0.034543052315711975, - "logps/chosen": -0.5385465025901794, - "logps/rejected": -1.2370598316192627, - "loss": 0.8184, - "nll_loss": 0.7918828725814819, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.053854651749134064, - "rewards/margins": 0.0698513314127922, - "rewards/rejected": -0.12370598316192627, - "step": 239 - }, - { - "epoch": 0.7927332782824112, - "grad_norm": 0.8663710355758667, - "learning_rate": 8.600682593856655e-07, - "log_odds_chosen": 0.8508704304695129, - "log_odds_ratio": -0.3619764447212219, - "logits/chosen": 0.7019792795181274, - "logits/rejected": 0.21884027123451233, - "logps/chosen": -0.5670613050460815, - "logps/rejected": -1.0255486965179443, - "loss": 0.8947, - "nll_loss": 0.8585167527198792, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05670613422989845, - "rewards/margins": 0.04584873467683792, - "rewards/rejected": -0.10255486518144608, - "step": 240 - }, - { - "epoch": 0.7960363336085879, - "grad_norm": 0.4710223972797394, - "learning_rate": 8.464163822525597e-07, - "log_odds_chosen": 0.7255326509475708, - "log_odds_ratio": -0.41024988889694214, - "logits/chosen": 0.7902576923370361, - "logits/rejected": 0.17195437848567963, - "logps/chosen": -0.5525068640708923, - "logps/rejected": -0.9262632131576538, - "loss": 0.8876, - "nll_loss": 0.8465515971183777, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05525068938732147, - "rewards/margins": 0.03737562894821167, - "rewards/rejected": -0.09262631833553314, - "step": 241 - }, - { - "epoch": 0.7993393889347646, - "grad_norm": 0.4448404014110565, - "learning_rate": 8.327645051194539e-07, - "log_odds_chosen": 0.9409824013710022, - "log_odds_ratio": -0.3543585240840912, - "logits/chosen": 0.4934588074684143, - "logits/rejected": 0.31733936071395874, - "logps/chosen": -0.5721904039382935, - "logps/rejected": -1.0677565336227417, - "loss": 1.0047, - "nll_loss": 0.9692625403404236, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.057219043374061584, - "rewards/margins": 0.049556612968444824, - "rewards/rejected": -0.10677565634250641, - "step": 242 - }, - { - "epoch": 0.8026424442609413, - "grad_norm": 0.36984509229660034, - "learning_rate": 8.19112627986348e-07, - "log_odds_chosen": 1.241189956665039, - "log_odds_ratio": -0.2601015865802765, - "logits/chosen": 0.6294851303100586, - "logits/rejected": 0.019494157284498215, - "logps/chosen": -0.4174768626689911, - "logps/rejected": -1.0144987106323242, - "loss": 0.7885, - "nll_loss": 0.7625011801719666, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04174768552184105, - "rewards/margins": 0.05970218777656555, - "rewards/rejected": -0.1014498770236969, - "step": 243 - }, - { - "epoch": 0.805945499587118, - "grad_norm": 0.5440399646759033, - "learning_rate": 8.054607508532423e-07, - "log_odds_chosen": 0.141658753156662, - "log_odds_ratio": -0.6754109859466553, - "logits/chosen": 0.6479302048683167, - "logits/rejected": 0.26013287901878357, - "logps/chosen": -0.8510721921920776, - "logps/rejected": -0.8924117088317871, - "loss": 1.2474, - "nll_loss": 1.1798360347747803, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.0851072147488594, - "rewards/margins": 0.0041339583694934845, - "rewards/rejected": -0.08924118429422379, - "step": 244 - }, - { - "epoch": 0.8092485549132948, - "grad_norm": 0.6318606734275818, - "learning_rate": 7.918088737201365e-07, - "log_odds_chosen": 0.9252104759216309, - "log_odds_ratio": -0.3583317995071411, - "logits/chosen": 0.568058967590332, - "logits/rejected": -0.045337777584791183, - "logps/chosen": -0.5792033076286316, - "logps/rejected": -1.074874997138977, - "loss": 0.9401, - "nll_loss": 0.904220700263977, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05792032927274704, - "rewards/margins": 0.04956716671586037, - "rewards/rejected": -0.10748749226331711, - "step": 245 - }, - { - "epoch": 0.8125516102394715, - "grad_norm": 0.5283595323562622, - "learning_rate": 7.781569965870307e-07, - "log_odds_chosen": 0.36973726749420166, - "log_odds_ratio": -0.547565221786499, - "logits/chosen": 0.5519516468048096, - "logits/rejected": 0.15018615126609802, - "logps/chosen": -0.7155150771141052, - "logps/rejected": -0.8952691555023193, - "loss": 1.0535, - "nll_loss": 0.9987106323242188, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.07155150175094604, - "rewards/margins": 0.01797540858387947, - "rewards/rejected": -0.08952692151069641, - "step": 246 - }, - { - "epoch": 0.8158546655656482, - "grad_norm": 0.3916710317134857, - "learning_rate": 7.645051194539249e-07, - "log_odds_chosen": 1.2051454782485962, - "log_odds_ratio": -0.3583226799964905, - "logits/chosen": 0.7736025452613831, - "logits/rejected": -0.04255976527929306, - "logps/chosen": -0.6245502233505249, - "logps/rejected": -1.2047293186187744, - "loss": 0.9399, - "nll_loss": 0.9040589928627014, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06245502084493637, - "rewards/margins": 0.05801790580153465, - "rewards/rejected": -0.12047293782234192, - "step": 247 - }, - { - "epoch": 0.8191577208918249, - "grad_norm": 0.3581034541130066, - "learning_rate": 7.50853242320819e-07, - "log_odds_chosen": 1.1717535257339478, - "log_odds_ratio": -0.34151381254196167, - "logits/chosen": 0.48659980297088623, - "logits/rejected": 0.11282393336296082, - "logps/chosen": -0.5181041359901428, - "logps/rejected": -1.0486713647842407, - "loss": 0.8666, - "nll_loss": 0.8324728012084961, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.05181041359901428, - "rewards/margins": 0.05305672809481621, - "rewards/rejected": -0.10486713796854019, - "step": 248 - }, - { - "epoch": 0.8224607762180016, - "grad_norm": 0.4387553632259369, - "learning_rate": 7.372013651877133e-07, - "log_odds_chosen": 0.5133386254310608, - "log_odds_ratio": -0.5137954950332642, - "logits/chosen": 0.5584331154823303, - "logits/rejected": 0.2716492712497711, - "logps/chosen": -0.6799135208129883, - "logps/rejected": -0.904787540435791, - "loss": 1.0078, - "nll_loss": 0.9563804864883423, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06799135357141495, - "rewards/margins": 0.022487403824925423, - "rewards/rejected": -0.09047876298427582, - "step": 249 - }, - { - "epoch": 0.8257638315441783, - "grad_norm": 0.7516235113143921, - "learning_rate": 7.235494880546075e-07, - "log_odds_chosen": 0.7264823317527771, - "log_odds_ratio": -0.4016191363334656, - "logits/chosen": 0.6852781176567078, - "logits/rejected": 0.04300566762685776, - "logps/chosen": -0.5912334322929382, - "logps/rejected": -0.989246129989624, - "loss": 0.9929, - "nll_loss": 0.9526972770690918, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05912334844470024, - "rewards/margins": 0.03980126976966858, - "rewards/rejected": -0.09892462193965912, - "step": 250 - }, - { - "epoch": 0.8290668868703551, - "grad_norm": 0.7511612772941589, - "learning_rate": 7.098976109215017e-07, - "log_odds_chosen": 0.4757274389266968, - "log_odds_ratio": -0.5089365243911743, - "logits/chosen": 0.7610578536987305, - "logits/rejected": 0.004633564502000809, - "logps/chosen": -0.5897403955459595, - "logps/rejected": -0.8450216054916382, - "loss": 0.9505, - "nll_loss": 0.8996175527572632, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.058974042534828186, - "rewards/margins": 0.02552812173962593, - "rewards/rejected": -0.08450216054916382, - "step": 251 - }, - { - "epoch": 0.8323699421965318, - "grad_norm": 0.5014400482177734, - "learning_rate": 6.962457337883959e-07, - "log_odds_chosen": 0.9701356887817383, - "log_odds_ratio": -0.32983726263046265, - "logits/chosen": 0.594919741153717, - "logits/rejected": 0.2579506039619446, - "logps/chosen": -0.47991806268692017, - "logps/rejected": -0.9622279405593872, - "loss": 0.8666, - "nll_loss": 0.8336267471313477, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.047991808503866196, - "rewards/margins": 0.04823099076747894, - "rewards/rejected": -0.09622279554605484, - "step": 252 - }, - { - "epoch": 0.8356729975227085, - "grad_norm": 0.5858443379402161, - "learning_rate": 6.825938566552901e-07, - "log_odds_chosen": 0.9357874393463135, - "log_odds_ratio": -0.34100377559661865, - "logits/chosen": 0.7248008847236633, - "logits/rejected": 0.16196487843990326, - "logps/chosen": -0.5154309868812561, - "logps/rejected": -0.9902589917182922, - "loss": 0.8322, - "nll_loss": 0.7980992794036865, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05154310166835785, - "rewards/margins": 0.04748280346393585, - "rewards/rejected": -0.0990259051322937, - "step": 253 - }, - { - "epoch": 0.8389760528488852, - "grad_norm": 0.5427506566047668, - "learning_rate": 6.689419795221843e-07, - "log_odds_chosen": 1.1859885454177856, - "log_odds_ratio": -0.2777145206928253, - "logits/chosen": 0.7290123105049133, - "logits/rejected": 0.4405505955219269, - "logps/chosen": -0.4505842924118042, - "logps/rejected": -1.038667917251587, - "loss": 0.8538, - "nll_loss": 0.8260267972946167, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04505842924118042, - "rewards/margins": 0.05880836397409439, - "rewards/rejected": -0.10386680066585541, - "step": 254 - }, - { - "epoch": 0.8422791081750619, - "grad_norm": 0.679972231388092, - "learning_rate": 6.552901023890784e-07, - "log_odds_chosen": 0.4852396249771118, - "log_odds_ratio": -0.49834689497947693, - "logits/chosen": 0.7087770104408264, - "logits/rejected": 0.05778197571635246, - "logps/chosen": -0.7078080773353577, - "logps/rejected": -0.9582284688949585, - "loss": 1.0409, - "nll_loss": 0.9910181760787964, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07078080624341965, - "rewards/margins": 0.025042040273547173, - "rewards/rejected": -0.09582284837961197, - "step": 255 - }, - { - "epoch": 0.8455821635012386, - "grad_norm": 0.6108351945877075, - "learning_rate": 6.416382252559727e-07, - "log_odds_chosen": 0.4179123044013977, - "log_odds_ratio": -0.5517871379852295, - "logits/chosen": 0.6887850761413574, - "logits/rejected": 0.3083609342575073, - "logps/chosen": -0.71778404712677, - "logps/rejected": -0.8929762840270996, - "loss": 1.0546, - "nll_loss": 0.9993839859962463, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07177840918302536, - "rewards/margins": 0.017519228160381317, - "rewards/rejected": -0.08929762989282608, - "step": 256 - }, - { - "epoch": 0.8488852188274154, - "grad_norm": 0.7133077383041382, - "learning_rate": 6.279863481228669e-07, - "log_odds_chosen": 1.0166049003601074, - "log_odds_ratio": -0.309597909450531, - "logits/chosen": 0.6443052291870117, - "logits/rejected": 0.1449916660785675, - "logps/chosen": -0.5636100769042969, - "logps/rejected": -1.1260851621627808, - "loss": 0.8614, - "nll_loss": 0.8304792642593384, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.056361012160778046, - "rewards/margins": 0.05624750256538391, - "rewards/rejected": -0.11260851472616196, - "step": 257 - }, - { - "epoch": 0.8521882741535921, - "grad_norm": 0.33726024627685547, - "learning_rate": 6.143344709897611e-07, - "log_odds_chosen": 1.1555687189102173, - "log_odds_ratio": -0.28345346450805664, - "logits/chosen": 0.491170734167099, - "logits/rejected": 0.1574317216873169, - "logps/chosen": -0.4357753396034241, - "logps/rejected": -0.9932349324226379, - "loss": 0.8378, - "nll_loss": 0.8094955682754517, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.043577536940574646, - "rewards/margins": 0.05574595555663109, - "rewards/rejected": -0.09932349622249603, - "step": 258 - }, - { - "epoch": 0.8554913294797688, - "grad_norm": 0.3103138208389282, - "learning_rate": 6.006825938566553e-07, - "log_odds_chosen": 1.2626008987426758, - "log_odds_ratio": -0.26136070489883423, - "logits/chosen": 0.6989095211029053, - "logits/rejected": -0.0838126689195633, - "logps/chosen": -0.47132691740989685, - "logps/rejected": -1.140065312385559, - "loss": 0.8169, - "nll_loss": 0.790734589099884, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.047132693231105804, - "rewards/margins": 0.06687384843826294, - "rewards/rejected": -0.11400653421878815, - "step": 259 - }, - { - "epoch": 0.8587943848059455, - "grad_norm": 0.3447074294090271, - "learning_rate": 5.870307167235494e-07, - "log_odds_chosen": 0.7661303281784058, - "log_odds_ratio": -0.42986470460891724, - "logits/chosen": 0.6149240136146545, - "logits/rejected": 0.27751222252845764, - "logps/chosen": -0.590569257736206, - "logps/rejected": -1.0086517333984375, - "loss": 0.9613, - "nll_loss": 0.9182801246643066, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.05905692279338837, - "rewards/margins": 0.04180825501680374, - "rewards/rejected": -0.10086517035961151, - "step": 260 - }, - { - "epoch": 0.8620974401321222, - "grad_norm": 0.5641007423400879, - "learning_rate": 5.733788395904437e-07, - "log_odds_chosen": 0.8064379692077637, - "log_odds_ratio": -0.40099215507507324, - "logits/chosen": 0.6300515532493591, - "logits/rejected": 0.1806260347366333, - "logps/chosen": -0.5696174502372742, - "logps/rejected": -0.9854542016983032, - "loss": 0.9353, - "nll_loss": 0.8952205777168274, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05696174502372742, - "rewards/margins": 0.04158366844058037, - "rewards/rejected": -0.09854541718959808, - "step": 261 - }, - { - "epoch": 0.8654004954582989, - "grad_norm": 0.6148810982704163, - "learning_rate": 5.597269624573379e-07, - "log_odds_chosen": 0.7683346271514893, - "log_odds_ratio": -0.39894548058509827, - "logits/chosen": 0.5601429343223572, - "logits/rejected": 0.2953536808490753, - "logps/chosen": -0.6178176403045654, - "logps/rejected": -1.020874261856079, - "loss": 0.9892, - "nll_loss": 0.9493349194526672, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06178176775574684, - "rewards/margins": 0.04030565544962883, - "rewards/rejected": -0.10208742320537567, - "step": 262 - }, - { - "epoch": 0.8687035507844756, - "grad_norm": 0.2859596908092499, - "learning_rate": 5.46075085324232e-07, - "log_odds_chosen": 1.041332721710205, - "log_odds_ratio": -0.3070143759250641, - "logits/chosen": 0.7071231007575989, - "logits/rejected": 0.03578655421733856, - "logps/chosen": -0.5134966373443604, - "logps/rejected": -1.0614898204803467, - "loss": 0.8631, - "nll_loss": 0.8323569893836975, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.051349662244319916, - "rewards/margins": 0.05479931831359863, - "rewards/rejected": -0.10614898800849915, - "step": 263 - }, - { - "epoch": 0.8720066061106524, - "grad_norm": 0.4726884067058563, - "learning_rate": 5.324232081911263e-07, - "log_odds_chosen": -0.3331339955329895, - "log_odds_ratio": -0.8841660022735596, - "logits/chosen": 0.6247517466545105, - "logits/rejected": 0.39789825677871704, - "logps/chosen": -0.9341442584991455, - "logps/rejected": -0.7430992126464844, - "loss": 1.2632, - "nll_loss": 1.1748204231262207, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.09341442584991455, - "rewards/margins": -0.01910450868308544, - "rewards/rejected": -0.07430991530418396, - "step": 264 - }, - { - "epoch": 0.8753096614368291, - "grad_norm": 0.45494329929351807, - "learning_rate": 5.187713310580204e-07, - "log_odds_chosen": 0.4311447739601135, - "log_odds_ratio": -0.5279918909072876, - "logits/chosen": 0.6854113340377808, - "logits/rejected": 0.2473314255475998, - "logps/chosen": -0.7107440233230591, - "logps/rejected": -0.9431452751159668, - "loss": 1.0733, - "nll_loss": 1.0205414295196533, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07107440382242203, - "rewards/margins": 0.023240121081471443, - "rewards/rejected": -0.09431452304124832, - "step": 265 - }, - { - "epoch": 0.8786127167630058, - "grad_norm": 0.38812732696533203, - "learning_rate": 5.051194539249146e-07, - "log_odds_chosen": 0.6349722146987915, - "log_odds_ratio": -0.4392663836479187, - "logits/chosen": 0.7123770713806152, - "logits/rejected": -0.01571335643529892, - "logps/chosen": -0.624293327331543, - "logps/rejected": -0.967180073261261, - "loss": 0.9932, - "nll_loss": 0.9493086338043213, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06242932379245758, - "rewards/margins": 0.0342886745929718, - "rewards/rejected": -0.09671800583600998, - "step": 266 - }, - { - "epoch": 0.8819157720891825, - "grad_norm": 0.509167492389679, - "learning_rate": 4.914675767918088e-07, - "log_odds_chosen": 1.0954244136810303, - "log_odds_ratio": -0.29425469040870667, - "logits/chosen": 0.523582935333252, - "logits/rejected": 0.4564739763736725, - "logps/chosen": -0.421134352684021, - "logps/rejected": -0.9451798796653748, - "loss": 0.8151, - "nll_loss": 0.7856869697570801, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04211343452334404, - "rewards/margins": 0.052404552698135376, - "rewards/rejected": -0.09451798349618912, - "step": 267 - }, - { - "epoch": 0.8852188274153592, - "grad_norm": 0.7007644176483154, - "learning_rate": 4.778156996587031e-07, - "log_odds_chosen": 0.8694885969161987, - "log_odds_ratio": -0.3592708706855774, - "logits/chosen": 0.5125115513801575, - "logits/rejected": 0.2612343728542328, - "logps/chosen": -0.5716733932495117, - "logps/rejected": -1.0208145380020142, - "loss": 0.9676, - "nll_loss": 0.931690514087677, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05716733634471893, - "rewards/margins": 0.044914111495018005, - "rewards/rejected": -0.10208145529031754, - "step": 268 - }, - { - "epoch": 0.8885218827415359, - "grad_norm": 0.6293836832046509, - "learning_rate": 4.641638225255973e-07, - "log_odds_chosen": 0.4245747923851013, - "log_odds_ratio": -0.5234514474868774, - "logits/chosen": 0.5733729600906372, - "logits/rejected": 0.3594064712524414, - "logps/chosen": -0.6935426592826843, - "logps/rejected": -0.9089328050613403, - "loss": 1.1127, - "nll_loss": 1.0603197813034058, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06935426592826843, - "rewards/margins": 0.02153901755809784, - "rewards/rejected": -0.09089328348636627, - "step": 269 - }, - { - "epoch": 0.8918249380677127, - "grad_norm": 0.5333037972450256, - "learning_rate": 4.5051194539249145e-07, - "log_odds_chosen": 1.5435826778411865, - "log_odds_ratio": -0.23327317833900452, - "logits/chosen": 0.5978046655654907, - "logits/rejected": 0.17486822605133057, - "logps/chosen": -0.3881281614303589, - "logps/rejected": -1.1109859943389893, - "loss": 0.7681, - "nll_loss": 0.7448058724403381, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03881281614303589, - "rewards/margins": 0.07228578627109528, - "rewards/rejected": -0.11109860241413116, - "step": 270 - }, - { - "epoch": 0.8951279933938894, - "grad_norm": 0.5562400221824646, - "learning_rate": 4.3686006825938567e-07, - "log_odds_chosen": 0.6399061679840088, - "log_odds_ratio": -0.44702619314193726, - "logits/chosen": 0.6351342797279358, - "logits/rejected": 0.42477717995643616, - "logps/chosen": -0.5836426615715027, - "logps/rejected": -0.9106531143188477, - "loss": 1.0166, - "nll_loss": 0.9719395041465759, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.058364272117614746, - "rewards/margins": 0.0327010415494442, - "rewards/rejected": -0.09106530994176865, - "step": 271 - }, - { - "epoch": 0.8984310487200661, - "grad_norm": 0.382891446352005, - "learning_rate": 4.2320819112627985e-07, - "log_odds_chosen": 1.2312469482421875, - "log_odds_ratio": -0.27231886982917786, - "logits/chosen": 0.714309811592102, - "logits/rejected": 0.35168707370758057, - "logps/chosen": -0.45473283529281616, - "logps/rejected": -1.0424731969833374, - "loss": 0.7826, - "nll_loss": 0.7553545236587524, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04547328129410744, - "rewards/margins": 0.05877403914928436, - "rewards/rejected": -0.1042473167181015, - "step": 272 - }, - { - "epoch": 0.9017341040462428, - "grad_norm": 0.382192999124527, - "learning_rate": 4.09556313993174e-07, - "log_odds_chosen": 0.9463987946510315, - "log_odds_ratio": -0.34287700057029724, - "logits/chosen": 0.7133896350860596, - "logits/rejected": 0.3244338631629944, - "logps/chosen": -0.5166228413581848, - "logps/rejected": -0.9969301223754883, - "loss": 0.8541, - "nll_loss": 0.819779634475708, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05166228860616684, - "rewards/margins": 0.04803072661161423, - "rewards/rejected": -0.09969300776720047, - "step": 273 - }, - { - "epoch": 0.9050371593724195, - "grad_norm": 0.6342567801475525, - "learning_rate": 3.9590443686006824e-07, - "log_odds_chosen": 0.7619415521621704, - "log_odds_ratio": -0.4149397015571594, - "logits/chosen": 0.7412171363830566, - "logits/rejected": 0.1542525738477707, - "logps/chosen": -0.6324884295463562, - "logps/rejected": -1.03025484085083, - "loss": 0.9532, - "nll_loss": 0.9116733074188232, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06324884295463562, - "rewards/margins": 0.03977663815021515, - "rewards/rejected": -0.10302548110485077, - "step": 274 - }, - { - "epoch": 0.9083402146985962, - "grad_norm": 0.5707753896713257, - "learning_rate": 3.8225255972696247e-07, - "log_odds_chosen": 0.9282410144805908, - "log_odds_ratio": -0.36501890420913696, - "logits/chosen": 0.6263246536254883, - "logits/rejected": 0.0017412155866622925, - "logps/chosen": -0.590927004814148, - "logps/rejected": -1.0784671306610107, - "loss": 0.9144, - "nll_loss": 0.8779453039169312, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.059092700481414795, - "rewards/margins": 0.0487540066242218, - "rewards/rejected": -0.1078467071056366, - "step": 275 - }, - { - "epoch": 0.911643270024773, - "grad_norm": 0.881402850151062, - "learning_rate": 3.6860068259385664e-07, - "log_odds_chosen": 0.7643041610717773, - "log_odds_ratio": -0.41213709115982056, - "logits/chosen": 0.6258478760719299, - "logits/rejected": 0.5689994096755981, - "logps/chosen": -0.5437120199203491, - "logps/rejected": -0.890987753868103, - "loss": 0.8837, - "nll_loss": 0.8424574136734009, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.05437120422720909, - "rewards/margins": 0.03472757712006569, - "rewards/rejected": -0.08909878134727478, - "step": 276 - }, - { - "epoch": 0.9149463253509497, - "grad_norm": 0.5656671524047852, - "learning_rate": 3.5494880546075087e-07, - "log_odds_chosen": 0.15353864431381226, - "log_odds_ratio": -0.6496301293373108, - "logits/chosen": 0.6206212639808655, - "logits/rejected": 0.22864846885204315, - "logps/chosen": -0.8330994248390198, - "logps/rejected": -0.8991483449935913, - "loss": 1.157, - "nll_loss": 1.0920714139938354, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.08330994099378586, - "rewards/margins": 0.006604895927011967, - "rewards/rejected": -0.08991483598947525, - "step": 277 - }, - { - "epoch": 0.9182493806771264, - "grad_norm": 11.515300750732422, - "learning_rate": 3.4129692832764504e-07, - "log_odds_chosen": 0.7496099472045898, - "log_odds_ratio": -0.4233444035053253, - "logits/chosen": 0.534392774105072, - "logits/rejected": -0.13472303748130798, - "logps/chosen": -0.7250871658325195, - "logps/rejected": -1.107113242149353, - "loss": 0.9754, - "nll_loss": 0.9330787062644958, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07250871509313583, - "rewards/margins": 0.03820260614156723, - "rewards/rejected": -0.11071132123470306, - "step": 278 - }, - { - "epoch": 0.921552436003303, - "grad_norm": 0.48496487736701965, - "learning_rate": 3.276450511945392e-07, - "log_odds_chosen": 0.7559864521026611, - "log_odds_ratio": -0.4089910387992859, - "logits/chosen": 0.47455617785453796, - "logits/rejected": 0.10236611217260361, - "logps/chosen": -0.5628709197044373, - "logps/rejected": -0.9113346338272095, - "loss": 0.9497, - "nll_loss": 0.9087857007980347, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.056287094950675964, - "rewards/margins": 0.03484636917710304, - "rewards/rejected": -0.09113346040248871, - "step": 279 - }, - { - "epoch": 0.9248554913294798, - "grad_norm": 0.4781360328197479, - "learning_rate": 3.1399317406143344e-07, - "log_odds_chosen": 0.5580964088439941, - "log_odds_ratio": -0.4819334149360657, - "logits/chosen": 0.7977317571640015, - "logits/rejected": 0.11904895305633545, - "logps/chosen": -0.6788333654403687, - "logps/rejected": -0.9865895509719849, - "loss": 0.9644, - "nll_loss": 0.9162468314170837, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06788333505392075, - "rewards/margins": 0.03077562525868416, - "rewards/rejected": -0.0986589640378952, - "step": 280 - }, - { - "epoch": 0.9281585466556564, - "grad_norm": 0.42035070061683655, - "learning_rate": 3.0034129692832767e-07, - "log_odds_chosen": 1.0993973016738892, - "log_odds_ratio": -0.31785356998443604, - "logits/chosen": 0.6283450126647949, - "logits/rejected": 0.2206919938325882, - "logps/chosen": -0.4213792085647583, - "logps/rejected": -0.9078181385993958, - "loss": 0.7327, - "nll_loss": 0.7009255290031433, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04213792085647583, - "rewards/margins": 0.048643894493579865, - "rewards/rejected": -0.0907818153500557, - "step": 281 - }, - { - "epoch": 0.9314616019818331, - "grad_norm": 0.542384922504425, - "learning_rate": 2.8668941979522184e-07, - "log_odds_chosen": 0.4530085325241089, - "log_odds_ratio": -0.5064518451690674, - "logits/chosen": 0.5124476552009583, - "logits/rejected": 0.1543555110692978, - "logps/chosen": -0.7769076824188232, - "logps/rejected": -1.0268710851669312, - "loss": 1.0872, - "nll_loss": 1.0365545749664307, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07769076526165009, - "rewards/margins": 0.02499634400010109, - "rewards/rejected": -0.10268710553646088, - "step": 282 - }, - { - "epoch": 0.93476465730801, - "grad_norm": 0.8232835531234741, - "learning_rate": 2.73037542662116e-07, - "log_odds_chosen": 0.9723321795463562, - "log_odds_ratio": -0.3315427899360657, - "logits/chosen": 0.7350950241088867, - "logits/rejected": 0.17922315001487732, - "logps/chosen": -0.5099251866340637, - "logps/rejected": -0.9943480491638184, - "loss": 0.908, - "nll_loss": 0.8748064041137695, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.050992514938116074, - "rewards/margins": 0.048442281782627106, - "rewards/rejected": -0.09943480044603348, - "step": 283 - }, - { - "epoch": 0.9380677126341866, - "grad_norm": 0.41739508509635925, - "learning_rate": 2.593856655290102e-07, - "log_odds_chosen": 0.7631043195724487, - "log_odds_ratio": -0.42361724376678467, - "logits/chosen": 0.6787735223770142, - "logits/rejected": 0.1044793650507927, - "logps/chosen": -0.5500807166099548, - "logps/rejected": -0.9341393709182739, - "loss": 0.8744, - "nll_loss": 0.8320425152778625, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05500807613134384, - "rewards/margins": 0.03840586543083191, - "rewards/rejected": -0.09341394156217575, - "step": 284 - }, - { - "epoch": 0.9413707679603633, - "grad_norm": 0.4300461709499359, - "learning_rate": 2.457337883959044e-07, - "log_odds_chosen": 0.6889720559120178, - "log_odds_ratio": -0.4111790060997009, - "logits/chosen": 0.6379618048667908, - "logits/rejected": 0.17899121344089508, - "logps/chosen": -0.5329542756080627, - "logps/rejected": -0.877329409122467, - "loss": 0.8994, - "nll_loss": 0.8582838773727417, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.053295426070690155, - "rewards/margins": 0.03443751484155655, - "rewards/rejected": -0.0877329409122467, - "step": 285 - }, - { - "epoch": 0.94467382328654, - "grad_norm": 0.4140392243862152, - "learning_rate": 2.3208191126279864e-07, - "log_odds_chosen": 0.8494409322738647, - "log_odds_ratio": -0.3649711012840271, - "logits/chosen": 0.7397714257240295, - "logits/rejected": -0.07429146766662598, - "logps/chosen": -0.6355004906654358, - "logps/rejected": -1.1310406923294067, - "loss": 0.9918, - "nll_loss": 0.9552657008171082, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06355004757642746, - "rewards/margins": 0.04955402761697769, - "rewards/rejected": -0.11310407519340515, - "step": 286 - }, - { - "epoch": 0.9479768786127167, - "grad_norm": 0.6909618973731995, - "learning_rate": 2.1843003412969284e-07, - "log_odds_chosen": 0.2089315950870514, - "log_odds_ratio": -0.614280641078949, - "logits/chosen": 0.6428443789482117, - "logits/rejected": 0.177924245595932, - "logps/chosen": -0.6965489983558655, - "logps/rejected": -0.7880178689956665, - "loss": 1.0498, - "nll_loss": 0.9883894324302673, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06965489685535431, - "rewards/margins": 0.00914688315242529, - "rewards/rejected": -0.07880178093910217, - "step": 287 - }, - { - "epoch": 0.9512799339388934, - "grad_norm": 0.771812915802002, - "learning_rate": 2.04778156996587e-07, - "log_odds_chosen": 0.6732171773910522, - "log_odds_ratio": -0.4265425205230713, - "logits/chosen": 0.7075976133346558, - "logits/rejected": 0.34097951650619507, - "logps/chosen": -0.6979069113731384, - "logps/rejected": -1.100236415863037, - "loss": 1.0152, - "nll_loss": 0.972504734992981, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06979069113731384, - "rewards/margins": 0.04023294895887375, - "rewards/rejected": -0.11002364009618759, - "step": 288 - }, - { - "epoch": 0.9545829892650702, - "grad_norm": 0.3630276620388031, - "learning_rate": 1.9112627986348124e-07, - "log_odds_chosen": 1.5539547204971313, - "log_odds_ratio": -0.29640471935272217, - "logits/chosen": 0.6048256158828735, - "logits/rejected": 0.2101527452468872, - "logps/chosen": -0.45452961325645447, - "logps/rejected": -1.1474053859710693, - "loss": 0.8031, - "nll_loss": 0.7734284400939941, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.045452967286109924, - "rewards/margins": 0.06928756833076477, - "rewards/rejected": -0.1147405356168747, - "step": 289 - }, - { - "epoch": 0.9578860445912469, - "grad_norm": 0.5806308388710022, - "learning_rate": 1.7747440273037543e-07, - "log_odds_chosen": 0.35225987434387207, - "log_odds_ratio": -0.5652075409889221, - "logits/chosen": 0.684053897857666, - "logits/rejected": 0.22439348697662354, - "logps/chosen": -0.7048362493515015, - "logps/rejected": -0.873548150062561, - "loss": 1.0514, - "nll_loss": 0.9949245452880859, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.07048362493515015, - "rewards/margins": 0.016871187835931778, - "rewards/rejected": -0.08735481649637222, - "step": 290 - }, - { - "epoch": 0.9611890999174236, - "grad_norm": 0.5242696404457092, - "learning_rate": 1.638225255972696e-07, - "log_odds_chosen": 0.7631913423538208, - "log_odds_ratio": -0.3927976191043854, - "logits/chosen": 0.8313478231430054, - "logits/rejected": 0.3106094300746918, - "logps/chosen": -0.5621920824050903, - "logps/rejected": -0.9607857465744019, - "loss": 0.9257, - "nll_loss": 0.886394739151001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.056219205260276794, - "rewards/margins": 0.03985936939716339, - "rewards/rejected": -0.09607857465744019, - "step": 291 - }, - { - "epoch": 0.9644921552436003, - "grad_norm": 0.6096070408821106, - "learning_rate": 1.5017064846416383e-07, - "log_odds_chosen": 1.0682460069656372, - "log_odds_ratio": -0.3577880859375, - "logits/chosen": 0.5714915990829468, - "logits/rejected": -0.03853613883256912, - "logps/chosen": -0.5339826345443726, - "logps/rejected": -1.0092601776123047, - "loss": 0.9182, - "nll_loss": 0.8824512958526611, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.053398266434669495, - "rewards/margins": 0.047527752816677094, - "rewards/rejected": -0.10092601925134659, - "step": 292 - }, - { - "epoch": 0.967795210569777, - "grad_norm": 0.41762471199035645, - "learning_rate": 1.36518771331058e-07, - "log_odds_chosen": 0.6797726154327393, - "log_odds_ratio": -0.45752060413360596, - "logits/chosen": 0.7921758890151978, - "logits/rejected": 0.22569532692432404, - "logps/chosen": -0.5768328309059143, - "logps/rejected": -0.9011142253875732, - "loss": 0.9293, - "nll_loss": 0.8835339546203613, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.05768328160047531, - "rewards/margins": 0.032428137958049774, - "rewards/rejected": -0.09011141955852509, - "step": 293 - }, - { - "epoch": 0.9710982658959537, - "grad_norm": 0.6665835380554199, - "learning_rate": 1.228668941979522e-07, - "log_odds_chosen": 0.5408161878585815, - "log_odds_ratio": -0.49131008982658386, - "logits/chosen": 0.7253115177154541, - "logits/rejected": 0.3431871235370636, - "logps/chosen": -0.6925525069236755, - "logps/rejected": -0.9829544425010681, - "loss": 1.0103, - "nll_loss": 0.9611354470252991, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06925524771213531, - "rewards/margins": 0.029040195047855377, - "rewards/rejected": -0.09829544275999069, - "step": 294 - }, - { - "epoch": 0.9744013212221305, - "grad_norm": 0.5364659428596497, - "learning_rate": 1.0921501706484642e-07, - "log_odds_chosen": 0.6445705890655518, - "log_odds_ratio": -0.4337511658668518, - "logits/chosen": 0.3450987935066223, - "logits/rejected": 0.17714332044124603, - "logps/chosen": -0.71531081199646, - "logps/rejected": -1.0854036808013916, - "loss": 1.1685, - "nll_loss": 1.125173568725586, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07153107225894928, - "rewards/margins": 0.03700929135084152, - "rewards/rejected": -0.1085403636097908, - "step": 295 - }, - { - "epoch": 0.9777043765483072, - "grad_norm": 0.7289105653762817, - "learning_rate": 9.556313993174062e-08, - "log_odds_chosen": 0.2139919400215149, - "log_odds_ratio": -0.7701393961906433, - "logits/chosen": 0.5326370000839233, - "logits/rejected": -0.12862583994865417, - "logps/chosen": -0.6968436241149902, - "logps/rejected": -0.8785687685012817, - "loss": 1.0686, - "nll_loss": 0.9916204214096069, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06968436390161514, - "rewards/margins": 0.018172509968280792, - "rewards/rejected": -0.08785686641931534, - "step": 296 - }, - { - "epoch": 0.9810074318744839, - "grad_norm": 0.37724432349205017, - "learning_rate": 8.19112627986348e-08, - "log_odds_chosen": 0.8595337271690369, - "log_odds_ratio": -0.3680306077003479, - "logits/chosen": 0.7508724927902222, - "logits/rejected": 0.13272592425346375, - "logps/chosen": -0.556876540184021, - "logps/rejected": -1.009569764137268, - "loss": 0.9115, - "nll_loss": 0.8747261762619019, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05568765476346016, - "rewards/margins": 0.045269329100847244, - "rewards/rejected": -0.1009569764137268, - "step": 297 - }, - { - "epoch": 0.9843104872006606, - "grad_norm": 0.3609966039657593, - "learning_rate": 6.8259385665529e-08, - "log_odds_chosen": 1.0100579261779785, - "log_odds_ratio": -0.3262861371040344, - "logits/chosen": 0.5984151363372803, - "logits/rejected": 0.004998762160539627, - "logps/chosen": -0.5041345953941345, - "logps/rejected": -1.009002923965454, - "loss": 0.8536, - "nll_loss": 0.8209413886070251, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05041345953941345, - "rewards/margins": 0.050486836582422256, - "rewards/rejected": -0.10090029984712601, - "step": 298 - }, - { - "epoch": 0.9876135425268373, - "grad_norm": 0.5954380631446838, - "learning_rate": 5.460750853242321e-08, - "log_odds_chosen": 0.44311755895614624, - "log_odds_ratio": -0.5467994213104248, - "logits/chosen": 0.5830391645431519, - "logits/rejected": 0.3920818865299225, - "logps/chosen": -0.6433383822441101, - "logps/rejected": -0.8638253211975098, - "loss": 0.9292, - "nll_loss": 0.8745091557502747, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06433384120464325, - "rewards/margins": 0.022048696875572205, - "rewards/rejected": -0.08638253062963486, - "step": 299 - }, - { - "epoch": 0.990916597853014, - "grad_norm": 0.4200268089771271, - "learning_rate": 4.09556313993174e-08, - "log_odds_chosen": 0.9774202108383179, - "log_odds_ratio": -0.32767370343208313, - "logits/chosen": 0.7513106465339661, - "logits/rejected": -0.09982183575630188, - "logps/chosen": -0.5735558271408081, - "logps/rejected": -1.1077017784118652, - "loss": 0.9047, - "nll_loss": 0.8719533085823059, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05735559016466141, - "rewards/margins": 0.05341459810733795, - "rewards/rejected": -0.11077018082141876, - "step": 300 - }, - { - "epoch": 0.9942196531791907, - "grad_norm": 0.38715505599975586, - "learning_rate": 2.7303754266211605e-08, - "log_odds_chosen": 0.5467053651809692, - "log_odds_ratio": -0.5097859501838684, - "logits/chosen": 0.5717355012893677, - "logits/rejected": 0.07400164753198624, - "logps/chosen": -0.6904975175857544, - "logps/rejected": -0.9988346099853516, - "loss": 1.008, - "nll_loss": 0.9570627808570862, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06904975324869156, - "rewards/margins": 0.030833713710308075, - "rewards/rejected": -0.09988346695899963, - "step": 301 - }, - { - "epoch": 0.9975227085053675, - "grad_norm": 0.5014972686767578, - "learning_rate": 1.3651877133105802e-08, - "log_odds_chosen": 0.43956100940704346, - "log_odds_ratio": -0.5114588737487793, - "logits/chosen": 0.7559257745742798, - "logits/rejected": -0.03446268290281296, - "logps/chosen": -0.6582670211791992, - "logps/rejected": -0.8834311366081238, - "loss": 0.991, - "nll_loss": 0.9398746490478516, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.06582669913768768, - "rewards/margins": 0.022516410797834396, - "rewards/rejected": -0.08834311366081238, - "step": 302 - }, - { - "epoch": 1.000825763831544, - "grad_norm": 0.466433584690094, - "learning_rate": 0.0, - "log_odds_chosen": 0.960208535194397, - "log_odds_ratio": -0.3503170311450958, - "logits/chosen": 0.7227274179458618, - "logits/rejected": -0.006568871438503265, - "logps/chosen": -0.616532564163208, - "logps/rejected": -1.152600646018982, - "loss": 0.919, - "nll_loss": 0.8840132355690002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0616532564163208, - "rewards/margins": 0.053606804460287094, - "rewards/rejected": -0.1152600571513176, - "step": 303 - } - ], - "logging_steps": 1, - "max_steps": 303, - "num_input_tokens_seen": 0, - "num_train_epochs": 2, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 0.0, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -}