diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9573 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 530, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0037735849056603774, + "grad_norm": 1.4419530630111694, + "learning_rate": 4.9905660377358493e-05, + "log_odds_chosen": 0.4804525375366211, + "log_odds_ratio": -0.6413240432739258, + "logits/chosen": 0.3143516182899475, + "logits/rejected": -1.3078216314315796, + "logps/chosen": -1.7236464023590088, + "logps/rejected": -2.0679845809936523, + "loss": 2.2212, + "nll_loss": 2.1570305824279785, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.17236465215682983, + "rewards/margins": 0.03443381190299988, + "rewards/rejected": -0.2067984640598297, + "step": 1 + }, + { + "epoch": 0.007547169811320755, + "grad_norm": 1.0689440965652466, + "learning_rate": 4.9811320754716985e-05, + "log_odds_chosen": -0.09131823480129242, + "log_odds_ratio": -0.8326093554496765, + "logits/chosen": -0.3561238646507263, + "logits/rejected": -1.7676267623901367, + "logps/chosen": -1.9550193548202515, + "logps/rejected": -1.842592477798462, + "loss": 2.1604, + "nll_loss": 2.077115297317505, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.19550195336341858, + "rewards/margins": -0.011242689564824104, + "rewards/rejected": -0.18425926566123962, + "step": 2 + }, + { + "epoch": 0.011320754716981131, + "grad_norm": 1.0649460554122925, + "learning_rate": 4.9716981132075476e-05, + "log_odds_chosen": 0.6475443840026855, + "log_odds_ratio": -0.5298449993133545, + "logits/chosen": 0.7758256196975708, + "logits/rejected": -1.0398880243301392, + "logps/chosen": -1.5218658447265625, + "logps/rejected": -1.9791918992996216, + "loss": 1.9187, + "nll_loss": 1.8656777143478394, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1521865725517273, + "rewards/margins": 0.04573261737823486, + "rewards/rejected": -0.19791918992996216, + "step": 3 + }, + { + "epoch": 0.01509433962264151, + "grad_norm": 0.8160853981971741, + "learning_rate": 4.962264150943397e-05, + "log_odds_chosen": 0.4427639842033386, + "log_odds_ratio": -0.6302679777145386, + "logits/chosen": -1.3039603233337402, + "logits/rejected": -2.81551456451416, + "logps/chosen": -1.551461100578308, + "logps/rejected": -1.879504919052124, + "loss": 1.6384, + "nll_loss": 1.575362205505371, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.15514612197875977, + "rewards/margins": 0.032804377377033234, + "rewards/rejected": -0.1879504919052124, + "step": 4 + }, + { + "epoch": 0.018867924528301886, + "grad_norm": 0.8768725991249084, + "learning_rate": 4.952830188679246e-05, + "log_odds_chosen": 0.1845681071281433, + "log_odds_ratio": -0.7325757741928101, + "logits/chosen": -0.4272328019142151, + "logits/rejected": -1.7803056240081787, + "logps/chosen": -1.8399394750595093, + "logps/rejected": -1.96200692653656, + "loss": 2.0367, + "nll_loss": 1.9634612798690796, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.18399396538734436, + "rewards/margins": 0.012206735089421272, + "rewards/rejected": -0.19620069861412048, + "step": 5 + }, + { + "epoch": 0.022641509433962263, + "grad_norm": 3.759185314178467, + "learning_rate": 4.943396226415095e-05, + "log_odds_chosen": 0.4950372576713562, + "log_odds_ratio": -0.5588759183883667, + "logits/chosen": -0.019302427768707275, + "logits/rejected": -1.1816325187683105, + "logps/chosen": -1.6534287929534912, + "logps/rejected": -2.0343308448791504, + "loss": 1.9177, + "nll_loss": 1.8618510961532593, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16534289717674255, + "rewards/margins": 0.038090191781520844, + "rewards/rejected": -0.2034330666065216, + "step": 6 + }, + { + "epoch": 0.026415094339622643, + "grad_norm": 0.9324076175689697, + "learning_rate": 4.933962264150943e-05, + "log_odds_chosen": 0.12158789485692978, + "log_odds_ratio": -0.7240866422653198, + "logits/chosen": -0.2951589822769165, + "logits/rejected": -2.193943500518799, + "logps/chosen": -1.9935435056686401, + "logps/rejected": -2.0926284790039062, + "loss": 1.9309, + "nll_loss": 1.8584506511688232, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1993543654680252, + "rewards/margins": 0.009908500127494335, + "rewards/rejected": -0.20926286280155182, + "step": 7 + }, + { + "epoch": 0.03018867924528302, + "grad_norm": 0.7746831178665161, + "learning_rate": 4.9245283018867924e-05, + "log_odds_chosen": 1.0372132062911987, + "log_odds_ratio": -0.44096675515174866, + "logits/chosen": -0.7707222700119019, + "logits/rejected": -2.369938850402832, + "logps/chosen": -1.2799935340881348, + "logps/rejected": -2.0016367435455322, + "loss": 1.2616, + "nll_loss": 1.2174644470214844, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.12799936532974243, + "rewards/margins": 0.07216434180736542, + "rewards/rejected": -0.20016369223594666, + "step": 8 + }, + { + "epoch": 0.033962264150943396, + "grad_norm": 0.7088687419891357, + "learning_rate": 4.9150943396226415e-05, + "log_odds_chosen": 0.1920507550239563, + "log_odds_ratio": -0.729371964931488, + "logits/chosen": -0.6697689294815063, + "logits/rejected": -3.048678398132324, + "logps/chosen": -1.8056210279464722, + "logps/rejected": -1.9406684637069702, + "loss": 1.89, + "nll_loss": 1.8170198202133179, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.1805620938539505, + "rewards/margins": 0.01350475661456585, + "rewards/rejected": -0.1940668523311615, + "step": 9 + }, + { + "epoch": 0.03773584905660377, + "grad_norm": 0.7092457413673401, + "learning_rate": 4.9056603773584906e-05, + "log_odds_chosen": 0.8096474409103394, + "log_odds_ratio": -0.4400815963745117, + "logits/chosen": -0.08921952545642853, + "logits/rejected": -2.1929593086242676, + "logps/chosen": -1.4596810340881348, + "logps/rejected": -2.0628461837768555, + "loss": 1.6278, + "nll_loss": 1.583770751953125, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14596810936927795, + "rewards/margins": 0.060316506773233414, + "rewards/rejected": -0.20628461241722107, + "step": 10 + }, + { + "epoch": 0.04150943396226415, + "grad_norm": 0.7152612209320068, + "learning_rate": 4.89622641509434e-05, + "log_odds_chosen": 0.5647962689399719, + "log_odds_ratio": -0.5348465442657471, + "logits/chosen": -0.022822290658950806, + "logits/rejected": -0.9105501174926758, + "logps/chosen": -1.738325595855713, + "logps/rejected": -2.1731629371643066, + "loss": 1.715, + "nll_loss": 1.6615500450134277, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17383255064487457, + "rewards/margins": 0.043483734130859375, + "rewards/rejected": -0.21731628477573395, + "step": 11 + }, + { + "epoch": 0.045283018867924525, + "grad_norm": 0.49451878666877747, + "learning_rate": 4.886792452830189e-05, + "log_odds_chosen": 0.8111759424209595, + "log_odds_ratio": -0.5735799670219421, + "logits/chosen": -0.1950460523366928, + "logits/rejected": -1.8716578483581543, + "logps/chosen": -1.6253588199615479, + "logps/rejected": -2.223177671432495, + "loss": 1.6836, + "nll_loss": 1.6262810230255127, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.1625358760356903, + "rewards/margins": 0.05978189408779144, + "rewards/rejected": -0.22231778502464294, + "step": 12 + }, + { + "epoch": 0.04905660377358491, + "grad_norm": 0.49509674310684204, + "learning_rate": 4.877358490566038e-05, + "log_odds_chosen": 1.2461881637573242, + "log_odds_ratio": -0.3561839163303375, + "logits/chosen": -0.05543512478470802, + "logits/rejected": -1.6817309856414795, + "logps/chosen": -1.4552483558654785, + "logps/rejected": -2.393232583999634, + "loss": 1.6718, + "nll_loss": 1.6361980438232422, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14552482962608337, + "rewards/margins": 0.09379842877388, + "rewards/rejected": -0.23932327330112457, + "step": 13 + }, + { + "epoch": 0.052830188679245285, + "grad_norm": 0.5647757649421692, + "learning_rate": 4.867924528301887e-05, + "log_odds_chosen": 0.9501932263374329, + "log_odds_ratio": -0.4295998215675354, + "logits/chosen": -0.2805134057998657, + "logits/rejected": -2.1965882778167725, + "logps/chosen": -1.7489657402038574, + "logps/rejected": -2.5288243293762207, + "loss": 1.7557, + "nll_loss": 1.7127894163131714, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17489658296108246, + "rewards/margins": 0.07798586785793304, + "rewards/rejected": -0.2528824508190155, + "step": 14 + }, + { + "epoch": 0.05660377358490566, + "grad_norm": 0.46898186206817627, + "learning_rate": 4.858490566037736e-05, + "log_odds_chosen": 1.263291597366333, + "log_odds_ratio": -0.4335542321205139, + "logits/chosen": -0.10565178096294403, + "logits/rejected": -1.839094638824463, + "logps/chosen": -1.3841191530227661, + "logps/rejected": -2.2567789554595947, + "loss": 1.4752, + "nll_loss": 1.431824803352356, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13841190934181213, + "rewards/margins": 0.0872659832239151, + "rewards/rejected": -0.22567789256572723, + "step": 15 + }, + { + "epoch": 0.06037735849056604, + "grad_norm": 0.43158116936683655, + "learning_rate": 4.849056603773585e-05, + "log_odds_chosen": 0.6095455288887024, + "log_odds_ratio": -0.5792121887207031, + "logits/chosen": -0.17698495090007782, + "logits/rejected": -2.8387160301208496, + "logps/chosen": -2.014704942703247, + "logps/rejected": -2.574517250061035, + "loss": 1.9717, + "nll_loss": 1.9137517213821411, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2014704942703247, + "rewards/margins": 0.05598121136426926, + "rewards/rejected": -0.25745171308517456, + "step": 16 + }, + { + "epoch": 0.06415094339622641, + "grad_norm": 0.4689409136772156, + "learning_rate": 4.8396226415094344e-05, + "log_odds_chosen": 1.2785823345184326, + "log_odds_ratio": -0.3914850652217865, + "logits/chosen": -0.17786982655525208, + "logits/rejected": -1.9287208318710327, + "logps/chosen": -1.5383832454681396, + "logps/rejected": -2.564413547515869, + "loss": 1.6308, + "nll_loss": 1.5916929244995117, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.15383832156658173, + "rewards/margins": 0.10260303318500519, + "rewards/rejected": -0.2564413547515869, + "step": 17 + }, + { + "epoch": 0.06792452830188679, + "grad_norm": 0.4632433354854584, + "learning_rate": 4.8301886792452835e-05, + "log_odds_chosen": 1.8498305082321167, + "log_odds_ratio": -0.2724185287952423, + "logits/chosen": 0.00803515687584877, + "logits/rejected": -1.558810830116272, + "logps/chosen": -1.5008854866027832, + "logps/rejected": -3.094841957092285, + "loss": 1.3575, + "nll_loss": 1.3302488327026367, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15008854866027832, + "rewards/margins": 0.15939566493034363, + "rewards/rejected": -0.30948421359062195, + "step": 18 + }, + { + "epoch": 0.07169811320754717, + "grad_norm": 0.35216155648231506, + "learning_rate": 4.8207547169811326e-05, + "log_odds_chosen": 2.0445966720581055, + "log_odds_ratio": -0.22769802808761597, + "logits/chosen": -1.055422306060791, + "logits/rejected": -2.8562209606170654, + "logps/chosen": -1.4856079816818237, + "logps/rejected": -3.2529525756835938, + "loss": 1.6494, + "nll_loss": 1.626587152481079, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1485608071088791, + "rewards/margins": 0.17673446238040924, + "rewards/rejected": -0.32529526948928833, + "step": 19 + }, + { + "epoch": 0.07547169811320754, + "grad_norm": 0.3028462827205658, + "learning_rate": 4.811320754716982e-05, + "log_odds_chosen": 1.1802749633789062, + "log_odds_ratio": -0.4493004083633423, + "logits/chosen": -1.202689290046692, + "logits/rejected": -3.1590261459350586, + "logps/chosen": -1.8469630479812622, + "logps/rejected": -2.9175806045532227, + "loss": 1.6889, + "nll_loss": 1.6439603567123413, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.18469631671905518, + "rewards/margins": 0.10706175863742828, + "rewards/rejected": -0.29175806045532227, + "step": 20 + }, + { + "epoch": 0.07924528301886792, + "grad_norm": 0.28935325145721436, + "learning_rate": 4.80188679245283e-05, + "log_odds_chosen": 2.2533977031707764, + "log_odds_ratio": -0.21968787908554077, + "logits/chosen": -0.12538594007492065, + "logits/rejected": -1.9605629444122314, + "logps/chosen": -1.555745244026184, + "logps/rejected": -3.573531150817871, + "loss": 1.479, + "nll_loss": 1.4570300579071045, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15557453036308289, + "rewards/margins": 0.2017785757780075, + "rewards/rejected": -0.3573530912399292, + "step": 21 + }, + { + "epoch": 0.0830188679245283, + "grad_norm": 0.39032241702079773, + "learning_rate": 4.792452830188679e-05, + "log_odds_chosen": 1.7034038305282593, + "log_odds_ratio": -0.41050586104393005, + "logits/chosen": -0.2803301215171814, + "logits/rejected": -2.3801870346069336, + "logps/chosen": -1.699625015258789, + "logps/rejected": -3.263505458831787, + "loss": 1.7973, + "nll_loss": 1.7562379837036133, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16996252536773682, + "rewards/margins": 0.1563880443572998, + "rewards/rejected": -0.3263505697250366, + "step": 22 + }, + { + "epoch": 0.08679245283018867, + "grad_norm": 0.2868567705154419, + "learning_rate": 4.7830188679245284e-05, + "log_odds_chosen": 2.2137861251831055, + "log_odds_ratio": -0.3145076334476471, + "logits/chosen": -1.0021281242370605, + "logits/rejected": -2.4567623138427734, + "logps/chosen": -1.621106743812561, + "logps/rejected": -3.6742091178894043, + "loss": 1.5856, + "nll_loss": 1.5541696548461914, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.16211068630218506, + "rewards/margins": 0.20531021058559418, + "rewards/rejected": -0.36742085218429565, + "step": 23 + }, + { + "epoch": 0.09056603773584905, + "grad_norm": 0.3213948607444763, + "learning_rate": 4.7735849056603775e-05, + "log_odds_chosen": 3.161144256591797, + "log_odds_ratio": -0.2373344600200653, + "logits/chosen": -0.881264865398407, + "logits/rejected": -2.189188241958618, + "logps/chosen": -1.3406716585159302, + "logps/rejected": -3.9409539699554443, + "loss": 1.5735, + "nll_loss": 1.5498067140579224, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13406717777252197, + "rewards/margins": 0.2600281834602356, + "rewards/rejected": -0.39409539103507996, + "step": 24 + }, + { + "epoch": 0.09433962264150944, + "grad_norm": 0.3009367287158966, + "learning_rate": 4.7641509433962266e-05, + "log_odds_chosen": 2.8621065616607666, + "log_odds_ratio": -0.2253154218196869, + "logits/chosen": -0.2625047266483307, + "logits/rejected": -2.8666999340057373, + "logps/chosen": -1.7110705375671387, + "logps/rejected": -4.400345325469971, + "loss": 1.7058, + "nll_loss": 1.683306097984314, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.17110705375671387, + "rewards/margins": 0.2689274847507477, + "rewards/rejected": -0.44003453850746155, + "step": 25 + }, + { + "epoch": 0.09811320754716982, + "grad_norm": 0.3267085552215576, + "learning_rate": 4.754716981132076e-05, + "log_odds_chosen": 2.815523386001587, + "log_odds_ratio": -0.2813441753387451, + "logits/chosen": -1.2153959274291992, + "logits/rejected": -3.1709775924682617, + "logps/chosen": -1.6071617603302002, + "logps/rejected": -4.192075729370117, + "loss": 1.642, + "nll_loss": 1.6138246059417725, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.16071617603302002, + "rewards/margins": 0.2584913671016693, + "rewards/rejected": -0.41920754313468933, + "step": 26 + }, + { + "epoch": 0.1018867924528302, + "grad_norm": 0.4692343771457672, + "learning_rate": 4.745283018867925e-05, + "log_odds_chosen": 3.2350544929504395, + "log_odds_ratio": -0.22966182231903076, + "logits/chosen": -0.6714242696762085, + "logits/rejected": -3.4772703647613525, + "logps/chosen": -1.573210597038269, + "logps/rejected": -4.57033634185791, + "loss": 1.5627, + "nll_loss": 1.5397582054138184, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.15732106566429138, + "rewards/margins": 0.29971253871917725, + "rewards/rejected": -0.45703360438346863, + "step": 27 + }, + { + "epoch": 0.10566037735849057, + "grad_norm": 0.31730151176452637, + "learning_rate": 4.735849056603774e-05, + "log_odds_chosen": 3.0557804107666016, + "log_odds_ratio": -0.19463254511356354, + "logits/chosen": -0.8169313669204712, + "logits/rejected": -1.874739646911621, + "logps/chosen": -1.7243003845214844, + "logps/rejected": -4.595163345336914, + "loss": 1.5311, + "nll_loss": 1.5116479396820068, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17243005335330963, + "rewards/margins": 0.2870863080024719, + "rewards/rejected": -0.45951637625694275, + "step": 28 + }, + { + "epoch": 0.10943396226415095, + "grad_norm": 0.25958406925201416, + "learning_rate": 4.726415094339623e-05, + "log_odds_chosen": 4.204806327819824, + "log_odds_ratio": -0.06744246184825897, + "logits/chosen": -0.7214003801345825, + "logits/rejected": -3.8571434020996094, + "logps/chosen": -1.5588371753692627, + "logps/rejected": -5.471315383911133, + "loss": 1.5991, + "nll_loss": 1.5923649072647095, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15588372945785522, + "rewards/margins": 0.39124780893325806, + "rewards/rejected": -0.5471315383911133, + "step": 29 + }, + { + "epoch": 0.11320754716981132, + "grad_norm": 0.2962592542171478, + "learning_rate": 4.716981132075472e-05, + "log_odds_chosen": 2.306314468383789, + "log_odds_ratio": -0.5868710279464722, + "logits/chosen": -0.9562402963638306, + "logits/rejected": -4.068589687347412, + "logps/chosen": -1.8381496667861938, + "logps/rejected": -4.1114959716796875, + "loss": 1.7932, + "nll_loss": 1.7344965934753418, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.18381497263908386, + "rewards/margins": 0.2273346334695816, + "rewards/rejected": -0.41114962100982666, + "step": 30 + }, + { + "epoch": 0.1169811320754717, + "grad_norm": 0.3253747224807739, + "learning_rate": 4.707547169811321e-05, + "log_odds_chosen": 3.867879867553711, + "log_odds_ratio": -0.2663135528564453, + "logits/chosen": -1.6323354244232178, + "logits/rejected": -2.8868775367736816, + "logps/chosen": -1.3807705640792847, + "logps/rejected": -4.98048210144043, + "loss": 1.3989, + "nll_loss": 1.3722314834594727, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.13807706534862518, + "rewards/margins": 0.35997116565704346, + "rewards/rejected": -0.49804821610450745, + "step": 31 + }, + { + "epoch": 0.12075471698113208, + "grad_norm": 0.25523456931114197, + "learning_rate": 4.6981132075471704e-05, + "log_odds_chosen": 4.133279800415039, + "log_odds_ratio": -0.20295441150665283, + "logits/chosen": -0.9551544785499573, + "logits/rejected": -4.06210470199585, + "logps/chosen": -1.683975338935852, + "logps/rejected": -5.5400261878967285, + "loss": 1.7012, + "nll_loss": 1.6808902025222778, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16839754581451416, + "rewards/margins": 0.3856050968170166, + "rewards/rejected": -0.5540026426315308, + "step": 32 + }, + { + "epoch": 0.12452830188679245, + "grad_norm": 0.2824070155620575, + "learning_rate": 4.6886792452830195e-05, + "log_odds_chosen": 4.0810699462890625, + "log_odds_ratio": -0.24137283861637115, + "logits/chosen": -0.1275748610496521, + "logits/rejected": -1.968299150466919, + "logps/chosen": -1.4146742820739746, + "logps/rejected": -5.19221830368042, + "loss": 1.4468, + "nll_loss": 1.422704815864563, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14146743714809418, + "rewards/margins": 0.37775442004203796, + "rewards/rejected": -0.519221842288971, + "step": 33 + }, + { + "epoch": 0.12830188679245283, + "grad_norm": 0.3047614097595215, + "learning_rate": 4.679245283018868e-05, + "log_odds_chosen": 4.300058364868164, + "log_odds_ratio": -0.2853168845176697, + "logits/chosen": -2.589682102203369, + "logits/rejected": -3.8946940898895264, + "logps/chosen": -1.4936325550079346, + "logps/rejected": -5.401619911193848, + "loss": 1.6643, + "nll_loss": 1.6357437372207642, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.14936324954032898, + "rewards/margins": 0.3907987177371979, + "rewards/rejected": -0.5401619672775269, + "step": 34 + }, + { + "epoch": 0.1320754716981132, + "grad_norm": 0.298910528421402, + "learning_rate": 4.669811320754717e-05, + "log_odds_chosen": 6.439607620239258, + "log_odds_ratio": -0.03047255240380764, + "logits/chosen": -1.3439738750457764, + "logits/rejected": -3.9568533897399902, + "logps/chosen": -1.3088639974594116, + "logps/rejected": -7.318572044372559, + "loss": 1.6855, + "nll_loss": 1.6824290752410889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13088640570640564, + "rewards/margins": 0.6009708642959595, + "rewards/rejected": -0.7318572402000427, + "step": 35 + }, + { + "epoch": 0.13584905660377358, + "grad_norm": 0.4024220407009125, + "learning_rate": 4.660377358490566e-05, + "log_odds_chosen": 5.031867980957031, + "log_odds_ratio": -0.17651405930519104, + "logits/chosen": -0.4531555771827698, + "logits/rejected": -3.1221275329589844, + "logps/chosen": -1.6552730798721313, + "logps/rejected": -6.324914932250977, + "loss": 1.7674, + "nll_loss": 1.749765157699585, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16552734375, + "rewards/margins": 0.46696415543556213, + "rewards/rejected": -0.6324914693832397, + "step": 36 + }, + { + "epoch": 0.13962264150943396, + "grad_norm": 0.2770419418811798, + "learning_rate": 4.650943396226415e-05, + "log_odds_chosen": 5.099140167236328, + "log_odds_ratio": -0.25729480385780334, + "logits/chosen": -1.506839632987976, + "logits/rejected": -3.454437732696533, + "logps/chosen": -1.6088337898254395, + "logps/rejected": -6.49648904800415, + "loss": 1.5488, + "nll_loss": 1.5230274200439453, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.160883367061615, + "rewards/margins": 0.48876553773880005, + "rewards/rejected": -0.649648904800415, + "step": 37 + }, + { + "epoch": 0.14339622641509434, + "grad_norm": 0.31073063611984253, + "learning_rate": 4.641509433962264e-05, + "log_odds_chosen": 5.445122241973877, + "log_odds_ratio": -0.18372483551502228, + "logits/chosen": -1.1133687496185303, + "logits/rejected": -3.4876227378845215, + "logps/chosen": -1.4217114448547363, + "logps/rejected": -6.529870986938477, + "loss": 1.4282, + "nll_loss": 1.4098114967346191, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14217115938663483, + "rewards/margins": 0.5108159184455872, + "rewards/rejected": -0.6529870629310608, + "step": 38 + }, + { + "epoch": 0.1471698113207547, + "grad_norm": 0.29280173778533936, + "learning_rate": 4.6320754716981134e-05, + "log_odds_chosen": 6.195460319519043, + "log_odds_ratio": -0.035549942404031754, + "logits/chosen": -1.361371636390686, + "logits/rejected": -4.5884246826171875, + "logps/chosen": -1.7262309789657593, + "logps/rejected": -7.716752529144287, + "loss": 1.5956, + "nll_loss": 1.5920307636260986, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17262309789657593, + "rewards/margins": 0.5990520715713501, + "rewards/rejected": -0.7716752290725708, + "step": 39 + }, + { + "epoch": 0.1509433962264151, + "grad_norm": 0.28057682514190674, + "learning_rate": 4.6226415094339625e-05, + "log_odds_chosen": 6.020131587982178, + "log_odds_ratio": -0.03772380203008652, + "logits/chosen": -1.2496304512023926, + "logits/rejected": -4.043735504150391, + "logps/chosen": -1.566927433013916, + "logps/rejected": -7.254069805145264, + "loss": 1.4067, + "nll_loss": 1.402917742729187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1566927433013916, + "rewards/margins": 0.5687142610549927, + "rewards/rejected": -0.7254070043563843, + "step": 40 + }, + { + "epoch": 0.15471698113207547, + "grad_norm": 0.2948552072048187, + "learning_rate": 4.6132075471698117e-05, + "log_odds_chosen": 4.575827598571777, + "log_odds_ratio": -0.2891075015068054, + "logits/chosen": -0.34096649289131165, + "logits/rejected": -2.514559507369995, + "logps/chosen": -1.75924813747406, + "logps/rejected": -6.152600288391113, + "loss": 1.6047, + "nll_loss": 1.575812816619873, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.17592480778694153, + "rewards/margins": 0.4393352270126343, + "rewards/rejected": -0.6152600049972534, + "step": 41 + }, + { + "epoch": 0.15849056603773584, + "grad_norm": 0.3365479111671448, + "learning_rate": 4.603773584905661e-05, + "log_odds_chosen": 6.562654972076416, + "log_odds_ratio": -0.16193996369838715, + "logits/chosen": -1.5872305631637573, + "logits/rejected": -3.203104019165039, + "logps/chosen": -1.6002566814422607, + "logps/rejected": -7.7676682472229, + "loss": 1.7498, + "nll_loss": 1.7336182594299316, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1600256711244583, + "rewards/margins": 0.6167411208152771, + "rewards/rejected": -0.7767667770385742, + "step": 42 + }, + { + "epoch": 0.16226415094339622, + "grad_norm": 0.28234297037124634, + "learning_rate": 4.59433962264151e-05, + "log_odds_chosen": 5.429837226867676, + "log_odds_ratio": -0.21709167957305908, + "logits/chosen": 0.13629187643527985, + "logits/rejected": -3.1108312606811523, + "logps/chosen": -1.7401251792907715, + "logps/rejected": -7.000548362731934, + "loss": 1.5603, + "nll_loss": 1.538613200187683, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17401251196861267, + "rewards/margins": 0.5260423421859741, + "rewards/rejected": -0.7000548243522644, + "step": 43 + }, + { + "epoch": 0.1660377358490566, + "grad_norm": 0.30918800830841064, + "learning_rate": 4.584905660377359e-05, + "log_odds_chosen": 5.783122539520264, + "log_odds_ratio": -0.11934540420770645, + "logits/chosen": -0.9094568490982056, + "logits/rejected": -3.6608901023864746, + "logps/chosen": -1.6641435623168945, + "logps/rejected": -7.227668285369873, + "loss": 1.5404, + "nll_loss": 1.5284289121627808, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16641436517238617, + "rewards/margins": 0.5563524961471558, + "rewards/rejected": -0.7227668762207031, + "step": 44 + }, + { + "epoch": 0.16981132075471697, + "grad_norm": 0.3235601782798767, + "learning_rate": 4.575471698113208e-05, + "log_odds_chosen": 6.502612113952637, + "log_odds_ratio": -0.10429678857326508, + "logits/chosen": -1.07891047000885, + "logits/rejected": -3.1915483474731445, + "logps/chosen": -1.5495665073394775, + "logps/rejected": -7.755338668823242, + "loss": 1.548, + "nll_loss": 1.5375723838806152, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1549566686153412, + "rewards/margins": 0.6205772161483765, + "rewards/rejected": -0.7755338549613953, + "step": 45 + }, + { + "epoch": 0.17358490566037735, + "grad_norm": 0.26763808727264404, + "learning_rate": 4.566037735849057e-05, + "log_odds_chosen": 6.199982166290283, + "log_odds_ratio": -0.07396085560321808, + "logits/chosen": -1.4093493223190308, + "logits/rejected": -5.298598289489746, + "logps/chosen": -1.7217578887939453, + "logps/rejected": -7.687525749206543, + "loss": 1.5789, + "nll_loss": 1.571506381034851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.172175794839859, + "rewards/margins": 0.5965768098831177, + "rewards/rejected": -0.7687525749206543, + "step": 46 + }, + { + "epoch": 0.17735849056603772, + "grad_norm": 0.28478890657424927, + "learning_rate": 4.556603773584906e-05, + "log_odds_chosen": 5.49038028717041, + "log_odds_ratio": -0.17011021077632904, + "logits/chosen": -0.664116621017456, + "logits/rejected": -3.217937469482422, + "logps/chosen": -2.0000357627868652, + "logps/rejected": -7.373146057128906, + "loss": 1.7461, + "nll_loss": 1.7290459871292114, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.20000356435775757, + "rewards/margins": 0.5373110771179199, + "rewards/rejected": -0.7373145818710327, + "step": 47 + }, + { + "epoch": 0.1811320754716981, + "grad_norm": 0.29675430059432983, + "learning_rate": 4.547169811320755e-05, + "log_odds_chosen": 6.941376209259033, + "log_odds_ratio": -0.07056228816509247, + "logits/chosen": -0.9245410561561584, + "logits/rejected": -3.209742546081543, + "logps/chosen": -1.5620588064193726, + "logps/rejected": -8.255936622619629, + "loss": 1.6204, + "nll_loss": 1.6133677959442139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1562058925628662, + "rewards/margins": 0.6693878173828125, + "rewards/rejected": -0.8255936503410339, + "step": 48 + }, + { + "epoch": 0.18490566037735848, + "grad_norm": 0.3024120330810547, + "learning_rate": 4.537735849056604e-05, + "log_odds_chosen": 7.8667497634887695, + "log_odds_ratio": -0.11265160888433456, + "logits/chosen": -1.589120864868164, + "logits/rejected": -4.3788957595825195, + "logps/chosen": -1.7432844638824463, + "logps/rejected": -9.341209411621094, + "loss": 1.7561, + "nll_loss": 1.7448220252990723, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17432843148708344, + "rewards/margins": 0.7597925662994385, + "rewards/rejected": -0.9341210126876831, + "step": 49 + }, + { + "epoch": 0.18867924528301888, + "grad_norm": 0.27790966629981995, + "learning_rate": 4.528301886792453e-05, + "log_odds_chosen": 9.197233200073242, + "log_odds_ratio": -0.0743798017501831, + "logits/chosen": -2.399343729019165, + "logits/rejected": -5.902744770050049, + "logps/chosen": -1.7229493856430054, + "logps/rejected": -10.651434898376465, + "loss": 1.6465, + "nll_loss": 1.6390764713287354, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17229494452476501, + "rewards/margins": 0.8928486108779907, + "rewards/rejected": -1.0651434659957886, + "step": 50 + }, + { + "epoch": 0.19245283018867926, + "grad_norm": 0.28730452060699463, + "learning_rate": 4.518867924528302e-05, + "log_odds_chosen": 7.1606950759887695, + "log_odds_ratio": -0.15011747181415558, + "logits/chosen": -0.7371699810028076, + "logits/rejected": -3.6732337474823, + "logps/chosen": -1.4196903705596924, + "logps/rejected": -8.275727272033691, + "loss": 1.5138, + "nll_loss": 1.4987900257110596, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14196905493736267, + "rewards/margins": 0.685603678226471, + "rewards/rejected": -0.8275727033615112, + "step": 51 + }, + { + "epoch": 0.19622641509433963, + "grad_norm": 0.3072900176048279, + "learning_rate": 4.509433962264151e-05, + "log_odds_chosen": 9.726778030395508, + "log_odds_ratio": -0.0466623455286026, + "logits/chosen": -2.5045127868652344, + "logits/rejected": -5.068806171417236, + "logps/chosen": -1.6165649890899658, + "logps/rejected": -11.054706573486328, + "loss": 1.4102, + "nll_loss": 1.4055166244506836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16165651381015778, + "rewards/margins": 0.9438142776489258, + "rewards/rejected": -1.1054707765579224, + "step": 52 + }, + { + "epoch": 0.2, + "grad_norm": 0.2943893074989319, + "learning_rate": 4.5e-05, + "log_odds_chosen": 10.128807067871094, + "log_odds_ratio": -0.0014914250932633877, + "logits/chosen": -1.9018776416778564, + "logits/rejected": -4.2667951583862305, + "logps/chosen": -1.5886954069137573, + "logps/rejected": -11.448442459106445, + "loss": 1.6031, + "nll_loss": 1.6029250621795654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15886953473091125, + "rewards/margins": 0.9859746694564819, + "rewards/rejected": -1.1448442935943604, + "step": 53 + }, + { + "epoch": 0.2037735849056604, + "grad_norm": 0.2680610418319702, + "learning_rate": 4.4905660377358494e-05, + "log_odds_chosen": 8.853492736816406, + "log_odds_ratio": -0.04176551476120949, + "logits/chosen": -1.450791835784912, + "logits/rejected": -5.139822006225586, + "logps/chosen": -1.6527010202407837, + "logps/rejected": -10.218703269958496, + "loss": 1.6926, + "nll_loss": 1.6884214878082275, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16527009010314941, + "rewards/margins": 0.8566002249717712, + "rewards/rejected": -1.0218703746795654, + "step": 54 + }, + { + "epoch": 0.20754716981132076, + "grad_norm": 0.32189199328422546, + "learning_rate": 4.4811320754716985e-05, + "log_odds_chosen": 11.52000617980957, + "log_odds_ratio": -0.000579544750507921, + "logits/chosen": -1.3866709470748901, + "logits/rejected": -3.310800790786743, + "logps/chosen": -1.6044374704360962, + "logps/rejected": -12.8070068359375, + "loss": 1.527, + "nll_loss": 1.5269696712493896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1604437530040741, + "rewards/margins": 1.120257019996643, + "rewards/rejected": -1.2807008028030396, + "step": 55 + }, + { + "epoch": 0.21132075471698114, + "grad_norm": 0.2579837739467621, + "learning_rate": 4.4716981132075476e-05, + "log_odds_chosen": 8.566587448120117, + "log_odds_ratio": -0.025317970663309097, + "logits/chosen": -0.9709105491638184, + "logits/rejected": -5.532227993011475, + "logps/chosen": -1.6428523063659668, + "logps/rejected": -9.977346420288086, + "loss": 1.5057, + "nll_loss": 1.5031633377075195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16428521275520325, + "rewards/margins": 0.8334494829177856, + "rewards/rejected": -0.9977346658706665, + "step": 56 + }, + { + "epoch": 0.21509433962264152, + "grad_norm": 0.32639437913894653, + "learning_rate": 4.462264150943397e-05, + "log_odds_chosen": 10.306001663208008, + "log_odds_ratio": -0.03766224905848503, + "logits/chosen": -1.1223177909851074, + "logits/rejected": -2.9572572708129883, + "logps/chosen": -1.6183445453643799, + "logps/rejected": -11.599272727966309, + "loss": 1.5893, + "nll_loss": 1.5855064392089844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1618344485759735, + "rewards/margins": 0.998092770576477, + "rewards/rejected": -1.159927248954773, + "step": 57 + }, + { + "epoch": 0.2188679245283019, + "grad_norm": 0.25903022289276123, + "learning_rate": 4.452830188679246e-05, + "log_odds_chosen": 8.881806373596191, + "log_odds_ratio": -0.10347943007946014, + "logits/chosen": -0.9591866731643677, + "logits/rejected": -4.289064407348633, + "logps/chosen": -1.6897010803222656, + "logps/rejected": -10.346776962280273, + "loss": 1.6166, + "nll_loss": 1.606278419494629, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16897010803222656, + "rewards/margins": 0.8657075762748718, + "rewards/rejected": -1.0346777439117432, + "step": 58 + }, + { + "epoch": 0.22264150943396227, + "grad_norm": 0.2665920853614807, + "learning_rate": 4.443396226415095e-05, + "log_odds_chosen": 11.143839836120605, + "log_odds_ratio": -0.01865122653543949, + "logits/chosen": -2.2705538272857666, + "logits/rejected": -4.639202117919922, + "logps/chosen": -1.646267056465149, + "logps/rejected": -12.43774127960205, + "loss": 1.5823, + "nll_loss": 1.58046555519104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16462671756744385, + "rewards/margins": 1.0791475772857666, + "rewards/rejected": -1.243774175643921, + "step": 59 + }, + { + "epoch": 0.22641509433962265, + "grad_norm": 0.2563314139842987, + "learning_rate": 4.433962264150944e-05, + "log_odds_chosen": 11.137109756469727, + "log_odds_ratio": -0.03539396822452545, + "logits/chosen": -0.5501875281333923, + "logits/rejected": -4.503236770629883, + "logps/chosen": -1.5203689336776733, + "logps/rejected": -12.256806373596191, + "loss": 1.5345, + "nll_loss": 1.5309462547302246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1520369052886963, + "rewards/margins": 1.073643684387207, + "rewards/rejected": -1.2256807088851929, + "step": 60 + }, + { + "epoch": 0.23018867924528302, + "grad_norm": 0.3310481011867523, + "learning_rate": 4.4245283018867925e-05, + "log_odds_chosen": 10.03518295288086, + "log_odds_ratio": -0.09628309309482574, + "logits/chosen": -1.4740400314331055, + "logits/rejected": -4.722970962524414, + "logps/chosen": -1.992842435836792, + "logps/rejected": -11.883909225463867, + "loss": 1.7703, + "nll_loss": 1.7606245279312134, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.19928425550460815, + "rewards/margins": 0.9891066551208496, + "rewards/rejected": -1.1883909702301025, + "step": 61 + }, + { + "epoch": 0.2339622641509434, + "grad_norm": 0.3190458416938782, + "learning_rate": 4.4150943396226416e-05, + "log_odds_chosen": 9.210914611816406, + "log_odds_ratio": -0.040729597210884094, + "logits/chosen": -0.7013575434684753, + "logits/rejected": -3.3500924110412598, + "logps/chosen": -1.950926423072815, + "logps/rejected": -10.972383499145508, + "loss": 1.752, + "nll_loss": 1.7479382753372192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19509264826774597, + "rewards/margins": 0.9021456837654114, + "rewards/rejected": -1.097238302230835, + "step": 62 + }, + { + "epoch": 0.23773584905660378, + "grad_norm": 0.3573385179042816, + "learning_rate": 4.405660377358491e-05, + "log_odds_chosen": 11.01914119720459, + "log_odds_ratio": -0.06296153366565704, + "logits/chosen": -1.7574726343154907, + "logits/rejected": -3.601536989212036, + "logps/chosen": -1.648729920387268, + "logps/rejected": -12.339969635009766, + "loss": 1.528, + "nll_loss": 1.5217459201812744, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16487298905849457, + "rewards/margins": 1.0691239833831787, + "rewards/rejected": -1.233996868133545, + "step": 63 + }, + { + "epoch": 0.24150943396226415, + "grad_norm": 0.2962454557418823, + "learning_rate": 4.39622641509434e-05, + "log_odds_chosen": 10.007362365722656, + "log_odds_ratio": -0.043272338807582855, + "logits/chosen": -1.2760871648788452, + "logits/rejected": -4.875994682312012, + "logps/chosen": -1.6131874322891235, + "logps/rejected": -11.287424087524414, + "loss": 1.5696, + "nll_loss": 1.5652244091033936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16131874918937683, + "rewards/margins": 0.967423677444458, + "rewards/rejected": -1.1287423372268677, + "step": 64 + }, + { + "epoch": 0.24528301886792453, + "grad_norm": 0.3105560541152954, + "learning_rate": 4.386792452830189e-05, + "log_odds_chosen": 11.872138977050781, + "log_odds_ratio": -0.07399442046880722, + "logits/chosen": -0.6608960628509521, + "logits/rejected": -3.028031349182129, + "logps/chosen": -1.554811716079712, + "logps/rejected": -13.131336212158203, + "loss": 1.3613, + "nll_loss": 1.3538590669631958, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15548115968704224, + "rewards/margins": 1.1576523780822754, + "rewards/rejected": -1.313133716583252, + "step": 65 + }, + { + "epoch": 0.2490566037735849, + "grad_norm": 0.27170437574386597, + "learning_rate": 4.377358490566038e-05, + "log_odds_chosen": 9.97773265838623, + "log_odds_ratio": -0.05262986570596695, + "logits/chosen": -1.7461464405059814, + "logits/rejected": -3.9088220596313477, + "logps/chosen": -1.6022846698760986, + "logps/rejected": -11.324554443359375, + "loss": 1.5483, + "nll_loss": 1.542998194694519, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16022847592830658, + "rewards/margins": 0.9722269177436829, + "rewards/rejected": -1.1324553489685059, + "step": 66 + }, + { + "epoch": 0.2528301886792453, + "grad_norm": 0.34835413098335266, + "learning_rate": 4.367924528301887e-05, + "log_odds_chosen": 9.539581298828125, + "log_odds_ratio": -0.11755736172199249, + "logits/chosen": -1.313893437385559, + "logits/rejected": -3.734592914581299, + "logps/chosen": -1.4113194942474365, + "logps/rejected": -10.656464576721191, + "loss": 1.3774, + "nll_loss": 1.3656796216964722, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1411319375038147, + "rewards/margins": 0.9245145320892334, + "rewards/rejected": -1.0656465291976929, + "step": 67 + }, + { + "epoch": 0.25660377358490566, + "grad_norm": 0.2896377444267273, + "learning_rate": 4.358490566037736e-05, + "log_odds_chosen": 10.555123329162598, + "log_odds_ratio": -0.08397966623306274, + "logits/chosen": -0.9625846743583679, + "logits/rejected": -3.1470422744750977, + "logps/chosen": -1.5027086734771729, + "logps/rejected": -11.76999282836914, + "loss": 1.5365, + "nll_loss": 1.5281280279159546, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15027087926864624, + "rewards/margins": 1.0267283916473389, + "rewards/rejected": -1.1769993305206299, + "step": 68 + }, + { + "epoch": 0.26037735849056604, + "grad_norm": 0.2656661868095398, + "learning_rate": 4.3490566037735853e-05, + "log_odds_chosen": 11.847007751464844, + "log_odds_ratio": -0.0024143236223608255, + "logits/chosen": -2.249497890472412, + "logits/rejected": -4.6625542640686035, + "logps/chosen": -1.611348271369934, + "logps/rejected": -13.128585815429688, + "loss": 1.3686, + "nll_loss": 1.368332862854004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16113483905792236, + "rewards/margins": 1.1517237424850464, + "rewards/rejected": -1.3128585815429688, + "step": 69 + }, + { + "epoch": 0.2641509433962264, + "grad_norm": 0.2848161458969116, + "learning_rate": 4.3396226415094345e-05, + "log_odds_chosen": 11.41952896118164, + "log_odds_ratio": -0.06205465644598007, + "logits/chosen": -1.0198203325271606, + "logits/rejected": -4.255819320678711, + "logps/chosen": -1.6085125207901, + "logps/rejected": -12.758248329162598, + "loss": 1.5198, + "nll_loss": 1.5136139392852783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16085125505924225, + "rewards/margins": 1.114973545074463, + "rewards/rejected": -1.2758249044418335, + "step": 70 + }, + { + "epoch": 0.2679245283018868, + "grad_norm": 0.3033360540866852, + "learning_rate": 4.3301886792452836e-05, + "log_odds_chosen": 8.192865371704102, + "log_odds_ratio": -0.06372419744729996, + "logits/chosen": -2.1323366165161133, + "logits/rejected": -4.94224214553833, + "logps/chosen": -1.7140583992004395, + "logps/rejected": -9.679903030395508, + "loss": 1.6245, + "nll_loss": 1.6181257963180542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1714058518409729, + "rewards/margins": 0.7965843677520752, + "rewards/rejected": -0.9679902791976929, + "step": 71 + }, + { + "epoch": 0.27169811320754716, + "grad_norm": 0.35890570282936096, + "learning_rate": 4.320754716981133e-05, + "log_odds_chosen": 9.654875755310059, + "log_odds_ratio": -0.13519693911075592, + "logits/chosen": -0.6182310581207275, + "logits/rejected": -2.691154718399048, + "logps/chosen": -1.8398606777191162, + "logps/rejected": -11.286558151245117, + "loss": 1.5057, + "nll_loss": 1.492200255393982, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.18398606777191162, + "rewards/margins": 0.9446697235107422, + "rewards/rejected": -1.1286557912826538, + "step": 72 + }, + { + "epoch": 0.27547169811320754, + "grad_norm": 1.4705555438995361, + "learning_rate": 4.311320754716982e-05, + "log_odds_chosen": 13.209151268005371, + "log_odds_ratio": -0.0005382616654969752, + "logits/chosen": -0.8367394804954529, + "logits/rejected": -4.00326681137085, + "logps/chosen": -1.536256194114685, + "logps/rejected": -14.438907623291016, + "loss": 1.4609, + "nll_loss": 1.4608838558197021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15362560749053955, + "rewards/margins": 1.2902652025222778, + "rewards/rejected": -1.4438908100128174, + "step": 73 + }, + { + "epoch": 0.2792452830188679, + "grad_norm": 0.33669015765190125, + "learning_rate": 4.301886792452831e-05, + "log_odds_chosen": 11.09627628326416, + "log_odds_ratio": -0.09562374651432037, + "logits/chosen": -1.0604119300842285, + "logits/rejected": -2.2503695487976074, + "logps/chosen": -1.7556407451629639, + "logps/rejected": -12.555806159973145, + "loss": 1.3805, + "nll_loss": 1.3709644079208374, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17556408047676086, + "rewards/margins": 1.0800164937973022, + "rewards/rejected": -1.2555806636810303, + "step": 74 + }, + { + "epoch": 0.2830188679245283, + "grad_norm": 0.26332709193229675, + "learning_rate": 4.292452830188679e-05, + "log_odds_chosen": 11.302566528320312, + "log_odds_ratio": -0.013619111850857735, + "logits/chosen": -1.1802372932434082, + "logits/rejected": -4.334959506988525, + "logps/chosen": -1.7836018800735474, + "logps/rejected": -12.880077362060547, + "loss": 1.6039, + "nll_loss": 1.6024914979934692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17836017906665802, + "rewards/margins": 1.1096476316452026, + "rewards/rejected": -1.2880078554153442, + "step": 75 + }, + { + "epoch": 0.28679245283018867, + "grad_norm": 0.2604648470878601, + "learning_rate": 4.2830188679245284e-05, + "log_odds_chosen": 11.805458068847656, + "log_odds_ratio": -0.09639889001846313, + "logits/chosen": -1.4125428199768066, + "logits/rejected": -4.383103847503662, + "logps/chosen": -1.753788948059082, + "logps/rejected": -13.35352897644043, + "loss": 1.5627, + "nll_loss": 1.5530548095703125, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17537888884544373, + "rewards/margins": 1.1599740982055664, + "rewards/rejected": -1.3353530168533325, + "step": 76 + }, + { + "epoch": 0.29056603773584905, + "grad_norm": 0.2621481418609619, + "learning_rate": 4.2735849056603775e-05, + "log_odds_chosen": 12.079008102416992, + "log_odds_ratio": -0.029307007789611816, + "logits/chosen": -1.0459429025650024, + "logits/rejected": -4.2921833992004395, + "logps/chosen": -1.7573282718658447, + "logps/rejected": -13.61059856414795, + "loss": 1.6019, + "nll_loss": 1.5990142822265625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17573282122612, + "rewards/margins": 1.1853270530700684, + "rewards/rejected": -1.3610599040985107, + "step": 77 + }, + { + "epoch": 0.2943396226415094, + "grad_norm": 0.23861609399318695, + "learning_rate": 4.2641509433962266e-05, + "log_odds_chosen": 14.536327362060547, + "log_odds_ratio": -0.006748859770596027, + "logits/chosen": -1.6867283582687378, + "logits/rejected": -4.363768100738525, + "logps/chosen": -1.6174168586730957, + "logps/rejected": -15.880059242248535, + "loss": 1.5851, + "nll_loss": 1.5844483375549316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16174167394638062, + "rewards/margins": 1.4262642860412598, + "rewards/rejected": -1.5880060195922852, + "step": 78 + }, + { + "epoch": 0.2981132075471698, + "grad_norm": 0.26430046558380127, + "learning_rate": 4.254716981132076e-05, + "log_odds_chosen": 10.643120765686035, + "log_odds_ratio": -0.15290699899196625, + "logits/chosen": -1.1397204399108887, + "logits/rejected": -4.667537689208984, + "logps/chosen": -1.5166621208190918, + "logps/rejected": -11.917654991149902, + "loss": 1.4211, + "nll_loss": 1.4057632684707642, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15166620910167694, + "rewards/margins": 1.0400993824005127, + "rewards/rejected": -1.191765546798706, + "step": 79 + }, + { + "epoch": 0.3018867924528302, + "grad_norm": 0.26361650228500366, + "learning_rate": 4.245283018867925e-05, + "log_odds_chosen": 14.004961967468262, + "log_odds_ratio": -0.0008121158462017775, + "logits/chosen": -1.747410774230957, + "logits/rejected": -5.366614818572998, + "logps/chosen": -1.7156703472137451, + "logps/rejected": -15.486541748046875, + "loss": 1.6296, + "nll_loss": 1.6295278072357178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17156702280044556, + "rewards/margins": 1.377087116241455, + "rewards/rejected": -1.5486541986465454, + "step": 80 + }, + { + "epoch": 0.30566037735849055, + "grad_norm": 0.3431408405303955, + "learning_rate": 4.235849056603774e-05, + "log_odds_chosen": 9.586076736450195, + "log_odds_ratio": -0.07982049137353897, + "logits/chosen": -2.0183656215667725, + "logits/rejected": -4.584896564483643, + "logps/chosen": -1.6326284408569336, + "logps/rejected": -10.968765258789062, + "loss": 1.3615, + "nll_loss": 1.3535064458847046, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16326284408569336, + "rewards/margins": 0.9336137175559998, + "rewards/rejected": -1.096876621246338, + "step": 81 + }, + { + "epoch": 0.30943396226415093, + "grad_norm": 0.29136648774147034, + "learning_rate": 4.226415094339623e-05, + "log_odds_chosen": 12.734142303466797, + "log_odds_ratio": -0.004932960495352745, + "logits/chosen": -2.464571952819824, + "logits/rejected": -4.204010009765625, + "logps/chosen": -1.1688156127929688, + "logps/rejected": -13.363677978515625, + "loss": 1.0781, + "nll_loss": 1.0775874853134155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11688156425952911, + "rewards/margins": 1.2194862365722656, + "rewards/rejected": -1.3363678455352783, + "step": 82 + }, + { + "epoch": 0.3132075471698113, + "grad_norm": 0.3025481402873993, + "learning_rate": 4.216981132075472e-05, + "log_odds_chosen": 11.096177101135254, + "log_odds_ratio": -0.0837731808423996, + "logits/chosen": -0.643512487411499, + "logits/rejected": -2.385446071624756, + "logps/chosen": -1.6675033569335938, + "logps/rejected": -12.507808685302734, + "loss": 1.4565, + "nll_loss": 1.4480957984924316, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16675035655498505, + "rewards/margins": 1.0840305089950562, + "rewards/rejected": -1.2507808208465576, + "step": 83 + }, + { + "epoch": 0.3169811320754717, + "grad_norm": 0.2666255831718445, + "learning_rate": 4.207547169811321e-05, + "log_odds_chosen": 12.243534088134766, + "log_odds_ratio": -0.17083272337913513, + "logits/chosen": -1.1914499998092651, + "logits/rejected": -4.547676086425781, + "logps/chosen": -1.4934356212615967, + "logps/rejected": -13.507254600524902, + "loss": 1.5673, + "nll_loss": 1.5502480268478394, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1493435651063919, + "rewards/margins": 1.2013819217681885, + "rewards/rejected": -1.3507256507873535, + "step": 84 + }, + { + "epoch": 0.32075471698113206, + "grad_norm": 0.3045007884502411, + "learning_rate": 4.1981132075471704e-05, + "log_odds_chosen": 12.160042762756348, + "log_odds_ratio": -0.08180340379476547, + "logits/chosen": -2.2346179485321045, + "logits/rejected": -4.7149529457092285, + "logps/chosen": -1.7960240840911865, + "logps/rejected": -13.734954833984375, + "loss": 1.5391, + "nll_loss": 1.5308822393417358, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17960241436958313, + "rewards/margins": 1.1938930749893188, + "rewards/rejected": -1.3734955787658691, + "step": 85 + }, + { + "epoch": 0.32452830188679244, + "grad_norm": 0.324097216129303, + "learning_rate": 4.1886792452830195e-05, + "log_odds_chosen": 13.820845603942871, + "log_odds_ratio": -0.0012615115847438574, + "logits/chosen": -1.7233232259750366, + "logits/rejected": -4.733954906463623, + "logps/chosen": -1.5087658166885376, + "logps/rejected": -15.039979934692383, + "loss": 1.4518, + "nll_loss": 1.451686143875122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15087658166885376, + "rewards/margins": 1.353121280670166, + "rewards/rejected": -1.5039979219436646, + "step": 86 + }, + { + "epoch": 0.3283018867924528, + "grad_norm": 0.31188490986824036, + "learning_rate": 4.1792452830188686e-05, + "log_odds_chosen": 13.122318267822266, + "log_odds_ratio": -0.026753831654787064, + "logits/chosen": -1.6933764219284058, + "logits/rejected": -3.5955612659454346, + "logps/chosen": -1.3663079738616943, + "logps/rejected": -14.080284118652344, + "loss": 1.3399, + "nll_loss": 1.3372132778167725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13663078844547272, + "rewards/margins": 1.271397590637207, + "rewards/rejected": -1.4080283641815186, + "step": 87 + }, + { + "epoch": 0.3320754716981132, + "grad_norm": 0.2510411739349365, + "learning_rate": 4.169811320754717e-05, + "log_odds_chosen": 13.573080062866211, + "log_odds_ratio": -0.07667991518974304, + "logits/chosen": -1.1544338464736938, + "logits/rejected": -5.001407146453857, + "logps/chosen": -1.331773042678833, + "logps/rejected": -14.553543090820312, + "loss": 1.4304, + "nll_loss": 1.4226869344711304, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13317731022834778, + "rewards/margins": 1.3221771717071533, + "rewards/rejected": -1.4553543329238892, + "step": 88 + }, + { + "epoch": 0.33584905660377357, + "grad_norm": 0.31842777132987976, + "learning_rate": 4.160377358490566e-05, + "log_odds_chosen": 13.996376037597656, + "log_odds_ratio": -0.0015381659613922238, + "logits/chosen": -2.5720713138580322, + "logits/rejected": -4.779295921325684, + "logps/chosen": -1.5352387428283691, + "logps/rejected": -15.243785858154297, + "loss": 1.6801, + "nll_loss": 1.6799163818359375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15352387726306915, + "rewards/margins": 1.3708547353744507, + "rewards/rejected": -1.524378776550293, + "step": 89 + }, + { + "epoch": 0.33962264150943394, + "grad_norm": 0.30600160360336304, + "learning_rate": 4.150943396226415e-05, + "log_odds_chosen": 14.847908973693848, + "log_odds_ratio": -0.0009415854001417756, + "logits/chosen": -0.8421066999435425, + "logits/rejected": -3.2322371006011963, + "logps/chosen": -1.3275384902954102, + "logps/rejected": -15.784387588500977, + "loss": 1.324, + "nll_loss": 1.323903203010559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13275384902954102, + "rewards/margins": 1.4456850290298462, + "rewards/rejected": -1.5784387588500977, + "step": 90 + }, + { + "epoch": 0.3433962264150943, + "grad_norm": 0.30167537927627563, + "learning_rate": 4.1415094339622644e-05, + "log_odds_chosen": 12.118502616882324, + "log_odds_ratio": -0.08149873465299606, + "logits/chosen": -0.8174037337303162, + "logits/rejected": -3.398536443710327, + "logps/chosen": -1.4228649139404297, + "logps/rejected": -13.159191131591797, + "loss": 1.3606, + "nll_loss": 1.35248601436615, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1422865092754364, + "rewards/margins": 1.1736326217651367, + "rewards/rejected": -1.3159191608428955, + "step": 91 + }, + { + "epoch": 0.3471698113207547, + "grad_norm": 0.4155506491661072, + "learning_rate": 4.1320754716981135e-05, + "log_odds_chosen": 13.490291595458984, + "log_odds_ratio": -0.11288166791200638, + "logits/chosen": -0.7610112428665161, + "logits/rejected": -3.971395492553711, + "logps/chosen": -1.609310269355774, + "logps/rejected": -14.705669403076172, + "loss": 1.6664, + "nll_loss": 1.6551594734191895, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16093102097511292, + "rewards/margins": 1.3096359968185425, + "rewards/rejected": -1.470566987991333, + "step": 92 + }, + { + "epoch": 0.35094339622641507, + "grad_norm": 0.2882811427116394, + "learning_rate": 4.1226415094339626e-05, + "log_odds_chosen": 13.462080001831055, + "log_odds_ratio": -0.15872977674007416, + "logits/chosen": -1.3535653352737427, + "logits/rejected": -4.049402236938477, + "logps/chosen": -1.4857981204986572, + "logps/rejected": -14.58334732055664, + "loss": 1.4598, + "nll_loss": 1.4439499378204346, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14857982099056244, + "rewards/margins": 1.3097549676895142, + "rewards/rejected": -1.4583346843719482, + "step": 93 + }, + { + "epoch": 0.35471698113207545, + "grad_norm": 0.3006541430950165, + "learning_rate": 4.113207547169812e-05, + "log_odds_chosen": 12.660806655883789, + "log_odds_ratio": -0.05375822260975838, + "logits/chosen": -2.8630645275115967, + "logits/rejected": -5.452940464019775, + "logps/chosen": -1.785542368888855, + "logps/rejected": -14.212843894958496, + "loss": 1.7117, + "nll_loss": 1.7063469886779785, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1785542368888855, + "rewards/margins": 1.2427302598953247, + "rewards/rejected": -1.4212844371795654, + "step": 94 + }, + { + "epoch": 0.3584905660377358, + "grad_norm": 0.2722157835960388, + "learning_rate": 4.103773584905661e-05, + "log_odds_chosen": 15.298785209655762, + "log_odds_ratio": -0.000986977363936603, + "logits/chosen": -1.7740228176116943, + "logits/rejected": -4.849878311157227, + "logps/chosen": -1.6763180494308472, + "logps/rejected": -16.691926956176758, + "loss": 1.6459, + "nll_loss": 1.645756721496582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16763180494308472, + "rewards/margins": 1.5015610456466675, + "rewards/rejected": -1.6691927909851074, + "step": 95 + }, + { + "epoch": 0.3622641509433962, + "grad_norm": 0.5683091878890991, + "learning_rate": 4.09433962264151e-05, + "log_odds_chosen": 13.669389724731445, + "log_odds_ratio": -0.0002820601512212306, + "logits/chosen": -3.1163902282714844, + "logits/rejected": -6.106686115264893, + "logps/chosen": -1.557908058166504, + "logps/rejected": -14.852912902832031, + "loss": 1.4868, + "nll_loss": 1.4867894649505615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1557908058166504, + "rewards/margins": 1.329500436782837, + "rewards/rejected": -1.4852913618087769, + "step": 96 + }, + { + "epoch": 0.3660377358490566, + "grad_norm": 0.2856326699256897, + "learning_rate": 4.084905660377359e-05, + "log_odds_chosen": 15.168952941894531, + "log_odds_ratio": -0.07142313569784164, + "logits/chosen": -1.4473392963409424, + "logits/rejected": -4.727592468261719, + "logps/chosen": -1.3984313011169434, + "logps/rejected": -16.263202667236328, + "loss": 1.581, + "nll_loss": 1.5738685131072998, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13984312117099762, + "rewards/margins": 1.486477255821228, + "rewards/rejected": -1.6263203620910645, + "step": 97 + }, + { + "epoch": 0.36981132075471695, + "grad_norm": 0.29159900546073914, + "learning_rate": 4.075471698113208e-05, + "log_odds_chosen": 15.751093864440918, + "log_odds_ratio": -0.00026286751381121576, + "logits/chosen": -3.1162948608398438, + "logits/rejected": -5.55817985534668, + "logps/chosen": -1.6828699111938477, + "logps/rejected": -17.040075302124023, + "loss": 1.4492, + "nll_loss": 1.4491852521896362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.168286994099617, + "rewards/margins": 1.5357205867767334, + "rewards/rejected": -1.704007625579834, + "step": 98 + }, + { + "epoch": 0.37358490566037733, + "grad_norm": 0.27507874369621277, + "learning_rate": 4.066037735849057e-05, + "log_odds_chosen": 13.987431526184082, + "log_odds_ratio": -0.07922342419624329, + "logits/chosen": -1.512285590171814, + "logits/rejected": -6.580392837524414, + "logps/chosen": -1.6363471746444702, + "logps/rejected": -15.355705261230469, + "loss": 1.4966, + "nll_loss": 1.4886995553970337, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16363471746444702, + "rewards/margins": 1.3719358444213867, + "rewards/rejected": -1.535570502281189, + "step": 99 + }, + { + "epoch": 0.37735849056603776, + "grad_norm": 0.25051409006118774, + "learning_rate": 4.0566037735849064e-05, + "log_odds_chosen": 15.439388275146484, + "log_odds_ratio": -5.675078136846423e-05, + "logits/chosen": -1.3515607118606567, + "logits/rejected": -5.860863208770752, + "logps/chosen": -1.4820207357406616, + "logps/rejected": -16.625112533569336, + "loss": 1.5335, + "nll_loss": 1.5334585905075073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1482020765542984, + "rewards/margins": 1.5143091678619385, + "rewards/rejected": -1.6625111103057861, + "step": 100 + }, + { + "epoch": 0.38113207547169814, + "grad_norm": 0.3001623749732971, + "learning_rate": 4.047169811320755e-05, + "log_odds_chosen": 12.887317657470703, + "log_odds_ratio": -0.04469170421361923, + "logits/chosen": -2.702786684036255, + "logits/rejected": -4.913610458374023, + "logps/chosen": -1.787389874458313, + "logps/rejected": -14.465229988098145, + "loss": 1.5049, + "nll_loss": 1.5004353523254395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17873898148536682, + "rewards/margins": 1.2677841186523438, + "rewards/rejected": -1.4465229511260986, + "step": 101 + }, + { + "epoch": 0.3849056603773585, + "grad_norm": 0.34954193234443665, + "learning_rate": 4.037735849056604e-05, + "log_odds_chosen": 13.664055824279785, + "log_odds_ratio": -0.1444215476512909, + "logits/chosen": -1.608425498008728, + "logits/rejected": -4.3469367027282715, + "logps/chosen": -1.8433992862701416, + "logps/rejected": -15.285165786743164, + "loss": 1.6168, + "nll_loss": 1.60233736038208, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.18433992564678192, + "rewards/margins": 1.3441766500473022, + "rewards/rejected": -1.5285166501998901, + "step": 102 + }, + { + "epoch": 0.3886792452830189, + "grad_norm": 0.3157506287097931, + "learning_rate": 4.028301886792453e-05, + "log_odds_chosen": 11.686209678649902, + "log_odds_ratio": -0.10109421610832214, + "logits/chosen": -0.873611330986023, + "logits/rejected": -3.7620019912719727, + "logps/chosen": -1.6527265310287476, + "logps/rejected": -13.108765602111816, + "loss": 1.5602, + "nll_loss": 1.5500413179397583, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16527265310287476, + "rewards/margins": 1.145603895187378, + "rewards/rejected": -1.3108766078948975, + "step": 103 + }, + { + "epoch": 0.39245283018867927, + "grad_norm": 0.2715020477771759, + "learning_rate": 4.018867924528302e-05, + "log_odds_chosen": 11.378678321838379, + "log_odds_ratio": -0.13346989452838898, + "logits/chosen": -1.1194851398468018, + "logits/rejected": -4.566447734832764, + "logps/chosen": -1.6644248962402344, + "logps/rejected": -12.777153015136719, + "loss": 1.5005, + "nll_loss": 1.487147331237793, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16644248366355896, + "rewards/margins": 1.1112728118896484, + "rewards/rejected": -1.2777153253555298, + "step": 104 + }, + { + "epoch": 0.39622641509433965, + "grad_norm": 0.29437255859375, + "learning_rate": 4.009433962264151e-05, + "log_odds_chosen": 15.541433334350586, + "log_odds_ratio": -5.782198059023358e-05, + "logits/chosen": -1.276192307472229, + "logits/rejected": -4.482449054718018, + "logps/chosen": -1.7318949699401855, + "logps/rejected": -17.009288787841797, + "loss": 1.5203, + "nll_loss": 1.5202912092208862, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17318949103355408, + "rewards/margins": 1.5277395248413086, + "rewards/rejected": -1.700929045677185, + "step": 105 + }, + { + "epoch": 0.4, + "grad_norm": 0.28076162934303284, + "learning_rate": 4e-05, + "log_odds_chosen": 15.423474311828613, + "log_odds_ratio": -0.003808586858212948, + "logits/chosen": -1.461142659187317, + "logits/rejected": -4.638182163238525, + "logps/chosen": -1.5100231170654297, + "logps/rejected": -16.47011947631836, + "loss": 1.497, + "nll_loss": 1.4965708255767822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15100233256816864, + "rewards/margins": 1.4960097074508667, + "rewards/rejected": -1.6470119953155518, + "step": 106 + }, + { + "epoch": 0.4037735849056604, + "grad_norm": 0.28875720500946045, + "learning_rate": 3.9905660377358494e-05, + "log_odds_chosen": 12.895769119262695, + "log_odds_ratio": -0.060368865728378296, + "logits/chosen": -2.2072086334228516, + "logits/rejected": -5.86301851272583, + "logps/chosen": -1.8704140186309814, + "logps/rejected": -14.505237579345703, + "loss": 1.5992, + "nll_loss": 1.593145489692688, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.18704140186309814, + "rewards/margins": 1.2634824514389038, + "rewards/rejected": -1.4505239725112915, + "step": 107 + }, + { + "epoch": 0.4075471698113208, + "grad_norm": 0.2719891369342804, + "learning_rate": 3.9811320754716985e-05, + "log_odds_chosen": 15.212738037109375, + "log_odds_ratio": -0.06606274098157883, + "logits/chosen": -1.9831483364105225, + "logits/rejected": -5.709364414215088, + "logps/chosen": -1.6806718111038208, + "logps/rejected": -16.673917770385742, + "loss": 1.5861, + "nll_loss": 1.5794475078582764, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16806718707084656, + "rewards/margins": 1.4993247985839844, + "rewards/rejected": -1.6673917770385742, + "step": 108 + }, + { + "epoch": 0.41132075471698115, + "grad_norm": 0.30551427602767944, + "learning_rate": 3.9716981132075477e-05, + "log_odds_chosen": 13.554816246032715, + "log_odds_ratio": -0.17723311483860016, + "logits/chosen": -1.8992154598236084, + "logits/rejected": -4.842866897583008, + "logps/chosen": -1.408470869064331, + "logps/rejected": -14.5311861038208, + "loss": 1.4911, + "nll_loss": 1.4734236001968384, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1408470869064331, + "rewards/margins": 1.3122715950012207, + "rewards/rejected": -1.4531188011169434, + "step": 109 + }, + { + "epoch": 0.41509433962264153, + "grad_norm": 0.25901442766189575, + "learning_rate": 3.962264150943397e-05, + "log_odds_chosen": 14.055702209472656, + "log_odds_ratio": -0.07455631345510483, + "logits/chosen": -1.1471797227859497, + "logits/rejected": -4.136468887329102, + "logps/chosen": -1.398701786994934, + "logps/rejected": -15.004222869873047, + "loss": 1.4465, + "nll_loss": 1.4390912055969238, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13987018167972565, + "rewards/margins": 1.3605520725250244, + "rewards/rejected": -1.5004222393035889, + "step": 110 + }, + { + "epoch": 0.4188679245283019, + "grad_norm": 0.3040740191936493, + "learning_rate": 3.952830188679246e-05, + "log_odds_chosen": 13.487771987915039, + "log_odds_ratio": -0.0007269117631949484, + "logits/chosen": -2.4711754322052, + "logits/rejected": -5.052443504333496, + "logps/chosen": -1.6260043382644653, + "logps/rejected": -14.823724746704102, + "loss": 1.432, + "nll_loss": 1.431890606880188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16260044276714325, + "rewards/margins": 1.3197720050811768, + "rewards/rejected": -1.482372522354126, + "step": 111 + }, + { + "epoch": 0.4226415094339623, + "grad_norm": 0.25823676586151123, + "learning_rate": 3.943396226415095e-05, + "log_odds_chosen": 15.438188552856445, + "log_odds_ratio": -0.00011414527398301288, + "logits/chosen": -2.096723794937134, + "logits/rejected": -6.7537760734558105, + "logps/chosen": -1.8138432502746582, + "logps/rejected": -16.985055923461914, + "loss": 1.7199, + "nll_loss": 1.7199323177337646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18138432502746582, + "rewards/margins": 1.517121434211731, + "rewards/rejected": -1.6985057592391968, + "step": 112 + }, + { + "epoch": 0.42641509433962266, + "grad_norm": 0.349231094121933, + "learning_rate": 3.933962264150944e-05, + "log_odds_chosen": 12.445716857910156, + "log_odds_ratio": -0.06478449702262878, + "logits/chosen": -2.650108575820923, + "logits/rejected": -5.185212135314941, + "logps/chosen": -1.6985199451446533, + "logps/rejected": -13.793228149414062, + "loss": 1.746, + "nll_loss": 1.739565372467041, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16985198855400085, + "rewards/margins": 1.2094708681106567, + "rewards/rejected": -1.3793230056762695, + "step": 113 + }, + { + "epoch": 0.43018867924528303, + "grad_norm": 0.2924027740955353, + "learning_rate": 3.924528301886793e-05, + "log_odds_chosen": 12.061296463012695, + "log_odds_ratio": -0.03779318928718567, + "logits/chosen": -0.749549150466919, + "logits/rejected": -3.7414801120758057, + "logps/chosen": -1.8810656070709229, + "logps/rejected": -13.728483200073242, + "loss": 1.7387, + "nll_loss": 1.7349181175231934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18810656666755676, + "rewards/margins": 1.1847418546676636, + "rewards/rejected": -1.3728485107421875, + "step": 114 + }, + { + "epoch": 0.4339622641509434, + "grad_norm": 0.28793764114379883, + "learning_rate": 3.9150943396226416e-05, + "log_odds_chosen": 16.13731575012207, + "log_odds_ratio": -0.054478421807289124, + "logits/chosen": -1.7537418603897095, + "logits/rejected": -5.8226141929626465, + "logps/chosen": -1.4789087772369385, + "logps/rejected": -17.265403747558594, + "loss": 1.589, + "nll_loss": 1.5835916996002197, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14789089560508728, + "rewards/margins": 1.5786495208740234, + "rewards/rejected": -1.7265403270721436, + "step": 115 + }, + { + "epoch": 0.4377358490566038, + "grad_norm": 0.31606027483940125, + "learning_rate": 3.905660377358491e-05, + "log_odds_chosen": 14.270502090454102, + "log_odds_ratio": -0.14657607674598694, + "logits/chosen": -1.5621134042739868, + "logits/rejected": -3.9824719429016113, + "logps/chosen": -1.5558480024337769, + "logps/rejected": -15.48137092590332, + "loss": 1.4384, + "nll_loss": 1.4237393140792847, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15558481216430664, + "rewards/margins": 1.392552375793457, + "rewards/rejected": -1.5481371879577637, + "step": 116 + }, + { + "epoch": 0.44150943396226416, + "grad_norm": 0.6901794672012329, + "learning_rate": 3.89622641509434e-05, + "log_odds_chosen": 12.610831260681152, + "log_odds_ratio": -0.11986835300922394, + "logits/chosen": -1.5961331129074097, + "logits/rejected": -4.9528584480285645, + "logps/chosen": -1.864069938659668, + "logps/rejected": -14.308099746704102, + "loss": 1.7146, + "nll_loss": 1.702580213546753, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.18640699982643127, + "rewards/margins": 1.2444028854370117, + "rewards/rejected": -1.4308099746704102, + "step": 117 + }, + { + "epoch": 0.44528301886792454, + "grad_norm": 0.28305572271347046, + "learning_rate": 3.886792452830189e-05, + "log_odds_chosen": 13.709443092346191, + "log_odds_ratio": -0.08205895870923996, + "logits/chosen": -1.5196449756622314, + "logits/rejected": -4.281819820404053, + "logps/chosen": -1.5432316064834595, + "logps/rejected": -14.945755958557129, + "loss": 1.5562, + "nll_loss": 1.5479600429534912, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15432317554950714, + "rewards/margins": 1.34025239944458, + "rewards/rejected": -1.4945755004882812, + "step": 118 + }, + { + "epoch": 0.4490566037735849, + "grad_norm": 0.30540400743484497, + "learning_rate": 3.877358490566038e-05, + "log_odds_chosen": 14.886051177978516, + "log_odds_ratio": -0.0001277975970879197, + "logits/chosen": -2.5971062183380127, + "logits/rejected": -6.4186625480651855, + "logps/chosen": -1.446777105331421, + "logps/rejected": -15.858993530273438, + "loss": 1.3599, + "nll_loss": 1.3598815202713013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14467771351337433, + "rewards/margins": 1.4412215948104858, + "rewards/rejected": -1.5858993530273438, + "step": 119 + }, + { + "epoch": 0.4528301886792453, + "grad_norm": 0.279973566532135, + "learning_rate": 3.867924528301887e-05, + "log_odds_chosen": 12.182138442993164, + "log_odds_ratio": -0.007018540520220995, + "logits/chosen": -1.7155228853225708, + "logits/rejected": -5.174653053283691, + "logps/chosen": -1.7579047679901123, + "logps/rejected": -13.713508605957031, + "loss": 1.6865, + "nll_loss": 1.6858264207839966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1757904589176178, + "rewards/margins": 1.195560336112976, + "rewards/rejected": -1.371350884437561, + "step": 120 + }, + { + "epoch": 0.45660377358490567, + "grad_norm": 0.2613874077796936, + "learning_rate": 3.858490566037736e-05, + "log_odds_chosen": 14.095218658447266, + "log_odds_ratio": -0.07463585585355759, + "logits/chosen": -1.3325459957122803, + "logits/rejected": -6.121455192565918, + "logps/chosen": -1.5543732643127441, + "logps/rejected": -15.366453170776367, + "loss": 1.5167, + "nll_loss": 1.509216070175171, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15543733537197113, + "rewards/margins": 1.3812079429626465, + "rewards/rejected": -1.5366454124450684, + "step": 121 + }, + { + "epoch": 0.46037735849056605, + "grad_norm": 0.30324679613113403, + "learning_rate": 3.8490566037735854e-05, + "log_odds_chosen": 16.875022888183594, + "log_odds_ratio": -0.00017375449533574283, + "logits/chosen": -1.8117984533309937, + "logits/rejected": -4.163735866546631, + "logps/chosen": -1.515238881111145, + "logps/rejected": -17.963499069213867, + "loss": 1.4792, + "nll_loss": 1.4791332483291626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1515238881111145, + "rewards/margins": 1.6448261737823486, + "rewards/rejected": -1.7963500022888184, + "step": 122 + }, + { + "epoch": 0.4641509433962264, + "grad_norm": 0.3230510354042053, + "learning_rate": 3.8396226415094345e-05, + "log_odds_chosen": 17.355361938476562, + "log_odds_ratio": -8.270166631518805e-07, + "logits/chosen": -0.9121267795562744, + "logits/rejected": -4.532161712646484, + "logps/chosen": -1.351798415184021, + "logps/rejected": -18.30146026611328, + "loss": 1.4902, + "nll_loss": 1.4902395009994507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13517984747886658, + "rewards/margins": 1.6949663162231445, + "rewards/rejected": -1.8301459550857544, + "step": 123 + }, + { + "epoch": 0.4679245283018868, + "grad_norm": 0.27051493525505066, + "learning_rate": 3.8301886792452836e-05, + "log_odds_chosen": 16.027782440185547, + "log_odds_ratio": -0.05309152603149414, + "logits/chosen": -2.257577896118164, + "logits/rejected": -6.006890296936035, + "logps/chosen": -1.417616844177246, + "logps/rejected": -17.086490631103516, + "loss": 1.439, + "nll_loss": 1.4336605072021484, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1417616754770279, + "rewards/margins": 1.5668874979019165, + "rewards/rejected": -1.7086491584777832, + "step": 124 + }, + { + "epoch": 0.4716981132075472, + "grad_norm": 0.28948161005973816, + "learning_rate": 3.820754716981133e-05, + "log_odds_chosen": 13.788773536682129, + "log_odds_ratio": -0.057656653225421906, + "logits/chosen": -1.0248003005981445, + "logits/rejected": -6.2845001220703125, + "logps/chosen": -1.651491641998291, + "logps/rejected": -15.156482696533203, + "loss": 1.6935, + "nll_loss": 1.6877108812332153, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16514916718006134, + "rewards/margins": 1.350499153137207, + "rewards/rejected": -1.515648365020752, + "step": 125 + }, + { + "epoch": 0.47547169811320755, + "grad_norm": 0.29785606265068054, + "learning_rate": 3.811320754716982e-05, + "log_odds_chosen": 15.78986930847168, + "log_odds_ratio": -8.410341251874343e-05, + "logits/chosen": -2.4442198276519775, + "logits/rejected": -6.754056453704834, + "logps/chosen": -1.6627094745635986, + "logps/rejected": -17.13760757446289, + "loss": 1.6297, + "nll_loss": 1.62972891330719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16627094149589539, + "rewards/margins": 1.547489881515503, + "rewards/rejected": -1.7137608528137207, + "step": 126 + }, + { + "epoch": 0.47924528301886793, + "grad_norm": 0.3125203251838684, + "learning_rate": 3.801886792452831e-05, + "log_odds_chosen": 15.262259483337402, + "log_odds_ratio": -0.00025166559498757124, + "logits/chosen": -2.318103790283203, + "logits/rejected": -4.911985397338867, + "logps/chosen": -1.5119843482971191, + "logps/rejected": -16.430225372314453, + "loss": 1.5521, + "nll_loss": 1.5521178245544434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15119843184947968, + "rewards/margins": 1.4918241500854492, + "rewards/rejected": -1.6430225372314453, + "step": 127 + }, + { + "epoch": 0.4830188679245283, + "grad_norm": 0.4130852222442627, + "learning_rate": 3.7924528301886794e-05, + "log_odds_chosen": 13.181659698486328, + "log_odds_ratio": -0.02264661341905594, + "logits/chosen": -1.5559453964233398, + "logits/rejected": -5.835235595703125, + "logps/chosen": -1.6575170755386353, + "logps/rejected": -14.44080638885498, + "loss": 1.531, + "nll_loss": 1.5287659168243408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16575171053409576, + "rewards/margins": 1.2783288955688477, + "rewards/rejected": -1.4440807104110718, + "step": 128 + }, + { + "epoch": 0.4867924528301887, + "grad_norm": 0.27878594398498535, + "learning_rate": 3.7830188679245285e-05, + "log_odds_chosen": 15.444690704345703, + "log_odds_ratio": -0.024172237142920494, + "logits/chosen": -1.8147392272949219, + "logits/rejected": -5.994168758392334, + "logps/chosen": -1.4637315273284912, + "logps/rejected": -16.60024070739746, + "loss": 1.41, + "nll_loss": 1.4076224565505981, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14637315273284912, + "rewards/margins": 1.5136507749557495, + "rewards/rejected": -1.6600239276885986, + "step": 129 + }, + { + "epoch": 0.49056603773584906, + "grad_norm": 0.36368459463119507, + "learning_rate": 3.7735849056603776e-05, + "log_odds_chosen": 15.150455474853516, + "log_odds_ratio": -0.06176932156085968, + "logits/chosen": -2.638796806335449, + "logits/rejected": -6.133980751037598, + "logps/chosen": -1.5448698997497559, + "logps/rejected": -16.379032135009766, + "loss": 1.5112, + "nll_loss": 1.505061388015747, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1544869840145111, + "rewards/margins": 1.4834163188934326, + "rewards/rejected": -1.6379034519195557, + "step": 130 + }, + { + "epoch": 0.49433962264150944, + "grad_norm": 0.3242456316947937, + "learning_rate": 3.764150943396227e-05, + "log_odds_chosen": 13.985454559326172, + "log_odds_ratio": -4.5815289922757074e-05, + "logits/chosen": -1.494691014289856, + "logits/rejected": -5.898371696472168, + "logps/chosen": -1.9884130954742432, + "logps/rejected": -15.798318862915039, + "loss": 1.7428, + "nll_loss": 1.7428255081176758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19884130358695984, + "rewards/margins": 1.3809905052185059, + "rewards/rejected": -1.579831838607788, + "step": 131 + }, + { + "epoch": 0.4981132075471698, + "grad_norm": 0.29855185747146606, + "learning_rate": 3.754716981132076e-05, + "log_odds_chosen": 12.895346641540527, + "log_odds_ratio": -0.0862637311220169, + "logits/chosen": -0.5306591391563416, + "logits/rejected": -3.3876893520355225, + "logps/chosen": -1.6508989334106445, + "logps/rejected": -14.312397956848145, + "loss": 1.5424, + "nll_loss": 1.533748745918274, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1650899052619934, + "rewards/margins": 1.2661499977111816, + "rewards/rejected": -1.4312398433685303, + "step": 132 + }, + { + "epoch": 0.5018867924528302, + "grad_norm": 0.3010408282279968, + "learning_rate": 3.745283018867924e-05, + "log_odds_chosen": 14.359496116638184, + "log_odds_ratio": -0.11943729966878891, + "logits/chosen": -1.3975955247879028, + "logits/rejected": -5.733314037322998, + "logps/chosen": -1.5766863822937012, + "logps/rejected": -15.659753799438477, + "loss": 1.5544, + "nll_loss": 1.5425056219100952, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15766863524913788, + "rewards/margins": 1.4083068370819092, + "rewards/rejected": -1.5659754276275635, + "step": 133 + }, + { + "epoch": 0.5056603773584906, + "grad_norm": 0.28366541862487793, + "learning_rate": 3.735849056603773e-05, + "log_odds_chosen": 16.42931365966797, + "log_odds_ratio": -1.6768943169154227e-05, + "logits/chosen": -1.0664993524551392, + "logits/rejected": -5.362708568572998, + "logps/chosen": -1.4823436737060547, + "logps/rejected": -17.60668182373047, + "loss": 1.4417, + "nll_loss": 1.441743016242981, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14823436737060547, + "rewards/margins": 1.612433671951294, + "rewards/rejected": -1.7606680393218994, + "step": 134 + }, + { + "epoch": 0.5094339622641509, + "grad_norm": 0.297313928604126, + "learning_rate": 3.7264150943396224e-05, + "log_odds_chosen": 15.513206481933594, + "log_odds_ratio": -0.03894759714603424, + "logits/chosen": -1.827707290649414, + "logits/rejected": -5.077935218811035, + "logps/chosen": -1.5281659364700317, + "logps/rejected": -16.74795150756836, + "loss": 1.4237, + "nll_loss": 1.4197759628295898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15281659364700317, + "rewards/margins": 1.5219786167144775, + "rewards/rejected": -1.674795150756836, + "step": 135 + }, + { + "epoch": 0.5132075471698113, + "grad_norm": 0.2611730992794037, + "learning_rate": 3.7169811320754716e-05, + "log_odds_chosen": 14.942113876342773, + "log_odds_ratio": -0.001385436742566526, + "logits/chosen": -1.9019191265106201, + "logits/rejected": -5.761693000793457, + "logps/chosen": -1.4520561695098877, + "logps/rejected": -16.059383392333984, + "loss": 1.4831, + "nll_loss": 1.4829171895980835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14520561695098877, + "rewards/margins": 1.4607326984405518, + "rewards/rejected": -1.60593843460083, + "step": 136 + }, + { + "epoch": 0.5169811320754717, + "grad_norm": 0.37624403834342957, + "learning_rate": 3.7075471698113207e-05, + "log_odds_chosen": 15.124824523925781, + "log_odds_ratio": -0.06929050385951996, + "logits/chosen": -0.5525396466255188, + "logits/rejected": -4.60980749130249, + "logps/chosen": -1.61146080493927, + "logps/rejected": -16.51461410522461, + "loss": 1.4183, + "nll_loss": 1.4113963842391968, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16114608943462372, + "rewards/margins": 1.4903154373168945, + "rewards/rejected": -1.6514614820480347, + "step": 137 + }, + { + "epoch": 0.5207547169811321, + "grad_norm": 0.3030368387699127, + "learning_rate": 3.69811320754717e-05, + "log_odds_chosen": 12.055831909179688, + "log_odds_ratio": -0.11039459705352783, + "logits/chosen": -0.8935803174972534, + "logits/rejected": -4.527928352355957, + "logps/chosen": -1.668050765991211, + "logps/rejected": -13.411079406738281, + "loss": 1.5587, + "nll_loss": 1.5476906299591064, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16680508852005005, + "rewards/margins": 1.1743026971817017, + "rewards/rejected": -1.3411078453063965, + "step": 138 + }, + { + "epoch": 0.5245283018867924, + "grad_norm": 0.2544783353805542, + "learning_rate": 3.688679245283019e-05, + "log_odds_chosen": 15.021339416503906, + "log_odds_ratio": -0.06613866984844208, + "logits/chosen": -2.3585753440856934, + "logits/rejected": -6.415126323699951, + "logps/chosen": -1.4854167699813843, + "logps/rejected": -16.204166412353516, + "loss": 1.3412, + "nll_loss": 1.3345497846603394, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1485416740179062, + "rewards/margins": 1.4718750715255737, + "rewards/rejected": -1.6204167604446411, + "step": 139 + }, + { + "epoch": 0.5283018867924528, + "grad_norm": 0.3188823461532593, + "learning_rate": 3.679245283018868e-05, + "log_odds_chosen": 15.247077941894531, + "log_odds_ratio": -0.0011911022011190653, + "logits/chosen": -2.5056328773498535, + "logits/rejected": -5.532197952270508, + "logps/chosen": -1.569747805595398, + "logps/rejected": -16.510929107666016, + "loss": 1.4605, + "nll_loss": 1.4603441953659058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15697479248046875, + "rewards/margins": 1.494118094444275, + "rewards/rejected": -1.6510928869247437, + "step": 140 + }, + { + "epoch": 0.5320754716981132, + "grad_norm": 0.3175397515296936, + "learning_rate": 3.669811320754717e-05, + "log_odds_chosen": 13.892709732055664, + "log_odds_ratio": -0.0589115172624588, + "logits/chosen": -0.8055652379989624, + "logits/rejected": -3.4150259494781494, + "logps/chosen": -1.5232586860656738, + "logps/rejected": -14.959342956542969, + "loss": 1.5432, + "nll_loss": 1.5373512506484985, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15232588350772858, + "rewards/margins": 1.3436083793640137, + "rewards/rejected": -1.495934247970581, + "step": 141 + }, + { + "epoch": 0.5358490566037736, + "grad_norm": 0.27956297993659973, + "learning_rate": 3.660377358490566e-05, + "log_odds_chosen": 15.927668571472168, + "log_odds_ratio": -0.0016179109225049615, + "logits/chosen": -1.636272668838501, + "logits/rejected": -5.451028347015381, + "logps/chosen": -1.640097975730896, + "logps/rejected": -17.290578842163086, + "loss": 1.5572, + "nll_loss": 1.5570058822631836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16400979459285736, + "rewards/margins": 1.565048098564148, + "rewards/rejected": -1.729057788848877, + "step": 142 + }, + { + "epoch": 0.539622641509434, + "grad_norm": 0.2930643558502197, + "learning_rate": 3.650943396226415e-05, + "log_odds_chosen": 16.596500396728516, + "log_odds_ratio": -9.716653585201129e-05, + "logits/chosen": -1.436676263809204, + "logits/rejected": -4.450935363769531, + "logps/chosen": -1.3673814535140991, + "logps/rejected": -17.530248641967773, + "loss": 1.3955, + "nll_loss": 1.3954448699951172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1367381513118744, + "rewards/margins": 1.6162867546081543, + "rewards/rejected": -1.753024935722351, + "step": 143 + }, + { + "epoch": 0.5433962264150943, + "grad_norm": 0.4236195683479309, + "learning_rate": 3.641509433962264e-05, + "log_odds_chosen": 14.702760696411133, + "log_odds_ratio": -0.0004511699662543833, + "logits/chosen": -1.313840389251709, + "logits/rejected": -4.356607437133789, + "logps/chosen": -1.8211621046066284, + "logps/rejected": -16.25528335571289, + "loss": 1.547, + "nll_loss": 1.5469856262207031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18211621046066284, + "rewards/margins": 1.443412184715271, + "rewards/rejected": -1.6255284547805786, + "step": 144 + }, + { + "epoch": 0.5471698113207547, + "grad_norm": 0.33278077840805054, + "learning_rate": 3.632075471698113e-05, + "log_odds_chosen": 15.362979888916016, + "log_odds_ratio": -0.0003507338115014136, + "logits/chosen": -1.3937156200408936, + "logits/rejected": -5.756740570068359, + "logps/chosen": -1.7482523918151855, + "logps/rejected": -16.875289916992188, + "loss": 1.608, + "nll_loss": 1.6079708337783813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1748252511024475, + "rewards/margins": 1.5127036571502686, + "rewards/rejected": -1.6875288486480713, + "step": 145 + }, + { + "epoch": 0.5509433962264151, + "grad_norm": 0.32443860173225403, + "learning_rate": 3.622641509433962e-05, + "log_odds_chosen": 15.61093521118164, + "log_odds_ratio": -0.06764474511146545, + "logits/chosen": -1.9850184917449951, + "logits/rejected": -5.995416164398193, + "logps/chosen": -1.7201964855194092, + "logps/rejected": -17.044771194458008, + "loss": 1.6287, + "nll_loss": 1.6218953132629395, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17201966047286987, + "rewards/margins": 1.5324573516845703, + "rewards/rejected": -1.704477071762085, + "step": 146 + }, + { + "epoch": 0.5547169811320755, + "grad_norm": 0.2766498029232025, + "learning_rate": 3.613207547169811e-05, + "log_odds_chosen": 14.549701690673828, + "log_odds_ratio": -0.000522086163982749, + "logits/chosen": -2.167712688446045, + "logits/rejected": -6.221152305603027, + "logps/chosen": -1.494560956954956, + "logps/rejected": -15.703763961791992, + "loss": 1.4152, + "nll_loss": 1.4151456356048584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14945609867572784, + "rewards/margins": 1.4209203720092773, + "rewards/rejected": -1.5703763961791992, + "step": 147 + }, + { + "epoch": 0.5584905660377358, + "grad_norm": 0.4621659219264984, + "learning_rate": 3.60377358490566e-05, + "log_odds_chosen": 14.655370712280273, + "log_odds_ratio": -0.07225409895181656, + "logits/chosen": -0.9679865837097168, + "logits/rejected": -3.743302345275879, + "logps/chosen": -1.47898268699646, + "logps/rejected": -15.858048439025879, + "loss": 1.3764, + "nll_loss": 1.3691895008087158, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14789827167987823, + "rewards/margins": 1.4379065036773682, + "rewards/rejected": -1.5858049392700195, + "step": 148 + }, + { + "epoch": 0.5622641509433962, + "grad_norm": 0.319667249917984, + "learning_rate": 3.594339622641509e-05, + "log_odds_chosen": 15.044527053833008, + "log_odds_ratio": -0.00024553845287300646, + "logits/chosen": -1.18473219871521, + "logits/rejected": -4.766110420227051, + "logps/chosen": -1.5453764200210571, + "logps/rejected": -16.291166305541992, + "loss": 1.5824, + "nll_loss": 1.5824246406555176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1545376479625702, + "rewards/margins": 1.474579095840454, + "rewards/rejected": -1.6291167736053467, + "step": 149 + }, + { + "epoch": 0.5660377358490566, + "grad_norm": 0.3204805552959442, + "learning_rate": 3.5849056603773584e-05, + "log_odds_chosen": 17.485958099365234, + "log_odds_ratio": -0.10664539784193039, + "logits/chosen": -1.6424400806427002, + "logits/rejected": -3.842500686645508, + "logps/chosen": -1.4972755908966064, + "logps/rejected": -18.57717514038086, + "loss": 1.3499, + "nll_loss": 1.3392703533172607, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14972755312919617, + "rewards/margins": 1.7079898118972778, + "rewards/rejected": -1.8577172756195068, + "step": 150 + }, + { + "epoch": 0.569811320754717, + "grad_norm": 0.26958513259887695, + "learning_rate": 3.5754716981132075e-05, + "log_odds_chosen": 15.958301544189453, + "log_odds_ratio": -0.0007034969748929143, + "logits/chosen": -1.0648162364959717, + "logits/rejected": -6.030667304992676, + "logps/chosen": -1.6111671924591064, + "logps/rejected": -17.287628173828125, + "loss": 1.6164, + "nll_loss": 1.6163535118103027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16111671924591064, + "rewards/margins": 1.5676461458206177, + "rewards/rejected": -1.7287628650665283, + "step": 151 + }, + { + "epoch": 0.5735849056603773, + "grad_norm": 0.3305363059043884, + "learning_rate": 3.5660377358490566e-05, + "log_odds_chosen": 17.14991569519043, + "log_odds_ratio": -3.2623302104184404e-05, + "logits/chosen": -1.6892523765563965, + "logits/rejected": -4.232363224029541, + "logps/chosen": -1.5470982789993286, + "logps/rejected": -18.353713989257812, + "loss": 1.4759, + "nll_loss": 1.4759438037872314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1547098308801651, + "rewards/margins": 1.6806614398956299, + "rewards/rejected": -1.8353712558746338, + "step": 152 + }, + { + "epoch": 0.5773584905660377, + "grad_norm": 0.3334077000617981, + "learning_rate": 3.556603773584906e-05, + "log_odds_chosen": 15.461456298828125, + "log_odds_ratio": -3.081683098571375e-05, + "logits/chosen": -1.5841903686523438, + "logits/rejected": -5.0937604904174805, + "logps/chosen": -1.774914264678955, + "logps/rejected": -16.993589401245117, + "loss": 1.5688, + "nll_loss": 1.5687741041183472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1774914264678955, + "rewards/margins": 1.5218675136566162, + "rewards/rejected": -1.6993589401245117, + "step": 153 + }, + { + "epoch": 0.5811320754716981, + "grad_norm": 0.29705750942230225, + "learning_rate": 3.547169811320755e-05, + "log_odds_chosen": 14.837601661682129, + "log_odds_ratio": -4.0413448004983366e-05, + "logits/chosen": -2.566657781600952, + "logits/rejected": -6.176463603973389, + "logps/chosen": -1.7194881439208984, + "logps/rejected": -16.30776023864746, + "loss": 1.5701, + "nll_loss": 1.5700526237487793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17194882035255432, + "rewards/margins": 1.4588273763656616, + "rewards/rejected": -1.6307761669158936, + "step": 154 + }, + { + "epoch": 0.5849056603773585, + "grad_norm": 0.4479868710041046, + "learning_rate": 3.537735849056604e-05, + "log_odds_chosen": 16.22223472595215, + "log_odds_ratio": -5.1474453357513994e-05, + "logits/chosen": -1.6525013446807861, + "logits/rejected": -5.60921573638916, + "logps/chosen": -1.4534053802490234, + "logps/rejected": -17.35995101928711, + "loss": 1.3998, + "nll_loss": 1.3998193740844727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14534053206443787, + "rewards/margins": 1.5906546115875244, + "rewards/rejected": -1.7359951734542847, + "step": 155 + }, + { + "epoch": 0.5886792452830188, + "grad_norm": 0.3321553170681, + "learning_rate": 3.528301886792453e-05, + "log_odds_chosen": 14.45380973815918, + "log_odds_ratio": -0.00011496634397190064, + "logits/chosen": -1.5721373558044434, + "logits/rejected": -5.987687110900879, + "logps/chosen": -1.7010905742645264, + "logps/rejected": -15.914693832397461, + "loss": 1.4444, + "nll_loss": 1.4443511962890625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17010906338691711, + "rewards/margins": 1.4213604927062988, + "rewards/rejected": -1.591469407081604, + "step": 156 + }, + { + "epoch": 0.5924528301886792, + "grad_norm": 0.32465389370918274, + "learning_rate": 3.518867924528302e-05, + "log_odds_chosen": 15.948369979858398, + "log_odds_ratio": -1.5907631677691825e-05, + "logits/chosen": -1.2226861715316772, + "logits/rejected": -4.106148719787598, + "logps/chosen": -1.8807780742645264, + "logps/rejected": -17.614301681518555, + "loss": 1.5042, + "nll_loss": 1.5042164325714111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18807780742645264, + "rewards/margins": 1.5733524560928345, + "rewards/rejected": -1.761430263519287, + "step": 157 + }, + { + "epoch": 0.5962264150943396, + "grad_norm": 0.2754722237586975, + "learning_rate": 3.5094339622641506e-05, + "log_odds_chosen": 15.541213989257812, + "log_odds_ratio": -0.16899409890174866, + "logits/chosen": -2.1642675399780273, + "logits/rejected": -5.719496250152588, + "logps/chosen": -1.5195682048797607, + "logps/rejected": -16.678632736206055, + "loss": 1.6354, + "nll_loss": 1.6184601783752441, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15195682644844055, + "rewards/margins": 1.5159064531326294, + "rewards/rejected": -1.6678632497787476, + "step": 158 + }, + { + "epoch": 0.6, + "grad_norm": 0.28031638264656067, + "learning_rate": 3.5e-05, + "log_odds_chosen": 11.228550910949707, + "log_odds_ratio": -0.20393189787864685, + "logits/chosen": -0.9825246334075928, + "logits/rejected": -4.9232587814331055, + "logps/chosen": -1.7185983657836914, + "logps/rejected": -12.776618003845215, + "loss": 1.5923, + "nll_loss": 1.5718731880187988, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17185983061790466, + "rewards/margins": 1.1058019399642944, + "rewards/rejected": -1.2776618003845215, + "step": 159 + }, + { + "epoch": 0.6037735849056604, + "grad_norm": 0.2919527292251587, + "learning_rate": 3.490566037735849e-05, + "log_odds_chosen": 14.894678115844727, + "log_odds_ratio": -0.0009987247176468372, + "logits/chosen": -0.6535999178886414, + "logits/rejected": -4.017884254455566, + "logps/chosen": -1.5091729164123535, + "logps/rejected": -16.018918991088867, + "loss": 1.403, + "nll_loss": 1.40290367603302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15091729164123535, + "rewards/margins": 1.450974702835083, + "rewards/rejected": -1.6018919944763184, + "step": 160 + }, + { + "epoch": 0.6075471698113207, + "grad_norm": 0.3071415424346924, + "learning_rate": 3.481132075471698e-05, + "log_odds_chosen": 16.471067428588867, + "log_odds_ratio": -2.074857911793515e-05, + "logits/chosen": -0.09290022403001785, + "logits/rejected": -3.7179436683654785, + "logps/chosen": -1.471862554550171, + "logps/rejected": -17.618274688720703, + "loss": 1.4793, + "nll_loss": 1.4793322086334229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1471862643957138, + "rewards/margins": 1.6146413087844849, + "rewards/rejected": -1.7618277072906494, + "step": 161 + }, + { + "epoch": 0.6113207547169811, + "grad_norm": 0.2777910530567169, + "learning_rate": 3.471698113207547e-05, + "log_odds_chosen": 16.91250991821289, + "log_odds_ratio": -0.016157550737261772, + "logits/chosen": -1.235945463180542, + "logits/rejected": -5.316408157348633, + "logps/chosen": -1.6419563293457031, + "logps/rejected": -18.302946090698242, + "loss": 1.429, + "nll_loss": 1.4273896217346191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16419564187526703, + "rewards/margins": 1.6660988330841064, + "rewards/rejected": -1.8302946090698242, + "step": 162 + }, + { + "epoch": 0.6150943396226415, + "grad_norm": 0.2712962329387665, + "learning_rate": 3.462264150943396e-05, + "log_odds_chosen": 16.41084098815918, + "log_odds_ratio": -2.9441296646837145e-05, + "logits/chosen": -1.025072693824768, + "logits/rejected": -5.848681449890137, + "logps/chosen": -1.5957037210464478, + "logps/rejected": -17.6140193939209, + "loss": 1.5493, + "nll_loss": 1.5493419170379639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15957039594650269, + "rewards/margins": 1.6018316745758057, + "rewards/rejected": -1.7614020109176636, + "step": 163 + }, + { + "epoch": 0.6188679245283019, + "grad_norm": 0.30003705620765686, + "learning_rate": 3.452830188679245e-05, + "log_odds_chosen": 16.494930267333984, + "log_odds_ratio": -8.993155461212154e-06, + "logits/chosen": -1.4486334323883057, + "logits/rejected": -5.38150691986084, + "logps/chosen": -1.4585084915161133, + "logps/rejected": -17.579425811767578, + "loss": 1.5673, + "nll_loss": 1.5673246383666992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14585085213184357, + "rewards/margins": 1.6120917797088623, + "rewards/rejected": -1.7579425573349, + "step": 164 + }, + { + "epoch": 0.6226415094339622, + "grad_norm": 0.2940906286239624, + "learning_rate": 3.4433962264150943e-05, + "log_odds_chosen": 15.412787437438965, + "log_odds_ratio": -0.07001832127571106, + "logits/chosen": -1.34386146068573, + "logits/rejected": -5.122559070587158, + "logps/chosen": -1.8467469215393066, + "logps/rejected": -17.047271728515625, + "loss": 1.6754, + "nll_loss": 1.6683518886566162, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1846746802330017, + "rewards/margins": 1.5200525522232056, + "rewards/rejected": -1.7047271728515625, + "step": 165 + }, + { + "epoch": 0.6264150943396226, + "grad_norm": 0.3121996521949768, + "learning_rate": 3.4339622641509435e-05, + "log_odds_chosen": 16.29378318786621, + "log_odds_ratio": -0.05672796443104744, + "logits/chosen": -2.245677947998047, + "logits/rejected": -4.856801986694336, + "logps/chosen": -1.4243159294128418, + "logps/rejected": -17.36766815185547, + "loss": 1.5158, + "nll_loss": 1.5101523399353027, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1424316018819809, + "rewards/margins": 1.5943353176116943, + "rewards/rejected": -1.7367669343948364, + "step": 166 + }, + { + "epoch": 0.630188679245283, + "grad_norm": 0.28171584010124207, + "learning_rate": 3.4245283018867926e-05, + "log_odds_chosen": 15.490211486816406, + "log_odds_ratio": -0.024965543299913406, + "logits/chosen": -0.1043301597237587, + "logits/rejected": -4.409641742706299, + "logps/chosen": -1.771804690361023, + "logps/rejected": -17.02152442932129, + "loss": 1.6991, + "nll_loss": 1.6965808868408203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1771804690361023, + "rewards/margins": 1.5249719619750977, + "rewards/rejected": -1.7021524906158447, + "step": 167 + }, + { + "epoch": 0.6339622641509434, + "grad_norm": 0.27846312522888184, + "learning_rate": 3.415094339622642e-05, + "log_odds_chosen": 16.138580322265625, + "log_odds_ratio": -0.00026864392566494644, + "logits/chosen": -1.5777232646942139, + "logits/rejected": -6.5474162101745605, + "logps/chosen": -1.6822800636291504, + "logps/rejected": -17.583065032958984, + "loss": 1.5533, + "nll_loss": 1.5532336235046387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16822800040245056, + "rewards/margins": 1.5900784730911255, + "rewards/rejected": -1.7583065032958984, + "step": 168 + }, + { + "epoch": 0.6377358490566037, + "grad_norm": 0.3010658025741577, + "learning_rate": 3.405660377358491e-05, + "log_odds_chosen": 15.855169296264648, + "log_odds_ratio": -1.3776767445961013e-05, + "logits/chosen": -1.5479809045791626, + "logits/rejected": -5.985370635986328, + "logps/chosen": -1.7337384223937988, + "logps/rejected": -17.28085708618164, + "loss": 1.6455, + "nll_loss": 1.645505666732788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17337384819984436, + "rewards/margins": 1.5547118186950684, + "rewards/rejected": -1.7280856370925903, + "step": 169 + }, + { + "epoch": 0.6415094339622641, + "grad_norm": 0.3382321000099182, + "learning_rate": 3.39622641509434e-05, + "log_odds_chosen": 15.886013984680176, + "log_odds_ratio": -8.158626769727562e-06, + "logits/chosen": -1.8764511346817017, + "logits/rejected": -5.525568962097168, + "logps/chosen": -1.3874365091323853, + "logps/rejected": -16.957935333251953, + "loss": 1.4466, + "nll_loss": 1.4466451406478882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13874365389347076, + "rewards/margins": 1.5570499897003174, + "rewards/rejected": -1.695793628692627, + "step": 170 + }, + { + "epoch": 0.6452830188679245, + "grad_norm": 0.33056262135505676, + "learning_rate": 3.386792452830188e-05, + "log_odds_chosen": 15.484598159790039, + "log_odds_ratio": -8.27780422696378e-06, + "logits/chosen": -1.8570010662078857, + "logits/rejected": -4.6085357666015625, + "logps/chosen": -1.5708074569702148, + "logps/rejected": -16.722640991210938, + "loss": 1.5653, + "nll_loss": 1.5652695894241333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.157080739736557, + "rewards/margins": 1.5151833295822144, + "rewards/rejected": -1.6722640991210938, + "step": 171 + }, + { + "epoch": 0.6490566037735849, + "grad_norm": 0.31156399846076965, + "learning_rate": 3.3773584905660374e-05, + "log_odds_chosen": 15.454925537109375, + "log_odds_ratio": -0.01993393711745739, + "logits/chosen": -1.1617259979248047, + "logits/rejected": -5.1523756980896, + "logps/chosen": -1.5937132835388184, + "logps/rejected": -16.762189865112305, + "loss": 1.3782, + "nll_loss": 1.3762309551239014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15937133133411407, + "rewards/margins": 1.5168476104736328, + "rewards/rejected": -1.6762189865112305, + "step": 172 + }, + { + "epoch": 0.6528301886792452, + "grad_norm": 0.2925812005996704, + "learning_rate": 3.3679245283018865e-05, + "log_odds_chosen": 17.620845794677734, + "log_odds_ratio": -0.0001716611732263118, + "logits/chosen": -1.702080249786377, + "logits/rejected": -5.444385051727295, + "logps/chosen": -1.224086046218872, + "logps/rejected": -18.321523666381836, + "loss": 1.324, + "nll_loss": 1.3239673376083374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12240861356258392, + "rewards/margins": 1.709743857383728, + "rewards/rejected": -1.8321523666381836, + "step": 173 + }, + { + "epoch": 0.6566037735849056, + "grad_norm": 0.3308000862598419, + "learning_rate": 3.3584905660377356e-05, + "log_odds_chosen": 13.906808853149414, + "log_odds_ratio": -0.05518035590648651, + "logits/chosen": -1.4705300331115723, + "logits/rejected": -4.510253429412842, + "logps/chosen": -1.5690919160842896, + "logps/rejected": -15.069160461425781, + "loss": 1.3463, + "nll_loss": 1.3407379388809204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15690919756889343, + "rewards/margins": 1.3500069379806519, + "rewards/rejected": -1.5069161653518677, + "step": 174 + }, + { + "epoch": 0.660377358490566, + "grad_norm": 0.2951977550983429, + "learning_rate": 3.349056603773585e-05, + "log_odds_chosen": 17.145343780517578, + "log_odds_ratio": -0.06170666217803955, + "logits/chosen": -2.072368860244751, + "logits/rejected": -5.481960773468018, + "logps/chosen": -1.3195624351501465, + "logps/rejected": -18.049631118774414, + "loss": 1.3551, + "nll_loss": 1.348900556564331, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13195623457431793, + "rewards/margins": 1.6730068922042847, + "rewards/rejected": -1.8049631118774414, + "step": 175 + }, + { + "epoch": 0.6641509433962264, + "grad_norm": 0.37980836629867554, + "learning_rate": 3.339622641509434e-05, + "log_odds_chosen": 14.718152046203613, + "log_odds_ratio": -0.13777947425842285, + "logits/chosen": -1.4754526615142822, + "logits/rejected": -3.541693687438965, + "logps/chosen": -1.6201732158660889, + "logps/rejected": -15.98199462890625, + "loss": 1.4959, + "nll_loss": 1.4821652173995972, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1620173305273056, + "rewards/margins": 1.4361821413040161, + "rewards/rejected": -1.598199486732483, + "step": 176 + }, + { + "epoch": 0.6679245283018868, + "grad_norm": 0.2625764012336731, + "learning_rate": 3.330188679245283e-05, + "log_odds_chosen": 16.301048278808594, + "log_odds_ratio": -0.07389828562736511, + "logits/chosen": -1.6500033140182495, + "logits/rejected": -5.951912879943848, + "logps/chosen": -1.6418194770812988, + "logps/rejected": -17.625288009643555, + "loss": 1.6667, + "nll_loss": 1.659282922744751, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16418196260929108, + "rewards/margins": 1.5983469486236572, + "rewards/rejected": -1.762528896331787, + "step": 177 + }, + { + "epoch": 0.6716981132075471, + "grad_norm": 0.33599424362182617, + "learning_rate": 3.320754716981132e-05, + "log_odds_chosen": 18.26766014099121, + "log_odds_ratio": -2.645006361490232e-06, + "logits/chosen": -1.5161099433898926, + "logits/rejected": -4.741918087005615, + "logps/chosen": -1.222088098526001, + "logps/rejected": -18.985591888427734, + "loss": 1.4988, + "nll_loss": 1.4987843036651611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12220881879329681, + "rewards/margins": 1.7763502597808838, + "rewards/rejected": -1.8985592126846313, + "step": 178 + }, + { + "epoch": 0.6754716981132075, + "grad_norm": 0.3460422158241272, + "learning_rate": 3.311320754716981e-05, + "log_odds_chosen": 15.672019958496094, + "log_odds_ratio": -0.0003013765381183475, + "logits/chosen": -1.8128689527511597, + "logits/rejected": -5.0894670486450195, + "logps/chosen": -1.912743330001831, + "logps/rejected": -17.398412704467773, + "loss": 1.4925, + "nll_loss": 1.49249267578125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19127434492111206, + "rewards/margins": 1.5485669374465942, + "rewards/rejected": -1.7398412227630615, + "step": 179 + }, + { + "epoch": 0.6792452830188679, + "grad_norm": 0.325990229845047, + "learning_rate": 3.30188679245283e-05, + "log_odds_chosen": 16.67650032043457, + "log_odds_ratio": -7.659475159016438e-06, + "logits/chosen": -1.7975369691848755, + "logits/rejected": -5.100127220153809, + "logps/chosen": -1.545456886291504, + "logps/rejected": -17.93629264831543, + "loss": 1.3418, + "nll_loss": 1.3418041467666626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15454569458961487, + "rewards/margins": 1.639083743095398, + "rewards/rejected": -1.7936294078826904, + "step": 180 + }, + { + "epoch": 0.6830188679245283, + "grad_norm": 0.31531310081481934, + "learning_rate": 3.2924528301886794e-05, + "log_odds_chosen": 16.065807342529297, + "log_odds_ratio": -0.06276258826255798, + "logits/chosen": -1.224330186843872, + "logits/rejected": -4.1433610916137695, + "logps/chosen": -1.5791137218475342, + "logps/rejected": -17.382640838623047, + "loss": 1.2837, + "nll_loss": 1.277405858039856, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15791137516498566, + "rewards/margins": 1.580352783203125, + "rewards/rejected": -1.7382642030715942, + "step": 181 + }, + { + "epoch": 0.6867924528301886, + "grad_norm": 0.3160483241081238, + "learning_rate": 3.2830188679245285e-05, + "log_odds_chosen": 13.249960899353027, + "log_odds_ratio": -0.09639393538236618, + "logits/chosen": -1.4797954559326172, + "logits/rejected": -5.896228790283203, + "logps/chosen": -1.5692899227142334, + "logps/rejected": -14.452865600585938, + "loss": 1.4959, + "nll_loss": 1.4862685203552246, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15692900121212006, + "rewards/margins": 1.2883576154708862, + "rewards/rejected": -1.4452866315841675, + "step": 182 + }, + { + "epoch": 0.690566037735849, + "grad_norm": 0.3040304183959961, + "learning_rate": 3.2735849056603776e-05, + "log_odds_chosen": 18.430316925048828, + "log_odds_ratio": -0.00038310332456603646, + "logits/chosen": -1.5657317638397217, + "logits/rejected": -4.217155933380127, + "logps/chosen": -1.4011714458465576, + "logps/rejected": -19.451824188232422, + "loss": 1.5214, + "nll_loss": 1.5213794708251953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14011713862419128, + "rewards/margins": 1.805065393447876, + "rewards/rejected": -1.9451824426651, + "step": 183 + }, + { + "epoch": 0.6943396226415094, + "grad_norm": 0.34743577241897583, + "learning_rate": 3.264150943396227e-05, + "log_odds_chosen": 17.73297882080078, + "log_odds_ratio": -2.7628839234239422e-05, + "logits/chosen": -0.9867445230484009, + "logits/rejected": -3.8493878841400146, + "logps/chosen": -1.577934741973877, + "logps/rejected": -19.020362854003906, + "loss": 1.4107, + "nll_loss": 1.4106800556182861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15779347717761993, + "rewards/margins": 1.7442429065704346, + "rewards/rejected": -1.9020363092422485, + "step": 184 + }, + { + "epoch": 0.6981132075471698, + "grad_norm": 0.29433855414390564, + "learning_rate": 3.254716981132075e-05, + "log_odds_chosen": 18.695938110351562, + "log_odds_ratio": -2.980303406729945e-06, + "logits/chosen": -1.6250673532485962, + "logits/rejected": -5.061118125915527, + "logps/chosen": -1.1787736415863037, + "logps/rejected": -19.337848663330078, + "loss": 1.4456, + "nll_loss": 1.4456124305725098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11787736415863037, + "rewards/margins": 1.8159077167510986, + "rewards/rejected": -1.9337849617004395, + "step": 185 + }, + { + "epoch": 0.7018867924528301, + "grad_norm": 0.33233845233917236, + "learning_rate": 3.245283018867924e-05, + "log_odds_chosen": 18.042146682739258, + "log_odds_ratio": -3.9149457734311e-05, + "logits/chosen": -1.7575011253356934, + "logits/rejected": -5.358031272888184, + "logps/chosen": -1.8146780729293823, + "logps/rejected": -19.64087677001953, + "loss": 1.5776, + "nll_loss": 1.5775460004806519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18146783113479614, + "rewards/margins": 1.7826199531555176, + "rewards/rejected": -1.964087724685669, + "step": 186 + }, + { + "epoch": 0.7056603773584905, + "grad_norm": 0.3320876657962799, + "learning_rate": 3.2358490566037734e-05, + "log_odds_chosen": 17.938766479492188, + "log_odds_ratio": -3.449665200605523e-06, + "logits/chosen": -1.9206900596618652, + "logits/rejected": -4.899381160736084, + "logps/chosen": -1.370758295059204, + "logps/rejected": -18.916780471801758, + "loss": 1.3438, + "nll_loss": 1.3438050746917725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13707584142684937, + "rewards/margins": 1.754602313041687, + "rewards/rejected": -1.8916780948638916, + "step": 187 + }, + { + "epoch": 0.7094339622641509, + "grad_norm": 0.5529193878173828, + "learning_rate": 3.2264150943396225e-05, + "log_odds_chosen": 17.36458969116211, + "log_odds_ratio": -3.398192711756565e-05, + "logits/chosen": -2.5587615966796875, + "logits/rejected": -5.520333290100098, + "logps/chosen": -1.3868842124938965, + "logps/rejected": -18.35110855102539, + "loss": 1.3595, + "nll_loss": 1.3595402240753174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13868843019008636, + "rewards/margins": 1.6964225769042969, + "rewards/rejected": -1.8351107835769653, + "step": 188 + }, + { + "epoch": 0.7132075471698113, + "grad_norm": 0.49584269523620605, + "learning_rate": 3.2169811320754716e-05, + "log_odds_chosen": 16.835905075073242, + "log_odds_ratio": -0.018066758289933205, + "logits/chosen": -1.943171739578247, + "logits/rejected": -5.371037006378174, + "logps/chosen": -1.5242998600006104, + "logps/rejected": -18.055362701416016, + "loss": 1.6063, + "nll_loss": 1.6044623851776123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15242999792099, + "rewards/margins": 1.6531062126159668, + "rewards/rejected": -1.8055362701416016, + "step": 189 + }, + { + "epoch": 0.7169811320754716, + "grad_norm": 0.3044775128364563, + "learning_rate": 3.207547169811321e-05, + "log_odds_chosen": 17.216352462768555, + "log_odds_ratio": -1.419415457348805e-05, + "logits/chosen": -1.447345495223999, + "logits/rejected": -5.178557395935059, + "logps/chosen": -1.574653148651123, + "logps/rejected": -18.42108726501465, + "loss": 1.6679, + "nll_loss": 1.6678857803344727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15746530890464783, + "rewards/margins": 1.6846433877944946, + "rewards/rejected": -1.8421087265014648, + "step": 190 + }, + { + "epoch": 0.720754716981132, + "grad_norm": 0.33981502056121826, + "learning_rate": 3.19811320754717e-05, + "log_odds_chosen": 16.49580192565918, + "log_odds_ratio": -4.55726585641969e-05, + "logits/chosen": -2.627636671066284, + "logits/rejected": -5.452616214752197, + "logps/chosen": -1.7808005809783936, + "logps/rejected": -18.012622833251953, + "loss": 1.6983, + "nll_loss": 1.6982595920562744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17808005213737488, + "rewards/margins": 1.6231824159622192, + "rewards/rejected": -1.8012624979019165, + "step": 191 + }, + { + "epoch": 0.7245283018867924, + "grad_norm": 0.2643303871154785, + "learning_rate": 3.188679245283019e-05, + "log_odds_chosen": 18.62483024597168, + "log_odds_ratio": -2.2575586626771837e-06, + "logits/chosen": -0.962692141532898, + "logits/rejected": -5.363802433013916, + "logps/chosen": -1.6161587238311768, + "logps/rejected": -19.921184539794922, + "loss": 1.6401, + "nll_loss": 1.6400614976882935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16161587834358215, + "rewards/margins": 1.8305026292800903, + "rewards/rejected": -1.99211847782135, + "step": 192 + }, + { + "epoch": 0.7283018867924528, + "grad_norm": 0.5224670171737671, + "learning_rate": 3.179245283018868e-05, + "log_odds_chosen": 17.83526039123535, + "log_odds_ratio": -2.4736191335250624e-06, + "logits/chosen": -2.249207019805908, + "logits/rejected": -5.893120765686035, + "logps/chosen": -1.5763590335845947, + "logps/rejected": -19.069358825683594, + "loss": 1.4385, + "nll_loss": 1.438537359237671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.157635897397995, + "rewards/margins": 1.7493000030517578, + "rewards/rejected": -1.9069358110427856, + "step": 193 + }, + { + "epoch": 0.7320754716981132, + "grad_norm": 0.33392333984375, + "learning_rate": 3.169811320754717e-05, + "log_odds_chosen": 14.567859649658203, + "log_odds_ratio": -0.004244968760758638, + "logits/chosen": -1.951322317123413, + "logits/rejected": -6.3537211418151855, + "logps/chosen": -1.85011887550354, + "logps/rejected": -16.212106704711914, + "loss": 1.6572, + "nll_loss": 1.6567444801330566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18501189351081848, + "rewards/margins": 1.4361987113952637, + "rewards/rejected": -1.6212105751037598, + "step": 194 + }, + { + "epoch": 0.7358490566037735, + "grad_norm": 0.33259832859039307, + "learning_rate": 3.160377358490566e-05, + "log_odds_chosen": 14.361299514770508, + "log_odds_ratio": -0.18310227990150452, + "logits/chosen": -1.6968700885772705, + "logits/rejected": -5.388698577880859, + "logps/chosen": -1.7790277004241943, + "logps/rejected": -15.95116901397705, + "loss": 1.571, + "nll_loss": 1.552640438079834, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17790275812149048, + "rewards/margins": 1.4172141551971436, + "rewards/rejected": -1.5951169729232788, + "step": 195 + }, + { + "epoch": 0.7396226415094339, + "grad_norm": 0.3773075342178345, + "learning_rate": 3.1509433962264154e-05, + "log_odds_chosen": 13.168961524963379, + "log_odds_ratio": -0.06529372930526733, + "logits/chosen": -1.13154935836792, + "logits/rejected": -4.5446624755859375, + "logps/chosen": -1.3777858018875122, + "logps/rejected": -14.206029891967773, + "loss": 1.3619, + "nll_loss": 1.3554112911224365, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13777858018875122, + "rewards/margins": 1.2828243970870972, + "rewards/rejected": -1.4206030368804932, + "step": 196 + }, + { + "epoch": 0.7433962264150943, + "grad_norm": 0.3047720789909363, + "learning_rate": 3.1415094339622645e-05, + "log_odds_chosen": 18.703304290771484, + "log_odds_ratio": -3.1293072879634565e-06, + "logits/chosen": -1.948151707649231, + "logits/rejected": -5.263698101043701, + "logps/chosen": -1.3663289546966553, + "logps/rejected": -19.713842391967773, + "loss": 1.2498, + "nll_loss": 1.2497950792312622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13663290441036224, + "rewards/margins": 1.8347513675689697, + "rewards/rejected": -1.9713842868804932, + "step": 197 + }, + { + "epoch": 0.7471698113207547, + "grad_norm": 0.3021809458732605, + "learning_rate": 3.132075471698113e-05, + "log_odds_chosen": 16.85780143737793, + "log_odds_ratio": -0.09297717362642288, + "logits/chosen": -2.2993781566619873, + "logits/rejected": -5.5674147605896, + "logps/chosen": -1.532175064086914, + "logps/rejected": -17.899972915649414, + "loss": 1.4076, + "nll_loss": 1.3982605934143066, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15321749448776245, + "rewards/margins": 1.63677978515625, + "rewards/rejected": -1.7899973392486572, + "step": 198 + }, + { + "epoch": 0.7509433962264151, + "grad_norm": 0.3545827269554138, + "learning_rate": 3.122641509433962e-05, + "log_odds_chosen": 16.710426330566406, + "log_odds_ratio": -1.721089915918128e-06, + "logits/chosen": -2.0543713569641113, + "logits/rejected": -6.516955375671387, + "logps/chosen": -1.7566440105438232, + "logps/rejected": -18.181135177612305, + "loss": 1.5498, + "nll_loss": 1.5497593879699707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17566442489624023, + "rewards/margins": 1.642449140548706, + "rewards/rejected": -1.8181135654449463, + "step": 199 + }, + { + "epoch": 0.7547169811320755, + "grad_norm": 0.34778276085853577, + "learning_rate": 3.113207547169811e-05, + "log_odds_chosen": 18.424583435058594, + "log_odds_ratio": -7.215005462057889e-05, + "logits/chosen": -1.3918465375900269, + "logits/rejected": -3.4874095916748047, + "logps/chosen": -1.4977375268936157, + "logps/rejected": -19.452709197998047, + "loss": 1.5377, + "nll_loss": 1.5376887321472168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1497737616300583, + "rewards/margins": 1.795497179031372, + "rewards/rejected": -1.9452710151672363, + "step": 200 + }, + { + "epoch": 0.7584905660377359, + "grad_norm": 0.2991779148578644, + "learning_rate": 3.10377358490566e-05, + "log_odds_chosen": 17.403640747070312, + "log_odds_ratio": -9.71866975305602e-05, + "logits/chosen": -1.1479456424713135, + "logits/rejected": -5.782166481018066, + "logps/chosen": -1.5020239353179932, + "logps/rejected": -18.525487899780273, + "loss": 1.4808, + "nll_loss": 1.4808180332183838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15020239353179932, + "rewards/margins": 1.7023463249206543, + "rewards/rejected": -1.8525487184524536, + "step": 201 + }, + { + "epoch": 0.7622641509433963, + "grad_norm": 0.32272449135780334, + "learning_rate": 3.094339622641509e-05, + "log_odds_chosen": 18.042186737060547, + "log_odds_ratio": -1.0580668458715081e-05, + "logits/chosen": -1.1393158435821533, + "logits/rejected": -5.057635307312012, + "logps/chosen": -1.3160855770111084, + "logps/rejected": -18.88675308227539, + "loss": 1.3602, + "nll_loss": 1.3602066040039062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13160854578018188, + "rewards/margins": 1.7570668458938599, + "rewards/rejected": -1.888675332069397, + "step": 202 + }, + { + "epoch": 0.7660377358490567, + "grad_norm": 0.32788515090942383, + "learning_rate": 3.0849056603773584e-05, + "log_odds_chosen": 16.405580520629883, + "log_odds_ratio": -0.06243692338466644, + "logits/chosen": -1.4627768993377686, + "logits/rejected": -5.623252868652344, + "logps/chosen": -1.8027026653289795, + "logps/rejected": -18.00653839111328, + "loss": 1.6046, + "nll_loss": 1.5983972549438477, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1802702695131302, + "rewards/margins": 1.6203837394714355, + "rewards/rejected": -1.8006539344787598, + "step": 203 + }, + { + "epoch": 0.769811320754717, + "grad_norm": 1.234761118888855, + "learning_rate": 3.0754716981132075e-05, + "log_odds_chosen": 18.839767456054688, + "log_odds_ratio": -4.664214884542162e-06, + "logits/chosen": -2.3654236793518066, + "logits/rejected": -5.212252140045166, + "logps/chosen": -1.4169280529022217, + "logps/rejected": -19.78860092163086, + "loss": 1.4248, + "nll_loss": 1.4247938394546509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14169281721115112, + "rewards/margins": 1.8371672630310059, + "rewards/rejected": -1.9788599014282227, + "step": 204 + }, + { + "epoch": 0.7735849056603774, + "grad_norm": 0.31495201587677, + "learning_rate": 3.0660377358490567e-05, + "log_odds_chosen": 17.49294090270996, + "log_odds_ratio": -1.4975751128076809e-06, + "logits/chosen": -2.495401382446289, + "logits/rejected": -5.783306121826172, + "logps/chosen": -1.6663013696670532, + "logps/rejected": -18.882904052734375, + "loss": 1.684, + "nll_loss": 1.683968186378479, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16663014888763428, + "rewards/margins": 1.7216603755950928, + "rewards/rejected": -1.8882904052734375, + "step": 205 + }, + { + "epoch": 0.7773584905660378, + "grad_norm": 0.4705086350440979, + "learning_rate": 3.056603773584906e-05, + "log_odds_chosen": 18.32155990600586, + "log_odds_ratio": -0.05019821971654892, + "logits/chosen": -1.1568958759307861, + "logits/rejected": -4.691790580749512, + "logps/chosen": -1.4851261377334595, + "logps/rejected": -19.43079376220703, + "loss": 1.4827, + "nll_loss": 1.4777144193649292, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14851261675357819, + "rewards/margins": 1.7945667505264282, + "rewards/rejected": -1.9430793523788452, + "step": 206 + }, + { + "epoch": 0.7811320754716982, + "grad_norm": 0.3913361728191376, + "learning_rate": 3.047169811320755e-05, + "log_odds_chosen": 16.319984436035156, + "log_odds_ratio": -0.05193231999874115, + "logits/chosen": -2.967703342437744, + "logits/rejected": -5.948724269866943, + "logps/chosen": -1.7976689338684082, + "logps/rejected": -17.946117401123047, + "loss": 1.7308, + "nll_loss": 1.7255607843399048, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17976689338684082, + "rewards/margins": 1.6148450374603271, + "rewards/rejected": -1.7946120500564575, + "step": 207 + }, + { + "epoch": 0.7849056603773585, + "grad_norm": 0.309137761592865, + "learning_rate": 3.0377358490566036e-05, + "log_odds_chosen": 19.219402313232422, + "log_odds_ratio": -7.450602197422995e-07, + "logits/chosen": -0.8913941383361816, + "logits/rejected": -4.125641822814941, + "logps/chosen": -1.470247507095337, + "logps/rejected": -20.202777862548828, + "loss": 1.4043, + "nll_loss": 1.4043306112289429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14702476561069489, + "rewards/margins": 1.8732528686523438, + "rewards/rejected": -2.020277500152588, + "step": 208 + }, + { + "epoch": 0.7886792452830189, + "grad_norm": 0.28629574179649353, + "learning_rate": 3.0283018867924528e-05, + "log_odds_chosen": 16.79374122619629, + "log_odds_ratio": -0.00662041874602437, + "logits/chosen": -0.6648188829421997, + "logits/rejected": -6.581351280212402, + "logps/chosen": -1.4513788223266602, + "logps/rejected": -17.918479919433594, + "loss": 1.4518, + "nll_loss": 1.4511276483535767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14513790607452393, + "rewards/margins": 1.6467101573944092, + "rewards/rejected": -1.7918481826782227, + "step": 209 + }, + { + "epoch": 0.7924528301886793, + "grad_norm": 0.32280489802360535, + "learning_rate": 3.018867924528302e-05, + "log_odds_chosen": 17.51369857788086, + "log_odds_ratio": -3.799800651904661e-07, + "logits/chosen": -1.7065296173095703, + "logits/rejected": -5.712586402893066, + "logps/chosen": -1.4828323125839233, + "logps/rejected": -18.661277770996094, + "loss": 1.5052, + "nll_loss": 1.5052316188812256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1482832282781601, + "rewards/margins": 1.7178447246551514, + "rewards/rejected": -1.8661279678344727, + "step": 210 + }, + { + "epoch": 0.7962264150943397, + "grad_norm": 1.3183481693267822, + "learning_rate": 3.009433962264151e-05, + "log_odds_chosen": 16.796680450439453, + "log_odds_ratio": -1.360561145702377e-05, + "logits/chosen": -1.5383379459381104, + "logits/rejected": -4.357212543487549, + "logps/chosen": -1.435273289680481, + "logps/rejected": -17.903779983520508, + "loss": 1.4249, + "nll_loss": 1.4248584508895874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1435273289680481, + "rewards/margins": 1.646850824356079, + "rewards/rejected": -1.790378212928772, + "step": 211 + }, + { + "epoch": 0.8, + "grad_norm": 0.3218695819377899, + "learning_rate": 3e-05, + "log_odds_chosen": 17.916414260864258, + "log_odds_ratio": -1.38259565574117e-05, + "logits/chosen": -0.24590548872947693, + "logits/rejected": -4.629819869995117, + "logps/chosen": -1.4526543617248535, + "logps/rejected": -18.942493438720703, + "loss": 1.5065, + "nll_loss": 1.5065315961837769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14526543021202087, + "rewards/margins": 1.7489840984344482, + "rewards/rejected": -1.894249439239502, + "step": 212 + }, + { + "epoch": 0.8037735849056604, + "grad_norm": 0.3358078896999359, + "learning_rate": 2.9905660377358492e-05, + "log_odds_chosen": 14.796281814575195, + "log_odds_ratio": -0.0924949049949646, + "logits/chosen": -1.8153976202011108, + "logits/rejected": -4.928350448608398, + "logps/chosen": -1.4929924011230469, + "logps/rejected": -15.970230102539062, + "loss": 1.4921, + "nll_loss": 1.4828470945358276, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1492992341518402, + "rewards/margins": 1.4477238655090332, + "rewards/rejected": -1.5970230102539062, + "step": 213 + }, + { + "epoch": 0.8075471698113208, + "grad_norm": 0.3094753921031952, + "learning_rate": 2.9811320754716983e-05, + "log_odds_chosen": 15.192641258239746, + "log_odds_ratio": -0.02841039001941681, + "logits/chosen": -2.0973596572875977, + "logits/rejected": -4.845073699951172, + "logps/chosen": -1.5211032629013062, + "logps/rejected": -16.33423614501953, + "loss": 1.5114, + "nll_loss": 1.5085326433181763, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15211032330989838, + "rewards/margins": 1.4813133478164673, + "rewards/rejected": -1.6334238052368164, + "step": 214 + }, + { + "epoch": 0.8113207547169812, + "grad_norm": 0.3006260395050049, + "learning_rate": 2.971698113207547e-05, + "log_odds_chosen": 16.960983276367188, + "log_odds_ratio": -0.05776922032237053, + "logits/chosen": -0.9070106744766235, + "logits/rejected": -4.620122909545898, + "logps/chosen": -1.5111815929412842, + "logps/rejected": -18.19387435913086, + "loss": 1.4915, + "nll_loss": 1.4857306480407715, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15111815929412842, + "rewards/margins": 1.6682692766189575, + "rewards/rejected": -1.819387435913086, + "step": 215 + }, + { + "epoch": 0.8150943396226416, + "grad_norm": 0.33227863907814026, + "learning_rate": 2.9622641509433962e-05, + "log_odds_chosen": 18.719661712646484, + "log_odds_ratio": -9.611276254872791e-07, + "logits/chosen": -1.1189182996749878, + "logits/rejected": -4.95672082901001, + "logps/chosen": -1.2981202602386475, + "logps/rejected": -19.64653778076172, + "loss": 1.3002, + "nll_loss": 1.3002173900604248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12981203198432922, + "rewards/margins": 1.8348416090011597, + "rewards/rejected": -1.9646536111831665, + "step": 216 + }, + { + "epoch": 0.8188679245283019, + "grad_norm": 0.2781373858451843, + "learning_rate": 2.9528301886792453e-05, + "log_odds_chosen": 17.168392181396484, + "log_odds_ratio": -0.0007286164909601212, + "logits/chosen": -2.3207314014434814, + "logits/rejected": -6.724469184875488, + "logps/chosen": -1.6115381717681885, + "logps/rejected": -18.52199935913086, + "loss": 1.576, + "nll_loss": 1.575928807258606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16115380823612213, + "rewards/margins": 1.6910459995269775, + "rewards/rejected": -1.8521997928619385, + "step": 217 + }, + { + "epoch": 0.8226415094339623, + "grad_norm": 0.29558250308036804, + "learning_rate": 2.9433962264150944e-05, + "log_odds_chosen": 15.084373474121094, + "log_odds_ratio": -0.025247111916542053, + "logits/chosen": -0.26611119508743286, + "logits/rejected": -4.655291557312012, + "logps/chosen": -1.8194210529327393, + "logps/rejected": -16.723453521728516, + "loss": 1.6581, + "nll_loss": 1.6555492877960205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1819421350955963, + "rewards/margins": 1.490403175354004, + "rewards/rejected": -1.6723453998565674, + "step": 218 + }, + { + "epoch": 0.8264150943396227, + "grad_norm": 0.2880527973175049, + "learning_rate": 2.9339622641509435e-05, + "log_odds_chosen": 17.260496139526367, + "log_odds_ratio": -4.0319591789739206e-05, + "logits/chosen": -0.7846205830574036, + "logits/rejected": -4.96677827835083, + "logps/chosen": -1.624293565750122, + "logps/rejected": -18.620319366455078, + "loss": 1.5828, + "nll_loss": 1.5828019380569458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16242937743663788, + "rewards/margins": 1.6996028423309326, + "rewards/rejected": -1.8620320558547974, + "step": 219 + }, + { + "epoch": 0.8301886792452831, + "grad_norm": 0.24706144630908966, + "learning_rate": 2.9245283018867926e-05, + "log_odds_chosen": 18.422889709472656, + "log_odds_ratio": -8.091576091828756e-06, + "logits/chosen": -1.6815857887268066, + "logits/rejected": -6.923168659210205, + "logps/chosen": -1.4026367664337158, + "logps/rejected": -19.40614891052246, + "loss": 1.4331, + "nll_loss": 1.4331367015838623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14026367664337158, + "rewards/margins": 1.8003512620925903, + "rewards/rejected": -1.940614938735962, + "step": 220 + }, + { + "epoch": 0.8339622641509434, + "grad_norm": 0.3142763078212738, + "learning_rate": 2.9150943396226417e-05, + "log_odds_chosen": 17.446882247924805, + "log_odds_ratio": -0.07998549938201904, + "logits/chosen": -1.7830405235290527, + "logits/rejected": -5.290676593780518, + "logps/chosen": -1.7186626195907593, + "logps/rejected": -18.9097957611084, + "loss": 1.672, + "nll_loss": 1.6639615297317505, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1718662679195404, + "rewards/margins": 1.7191133499145508, + "rewards/rejected": -1.890979528427124, + "step": 221 + }, + { + "epoch": 0.8377358490566038, + "grad_norm": 0.32208096981048584, + "learning_rate": 2.9056603773584905e-05, + "log_odds_chosen": 18.16156005859375, + "log_odds_ratio": -8.31518536870135e-06, + "logits/chosen": -2.01678729057312, + "logits/rejected": -5.499569892883301, + "logps/chosen": -1.758958101272583, + "logps/rejected": -19.644182205200195, + "loss": 1.5194, + "nll_loss": 1.5194087028503418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1758958250284195, + "rewards/margins": 1.788522481918335, + "rewards/rejected": -1.9644180536270142, + "step": 222 + }, + { + "epoch": 0.8415094339622642, + "grad_norm": 0.3508431613445282, + "learning_rate": 2.8962264150943396e-05, + "log_odds_chosen": 16.593107223510742, + "log_odds_ratio": -4.483833254198544e-05, + "logits/chosen": -1.0808777809143066, + "logits/rejected": -5.002264022827148, + "logps/chosen": -1.6178133487701416, + "logps/rejected": -17.956409454345703, + "loss": 1.4168, + "nll_loss": 1.4167941808700562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16178134083747864, + "rewards/margins": 1.633859395980835, + "rewards/rejected": -1.7956409454345703, + "step": 223 + }, + { + "epoch": 0.8452830188679246, + "grad_norm": 0.3514016270637512, + "learning_rate": 2.8867924528301887e-05, + "log_odds_chosen": 16.712297439575195, + "log_odds_ratio": -3.978675522375852e-06, + "logits/chosen": -1.4249446392059326, + "logits/rejected": -4.565216064453125, + "logps/chosen": -1.8810580968856812, + "logps/rejected": -18.380489349365234, + "loss": 1.4511, + "nll_loss": 1.4511330127716064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18810580670833588, + "rewards/margins": 1.6499433517456055, + "rewards/rejected": -1.8380491733551025, + "step": 224 + }, + { + "epoch": 0.8490566037735849, + "grad_norm": 0.3235526978969574, + "learning_rate": 2.8773584905660378e-05, + "log_odds_chosen": 17.07125473022461, + "log_odds_ratio": -4.053184511576546e-06, + "logits/chosen": -0.9398770332336426, + "logits/rejected": -4.418087482452393, + "logps/chosen": -1.5705912113189697, + "logps/rejected": -18.33386993408203, + "loss": 1.5413, + "nll_loss": 1.5412647724151611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15705913305282593, + "rewards/margins": 1.6763279438018799, + "rewards/rejected": -1.8333872556686401, + "step": 225 + }, + { + "epoch": 0.8528301886792453, + "grad_norm": 0.3555522561073303, + "learning_rate": 2.867924528301887e-05, + "log_odds_chosen": 16.7454833984375, + "log_odds_ratio": -0.09962432086467743, + "logits/chosen": -2.2143068313598633, + "logits/rejected": -5.618562698364258, + "logps/chosen": -1.8031433820724487, + "logps/rejected": -18.335561752319336, + "loss": 1.5886, + "nll_loss": 1.5786436796188354, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1803143322467804, + "rewards/margins": 1.6532416343688965, + "rewards/rejected": -1.833556056022644, + "step": 226 + }, + { + "epoch": 0.8566037735849057, + "grad_norm": 0.30511021614074707, + "learning_rate": 2.858490566037736e-05, + "log_odds_chosen": 16.435148239135742, + "log_odds_ratio": -0.05051492527127266, + "logits/chosen": -1.1615848541259766, + "logits/rejected": -4.774338722229004, + "logps/chosen": -1.4906309843063354, + "logps/rejected": -17.58717155456543, + "loss": 1.4203, + "nll_loss": 1.4152026176452637, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1490630954504013, + "rewards/margins": 1.6096539497375488, + "rewards/rejected": -1.7587170600891113, + "step": 227 + }, + { + "epoch": 0.8603773584905661, + "grad_norm": 0.2953304946422577, + "learning_rate": 2.8490566037735848e-05, + "log_odds_chosen": 17.240137100219727, + "log_odds_ratio": -0.00012110001989640296, + "logits/chosen": -2.184664726257324, + "logits/rejected": -6.46320915222168, + "logps/chosen": -1.8011096715927124, + "logps/rejected": -18.774145126342773, + "loss": 1.5879, + "nll_loss": 1.5879219770431519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18011096119880676, + "rewards/margins": 1.6973035335540771, + "rewards/rejected": -1.8774144649505615, + "step": 228 + }, + { + "epoch": 0.8641509433962264, + "grad_norm": 0.3585098087787628, + "learning_rate": 2.839622641509434e-05, + "log_odds_chosen": 15.124954223632812, + "log_odds_ratio": -0.00011490716133266687, + "logits/chosen": -0.7336174249649048, + "logits/rejected": -3.8608899116516113, + "logps/chosen": -1.7269798517227173, + "logps/rejected": -16.582866668701172, + "loss": 1.6302, + "nll_loss": 1.6301769018173218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1726979911327362, + "rewards/margins": 1.4855889081954956, + "rewards/rejected": -1.6582868099212646, + "step": 229 + }, + { + "epoch": 0.8679245283018868, + "grad_norm": 0.8896408677101135, + "learning_rate": 2.830188679245283e-05, + "log_odds_chosen": 18.62564468383789, + "log_odds_ratio": -1.3411050758804777e-07, + "logits/chosen": -0.7562139630317688, + "logits/rejected": -6.943488121032715, + "logps/chosen": -1.6029720306396484, + "logps/rejected": -19.97881507873535, + "loss": 1.5316, + "nll_loss": 1.531610131263733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1602971851825714, + "rewards/margins": 1.837584376335144, + "rewards/rejected": -1.997881531715393, + "step": 230 + }, + { + "epoch": 0.8716981132075472, + "grad_norm": 0.3106488883495331, + "learning_rate": 2.820754716981132e-05, + "log_odds_chosen": 17.55572509765625, + "log_odds_ratio": -0.057248592376708984, + "logits/chosen": -1.0949640274047852, + "logits/rejected": -3.665689468383789, + "logps/chosen": -1.5709400177001953, + "logps/rejected": -18.767902374267578, + "loss": 1.3768, + "nll_loss": 1.3711098432540894, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15709398686885834, + "rewards/margins": 1.719696283340454, + "rewards/rejected": -1.8767902851104736, + "step": 231 + }, + { + "epoch": 0.8754716981132076, + "grad_norm": 0.329771488904953, + "learning_rate": 2.8113207547169812e-05, + "log_odds_chosen": 14.081006050109863, + "log_odds_ratio": -0.22504055500030518, + "logits/chosen": -1.2311842441558838, + "logits/rejected": -3.282421827316284, + "logps/chosen": -1.647598385810852, + "logps/rejected": -15.423025131225586, + "loss": 1.4569, + "nll_loss": 1.4343469142913818, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16475984454154968, + "rewards/margins": 1.3775426149368286, + "rewards/rejected": -1.5423026084899902, + "step": 232 + }, + { + "epoch": 0.879245283018868, + "grad_norm": 0.3315247595310211, + "learning_rate": 2.8018867924528303e-05, + "log_odds_chosen": 16.6135196685791, + "log_odds_ratio": -0.015212212689220905, + "logits/chosen": -1.0095354318618774, + "logits/rejected": -5.442407608032227, + "logps/chosen": -1.6335803270339966, + "logps/rejected": -17.933931350708008, + "loss": 1.5397, + "nll_loss": 1.5382212400436401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16335803270339966, + "rewards/margins": 1.6300350427627563, + "rewards/rejected": -1.7933931350708008, + "step": 233 + }, + { + "epoch": 0.8830188679245283, + "grad_norm": 0.2853158116340637, + "learning_rate": 2.7924528301886794e-05, + "log_odds_chosen": 17.776758193969727, + "log_odds_ratio": -0.00014782443759031594, + "logits/chosen": -1.9506645202636719, + "logits/rejected": -6.754343509674072, + "logps/chosen": -1.5541913509368896, + "logps/rejected": -19.017948150634766, + "loss": 1.5127, + "nll_loss": 1.5127075910568237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15541914105415344, + "rewards/margins": 1.7463756799697876, + "rewards/rejected": -1.9017947912216187, + "step": 234 + }, + { + "epoch": 0.8867924528301887, + "grad_norm": 0.3323514759540558, + "learning_rate": 2.7830188679245282e-05, + "log_odds_chosen": 19.43785285949707, + "log_odds_ratio": -1.6764031443017302e-06, + "logits/chosen": -1.0080933570861816, + "logits/rejected": -4.819693565368652, + "logps/chosen": -1.4333504438400269, + "logps/rejected": -20.516963958740234, + "loss": 1.3111, + "nll_loss": 1.3111367225646973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14333504438400269, + "rewards/margins": 1.9083614349365234, + "rewards/rejected": -2.051696538925171, + "step": 235 + }, + { + "epoch": 0.8905660377358491, + "grad_norm": 0.39661461114883423, + "learning_rate": 2.7735849056603773e-05, + "log_odds_chosen": 17.063642501831055, + "log_odds_ratio": -0.02693340554833412, + "logits/chosen": -2.2235286235809326, + "logits/rejected": -4.167008876800537, + "logps/chosen": -1.677812099456787, + "logps/rejected": -18.464336395263672, + "loss": 1.5569, + "nll_loss": 1.5542023181915283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16778121888637543, + "rewards/margins": 1.6786524057388306, + "rewards/rejected": -1.8464335203170776, + "step": 236 + }, + { + "epoch": 0.8943396226415095, + "grad_norm": 0.2811707854270935, + "learning_rate": 2.7641509433962264e-05, + "log_odds_chosen": 17.162025451660156, + "log_odds_ratio": -0.0785740464925766, + "logits/chosen": -2.9656736850738525, + "logits/rejected": -5.683234214782715, + "logps/chosen": -1.4220517873764038, + "logps/rejected": -18.229448318481445, + "loss": 1.5842, + "nll_loss": 1.576366662979126, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14220517873764038, + "rewards/margins": 1.6807397603988647, + "rewards/rejected": -1.8229451179504395, + "step": 237 + }, + { + "epoch": 0.8981132075471698, + "grad_norm": 0.29917973279953003, + "learning_rate": 2.7547169811320755e-05, + "log_odds_chosen": 16.159685134887695, + "log_odds_ratio": -0.022400004789233208, + "logits/chosen": -0.6307175755500793, + "logits/rejected": -5.9625935554504395, + "logps/chosen": -1.3831230401992798, + "logps/rejected": -17.1973876953125, + "loss": 1.3797, + "nll_loss": 1.377410888671875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13831230998039246, + "rewards/margins": 1.5814265012741089, + "rewards/rejected": -1.7197388410568237, + "step": 238 + }, + { + "epoch": 0.9018867924528302, + "grad_norm": 0.4006063938140869, + "learning_rate": 2.7452830188679247e-05, + "log_odds_chosen": 17.939586639404297, + "log_odds_ratio": -1.753244941937737e-05, + "logits/chosen": -0.8297520875930786, + "logits/rejected": -5.683241844177246, + "logps/chosen": -1.3332468271255493, + "logps/rejected": -18.919038772583008, + "loss": 1.3098, + "nll_loss": 1.3098140954971313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13332468271255493, + "rewards/margins": 1.7585792541503906, + "rewards/rejected": -1.8919038772583008, + "step": 239 + }, + { + "epoch": 0.9056603773584906, + "grad_norm": 0.30147290229797363, + "learning_rate": 2.7358490566037738e-05, + "log_odds_chosen": 15.91531753540039, + "log_odds_ratio": -0.03641377389431, + "logits/chosen": -0.7411012649536133, + "logits/rejected": -4.249468803405762, + "logps/chosen": -1.5858731269836426, + "logps/rejected": -17.18975830078125, + "loss": 1.5853, + "nll_loss": 1.5816415548324585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15858730673789978, + "rewards/margins": 1.5603885650634766, + "rewards/rejected": -1.7189757823944092, + "step": 240 + }, + { + "epoch": 0.909433962264151, + "grad_norm": 0.31893160939216614, + "learning_rate": 2.726415094339623e-05, + "log_odds_chosen": 16.919687271118164, + "log_odds_ratio": -0.0328671857714653, + "logits/chosen": -1.2528479099273682, + "logits/rejected": -4.872306823730469, + "logps/chosen": -1.2921593189239502, + "logps/rejected": -17.771411895751953, + "loss": 1.4341, + "nll_loss": 1.43081533908844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12921592593193054, + "rewards/margins": 1.6479253768920898, + "rewards/rejected": -1.7771413326263428, + "step": 241 + }, + { + "epoch": 0.9132075471698113, + "grad_norm": 0.295743465423584, + "learning_rate": 2.7169811320754716e-05, + "log_odds_chosen": 17.80528450012207, + "log_odds_ratio": -0.00010832020052475855, + "logits/chosen": -0.645114541053772, + "logits/rejected": -5.190818786621094, + "logps/chosen": -1.5482758283615112, + "logps/rejected": -19.037681579589844, + "loss": 1.4926, + "nll_loss": 1.492592215538025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15482759475708008, + "rewards/margins": 1.7489407062530518, + "rewards/rejected": -1.9037683010101318, + "step": 242 + }, + { + "epoch": 0.9169811320754717, + "grad_norm": 0.362249493598938, + "learning_rate": 2.7075471698113207e-05, + "log_odds_chosen": 17.395801544189453, + "log_odds_ratio": -0.007926247082650661, + "logits/chosen": -2.3562960624694824, + "logits/rejected": -6.801301956176758, + "logps/chosen": -1.1412396430969238, + "logps/rejected": -17.93915557861328, + "loss": 1.2173, + "nll_loss": 1.2165017127990723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11412396281957626, + "rewards/margins": 1.6797915697097778, + "rewards/rejected": -1.7939155101776123, + "step": 243 + }, + { + "epoch": 0.9207547169811321, + "grad_norm": 0.3344380259513855, + "learning_rate": 2.69811320754717e-05, + "log_odds_chosen": 18.15846824645996, + "log_odds_ratio": -1.184646635010722e-06, + "logits/chosen": -1.4028515815734863, + "logits/rejected": -5.666131019592285, + "logps/chosen": -1.6515846252441406, + "logps/rejected": -19.494230270385742, + "loss": 1.4627, + "nll_loss": 1.462727665901184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1651584655046463, + "rewards/margins": 1.7842646837234497, + "rewards/rejected": -1.9494233131408691, + "step": 244 + }, + { + "epoch": 0.9245283018867925, + "grad_norm": 0.3528442084789276, + "learning_rate": 2.688679245283019e-05, + "log_odds_chosen": 16.469255447387695, + "log_odds_ratio": -0.16001646220684052, + "logits/chosen": -0.5904859304428101, + "logits/rejected": -4.443323135375977, + "logps/chosen": -1.5586724281311035, + "logps/rejected": -17.767322540283203, + "loss": 1.3949, + "nll_loss": 1.3788522481918335, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15586724877357483, + "rewards/margins": 1.620864987373352, + "rewards/rejected": -1.7767322063446045, + "step": 245 + }, + { + "epoch": 0.9283018867924528, + "grad_norm": 0.36119183897972107, + "learning_rate": 2.679245283018868e-05, + "log_odds_chosen": 16.416719436645508, + "log_odds_ratio": -0.20440763235092163, + "logits/chosen": -1.4841980934143066, + "logits/rejected": -4.816326141357422, + "logps/chosen": -1.387542724609375, + "logps/rejected": -17.60167121887207, + "loss": 1.4418, + "nll_loss": 1.4213593006134033, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13875426352024078, + "rewards/margins": 1.6214128732681274, + "rewards/rejected": -1.7601672410964966, + "step": 246 + }, + { + "epoch": 0.9320754716981132, + "grad_norm": 0.3182571828365326, + "learning_rate": 2.6698113207547172e-05, + "log_odds_chosen": 16.2152099609375, + "log_odds_ratio": -0.03764305263757706, + "logits/chosen": -1.0904548168182373, + "logits/rejected": -3.9518043994903564, + "logps/chosen": -1.4248569011688232, + "logps/rejected": -17.271034240722656, + "loss": 1.4531, + "nll_loss": 1.4493829011917114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14248570799827576, + "rewards/margins": 1.5846177339553833, + "rewards/rejected": -1.7271034717559814, + "step": 247 + }, + { + "epoch": 0.9358490566037736, + "grad_norm": 0.321162611246109, + "learning_rate": 2.6603773584905663e-05, + "log_odds_chosen": 18.773868560791016, + "log_odds_ratio": -1.393025740981102e-05, + "logits/chosen": -0.8928079009056091, + "logits/rejected": -5.256990432739258, + "logps/chosen": -1.6134278774261475, + "logps/rejected": -20.060321807861328, + "loss": 1.5017, + "nll_loss": 1.5017071962356567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1613427847623825, + "rewards/margins": 1.8446893692016602, + "rewards/rejected": -2.0060322284698486, + "step": 248 + }, + { + "epoch": 0.939622641509434, + "grad_norm": 0.8356024026870728, + "learning_rate": 2.650943396226415e-05, + "log_odds_chosen": 16.179561614990234, + "log_odds_ratio": -0.04995302855968475, + "logits/chosen": -1.4648141860961914, + "logits/rejected": -4.60720682144165, + "logps/chosen": -1.707082748413086, + "logps/rejected": -17.653757095336914, + "loss": 1.5352, + "nll_loss": 1.5301692485809326, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17070826888084412, + "rewards/margins": 1.5946673154830933, + "rewards/rejected": -1.7653756141662598, + "step": 249 + }, + { + "epoch": 0.9433962264150944, + "grad_norm": 0.3033410608768463, + "learning_rate": 2.641509433962264e-05, + "log_odds_chosen": 16.49986457824707, + "log_odds_ratio": -0.0050300052389502525, + "logits/chosen": -1.6823077201843262, + "logits/rejected": -5.105968475341797, + "logps/chosen": -1.6065022945404053, + "logps/rejected": -17.84182357788086, + "loss": 1.5878, + "nll_loss": 1.5873433351516724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16065022349357605, + "rewards/margins": 1.6235322952270508, + "rewards/rejected": -1.7841825485229492, + "step": 250 + }, + { + "epoch": 0.9471698113207547, + "grad_norm": 0.35018157958984375, + "learning_rate": 2.6320754716981133e-05, + "log_odds_chosen": 19.928390502929688, + "log_odds_ratio": -8.717207720110309e-07, + "logits/chosen": -0.29920998215675354, + "logits/rejected": -4.818760395050049, + "logps/chosen": -1.2851896286010742, + "logps/rejected": -20.66608238220215, + "loss": 1.2603, + "nll_loss": 1.2603236436843872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1285189688205719, + "rewards/margins": 1.93808913230896, + "rewards/rejected": -2.066608428955078, + "step": 251 + }, + { + "epoch": 0.9509433962264151, + "grad_norm": 0.3246742784976959, + "learning_rate": 2.6226415094339624e-05, + "log_odds_chosen": 17.97346305847168, + "log_odds_ratio": -1.0215319889539387e-05, + "logits/chosen": -1.2597813606262207, + "logits/rejected": -5.395329475402832, + "logps/chosen": -1.4511744976043701, + "logps/rejected": -19.067272186279297, + "loss": 1.492, + "nll_loss": 1.4920153617858887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14511744678020477, + "rewards/margins": 1.761609673500061, + "rewards/rejected": -1.9067270755767822, + "step": 252 + }, + { + "epoch": 0.9547169811320755, + "grad_norm": 0.3272016942501068, + "learning_rate": 2.6132075471698115e-05, + "log_odds_chosen": 18.806455612182617, + "log_odds_ratio": -0.04364859312772751, + "logits/chosen": -1.467919945716858, + "logits/rejected": -4.545738220214844, + "logps/chosen": -1.4638400077819824, + "logps/rejected": -19.876646041870117, + "loss": 1.3501, + "nll_loss": 1.3457329273223877, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14638400077819824, + "rewards/margins": 1.8412805795669556, + "rewards/rejected": -1.9876646995544434, + "step": 253 + }, + { + "epoch": 0.9584905660377359, + "grad_norm": 0.6240985989570618, + "learning_rate": 2.6037735849056606e-05, + "log_odds_chosen": 19.93435287475586, + "log_odds_ratio": -7.450581485102248e-09, + "logits/chosen": -0.8115564584732056, + "logits/rejected": -6.536435127258301, + "logps/chosen": -1.6901774406433105, + "logps/rejected": -21.379650115966797, + "loss": 1.674, + "nll_loss": 1.6740447282791138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1690177470445633, + "rewards/margins": 1.968947410583496, + "rewards/rejected": -2.137965202331543, + "step": 254 + }, + { + "epoch": 0.9622641509433962, + "grad_norm": 0.3309653401374817, + "learning_rate": 2.5943396226415094e-05, + "log_odds_chosen": 19.6575927734375, + "log_odds_ratio": -0.01984320767223835, + "logits/chosen": -0.7965545654296875, + "logits/rejected": -3.3540658950805664, + "logps/chosen": -1.5116550922393799, + "logps/rejected": -20.71581268310547, + "loss": 1.5013, + "nll_loss": 1.4993512630462646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15116551518440247, + "rewards/margins": 1.9204158782958984, + "rewards/rejected": -2.0715813636779785, + "step": 255 + }, + { + "epoch": 0.9660377358490566, + "grad_norm": 0.3074422776699066, + "learning_rate": 2.5849056603773585e-05, + "log_odds_chosen": 19.469036102294922, + "log_odds_ratio": -0.000835958169773221, + "logits/chosen": -0.7382791638374329, + "logits/rejected": -3.960020065307617, + "logps/chosen": -1.3473050594329834, + "logps/rejected": -20.31693458557129, + "loss": 1.4874, + "nll_loss": 1.4873261451721191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1347305178642273, + "rewards/margins": 1.896963119506836, + "rewards/rejected": -2.031693696975708, + "step": 256 + }, + { + "epoch": 0.969811320754717, + "grad_norm": 0.2996930480003357, + "learning_rate": 2.5754716981132076e-05, + "log_odds_chosen": 19.313798904418945, + "log_odds_ratio": -3.367738599990844e-06, + "logits/chosen": -1.150359869003296, + "logits/rejected": -6.183455467224121, + "logps/chosen": -1.4993988275527954, + "logps/rejected": -20.48924446105957, + "loss": 1.4565, + "nll_loss": 1.4564992189407349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1499398946762085, + "rewards/margins": 1.898984670639038, + "rewards/rejected": -2.048924446105957, + "step": 257 + }, + { + "epoch": 0.9735849056603774, + "grad_norm": 0.3131140470504761, + "learning_rate": 2.5660377358490567e-05, + "log_odds_chosen": 18.251684188842773, + "log_odds_ratio": -4.395888026920147e-06, + "logits/chosen": -0.7550607919692993, + "logits/rejected": -4.872794151306152, + "logps/chosen": -1.650226354598999, + "logps/rejected": -19.56356430053711, + "loss": 1.5635, + "nll_loss": 1.5635182857513428, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16502264142036438, + "rewards/margins": 1.791333794593811, + "rewards/rejected": -1.956356406211853, + "step": 258 + }, + { + "epoch": 0.9773584905660377, + "grad_norm": 0.31624630093574524, + "learning_rate": 2.5566037735849058e-05, + "log_odds_chosen": 17.00825309753418, + "log_odds_ratio": -0.0031702774576842785, + "logits/chosen": -1.8281440734863281, + "logits/rejected": -5.850688934326172, + "logps/chosen": -1.6439521312713623, + "logps/rejected": -18.4215087890625, + "loss": 1.4766, + "nll_loss": 1.4762651920318604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16439521312713623, + "rewards/margins": 1.6777557134628296, + "rewards/rejected": -1.842151165008545, + "step": 259 + }, + { + "epoch": 0.9811320754716981, + "grad_norm": 0.34498074650764465, + "learning_rate": 2.547169811320755e-05, + "log_odds_chosen": 18.038841247558594, + "log_odds_ratio": -0.0025024032220244408, + "logits/chosen": -1.4132332801818848, + "logits/rejected": -5.574821949005127, + "logps/chosen": -1.6830906867980957, + "logps/rejected": -19.414941787719727, + "loss": 1.4516, + "nll_loss": 1.4513163566589355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1683090627193451, + "rewards/margins": 1.773185133934021, + "rewards/rejected": -1.9414939880371094, + "step": 260 + }, + { + "epoch": 0.9849056603773585, + "grad_norm": 0.32891684770584106, + "learning_rate": 2.537735849056604e-05, + "log_odds_chosen": 19.396223068237305, + "log_odds_ratio": -9.61128534981981e-07, + "logits/chosen": -1.6232181787490845, + "logits/rejected": -6.2155914306640625, + "logps/chosen": -1.5656075477600098, + "logps/rejected": -20.644664764404297, + "loss": 1.3423, + "nll_loss": 1.3423418998718262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1565607637166977, + "rewards/margins": 1.9079058170318604, + "rewards/rejected": -2.0644664764404297, + "step": 261 + }, + { + "epoch": 0.9886792452830189, + "grad_norm": 0.39039233326911926, + "learning_rate": 2.5283018867924528e-05, + "log_odds_chosen": 15.765559196472168, + "log_odds_ratio": -0.0038383540231734514, + "logits/chosen": -0.5578839778900146, + "logits/rejected": -5.357296466827393, + "logps/chosen": -1.6717973947525024, + "logps/rejected": -17.1871395111084, + "loss": 1.5226, + "nll_loss": 1.522261381149292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16717973351478577, + "rewards/margins": 1.5515341758728027, + "rewards/rejected": -1.7187139987945557, + "step": 262 + }, + { + "epoch": 0.9924528301886792, + "grad_norm": 0.2830381989479065, + "learning_rate": 2.518867924528302e-05, + "log_odds_chosen": 21.367420196533203, + "log_odds_ratio": -7.450581485102248e-09, + "logits/chosen": -2.053356409072876, + "logits/rejected": -5.862036228179932, + "logps/chosen": -1.3459280729293823, + "logps/rejected": -22.330286026000977, + "loss": 1.2824, + "nll_loss": 1.2823607921600342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13459281623363495, + "rewards/margins": 2.098435878753662, + "rewards/rejected": -2.2330286502838135, + "step": 263 + }, + { + "epoch": 0.9962264150943396, + "grad_norm": 0.34556370973587036, + "learning_rate": 2.509433962264151e-05, + "log_odds_chosen": 17.593475341796875, + "log_odds_ratio": -0.051692862063646317, + "logits/chosen": -1.9133423566818237, + "logits/rejected": -4.535181999206543, + "logps/chosen": -1.6120280027389526, + "logps/rejected": -18.84408950805664, + "loss": 1.5221, + "nll_loss": 1.5169554948806763, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1612028032541275, + "rewards/margins": 1.7232062816619873, + "rewards/rejected": -1.8844091892242432, + "step": 264 + }, + { + "epoch": 1.0, + "grad_norm": 0.3379838764667511, + "learning_rate": 2.5e-05, + "log_odds_chosen": 19.729963302612305, + "log_odds_ratio": -3.501869741739938e-06, + "logits/chosen": -1.0773916244506836, + "logits/rejected": -6.408069610595703, + "logps/chosen": -1.8509314060211182, + "logps/rejected": -21.368595123291016, + "loss": 1.8248, + "nll_loss": 1.824795126914978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18509314954280853, + "rewards/margins": 1.9517664909362793, + "rewards/rejected": -2.136859655380249, + "step": 265 + }, + { + "epoch": 1.0037735849056604, + "grad_norm": 0.32272571325302124, + "learning_rate": 2.4905660377358492e-05, + "log_odds_chosen": 19.660472869873047, + "log_odds_ratio": -5.528574547497556e-06, + "logits/chosen": -2.0058681964874268, + "logits/rejected": -6.02784538269043, + "logps/chosen": -1.462933897972107, + "logps/rejected": -20.805423736572266, + "loss": 1.434, + "nll_loss": 1.43397057056427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14629340171813965, + "rewards/margins": 1.9342491626739502, + "rewards/rejected": -2.080542802810669, + "step": 266 + }, + { + "epoch": 1.0075471698113208, + "grad_norm": 0.3159734308719635, + "learning_rate": 2.4811320754716983e-05, + "log_odds_chosen": 18.871841430664062, + "log_odds_ratio": -1.0505355021450669e-06, + "logits/chosen": -1.4568092823028564, + "logits/rejected": -5.773335933685303, + "logps/chosen": -1.4329451322555542, + "logps/rejected": -19.894126892089844, + "loss": 1.5099, + "nll_loss": 1.5098960399627686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14329451322555542, + "rewards/margins": 1.8461179733276367, + "rewards/rejected": -1.989412546157837, + "step": 267 + }, + { + "epoch": 1.0113207547169811, + "grad_norm": 0.280902236700058, + "learning_rate": 2.4716981132075474e-05, + "log_odds_chosen": 20.24521255493164, + "log_odds_ratio": -2.2351767370309972e-07, + "logits/chosen": -1.0519423484802246, + "logits/rejected": -6.738117218017578, + "logps/chosen": -1.7242472171783447, + "logps/rejected": -21.655092239379883, + "loss": 1.678, + "nll_loss": 1.67795729637146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17242471873760223, + "rewards/margins": 1.9930845499038696, + "rewards/rejected": -2.1655092239379883, + "step": 268 + }, + { + "epoch": 1.0150943396226415, + "grad_norm": 0.6097087264060974, + "learning_rate": 2.4622641509433962e-05, + "log_odds_chosen": 17.3674373626709, + "log_odds_ratio": -0.06029047444462776, + "logits/chosen": -2.0133872032165527, + "logits/rejected": -5.912678241729736, + "logps/chosen": -1.5788557529449463, + "logps/rejected": -18.655887603759766, + "loss": 1.5696, + "nll_loss": 1.5636141300201416, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1578855961561203, + "rewards/margins": 1.7077033519744873, + "rewards/rejected": -1.8655890226364136, + "step": 269 + }, + { + "epoch": 1.0188679245283019, + "grad_norm": 0.3251427710056305, + "learning_rate": 2.4528301886792453e-05, + "log_odds_chosen": 19.473735809326172, + "log_odds_ratio": -0.05202309042215347, + "logits/chosen": -2.5387845039367676, + "logits/rejected": -6.715412616729736, + "logps/chosen": -1.5151475667953491, + "logps/rejected": -20.52288055419922, + "loss": 1.5736, + "nll_loss": 1.5684118270874023, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15151476860046387, + "rewards/margins": 1.9007731676101685, + "rewards/rejected": -2.052288055419922, + "step": 270 + }, + { + "epoch": 1.0226415094339623, + "grad_norm": 0.28263339400291443, + "learning_rate": 2.4433962264150944e-05, + "log_odds_chosen": 21.164031982421875, + "log_odds_ratio": -8.56822566674964e-07, + "logits/chosen": -1.4600391387939453, + "logits/rejected": -5.919426441192627, + "logps/chosen": -1.4743690490722656, + "logps/rejected": -22.336339950561523, + "loss": 1.5115, + "nll_loss": 1.51149582862854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14743690192699432, + "rewards/margins": 2.0861971378326416, + "rewards/rejected": -2.2336339950561523, + "step": 271 + }, + { + "epoch": 1.0264150943396226, + "grad_norm": 0.28004854917526245, + "learning_rate": 2.4339622641509435e-05, + "log_odds_chosen": 20.22897720336914, + "log_odds_ratio": -1.5720835335741867e-06, + "logits/chosen": -2.687819004058838, + "logits/rejected": -5.694319248199463, + "logps/chosen": -1.3161007165908813, + "logps/rejected": -21.028356552124023, + "loss": 1.1834, + "nll_loss": 1.1834136247634888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13161008059978485, + "rewards/margins": 1.971225619316101, + "rewards/rejected": -2.1028356552124023, + "step": 272 + }, + { + "epoch": 1.030188679245283, + "grad_norm": 0.3043549358844757, + "learning_rate": 2.4245283018867926e-05, + "log_odds_chosen": 18.964893341064453, + "log_odds_ratio": -0.04836106672883034, + "logits/chosen": -1.09200918674469, + "logits/rejected": -5.859206199645996, + "logps/chosen": -1.600953221321106, + "logps/rejected": -20.301076889038086, + "loss": 1.5009, + "nll_loss": 1.4960479736328125, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16009533405303955, + "rewards/margins": 1.8700122833251953, + "rewards/rejected": -2.0301077365875244, + "step": 273 + }, + { + "epoch": 1.0339622641509434, + "grad_norm": 0.29216280579566956, + "learning_rate": 2.4150943396226418e-05, + "log_odds_chosen": 18.339717864990234, + "log_odds_ratio": -0.00336459930986166, + "logits/chosen": -2.02860689163208, + "logits/rejected": -6.949286460876465, + "logps/chosen": -1.6870245933532715, + "logps/rejected": -19.78402328491211, + "loss": 1.5731, + "nll_loss": 1.5727555751800537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16870248317718506, + "rewards/margins": 1.8096998929977417, + "rewards/rejected": -1.9784023761749268, + "step": 274 + }, + { + "epoch": 1.0377358490566038, + "grad_norm": 0.2941288352012634, + "learning_rate": 2.405660377358491e-05, + "log_odds_chosen": 22.276226043701172, + "log_odds_ratio": 0.0, + "logits/chosen": -2.0723040103912354, + "logits/rejected": -5.220292568206787, + "logps/chosen": -1.4046821594238281, + "logps/rejected": -23.268173217773438, + "loss": 1.3712, + "nll_loss": 1.371185541152954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14046822488307953, + "rewards/margins": 2.1863491535186768, + "rewards/rejected": -2.326817512512207, + "step": 275 + }, + { + "epoch": 1.0415094339622641, + "grad_norm": 0.29630234837532043, + "learning_rate": 2.3962264150943396e-05, + "log_odds_chosen": 22.004188537597656, + "log_odds_ratio": 0.0, + "logits/chosen": -1.7940192222595215, + "logits/rejected": -7.220149040222168, + "logps/chosen": -1.6048816442489624, + "logps/rejected": -23.359508514404297, + "loss": 1.4889, + "nll_loss": 1.4889363050460815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16048815846443176, + "rewards/margins": 2.1754627227783203, + "rewards/rejected": -2.3359508514404297, + "step": 276 + }, + { + "epoch": 1.0452830188679245, + "grad_norm": 0.3317105174064636, + "learning_rate": 2.3867924528301887e-05, + "log_odds_chosen": 17.85032081604004, + "log_odds_ratio": -2.0116699488426093e-06, + "logits/chosen": -0.9674760699272156, + "logits/rejected": -5.794704914093018, + "logps/chosen": -1.475320816040039, + "logps/rejected": -18.945966720581055, + "loss": 1.3938, + "nll_loss": 1.3938060998916626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14753207564353943, + "rewards/margins": 1.7470645904541016, + "rewards/rejected": -1.8945965766906738, + "step": 277 + }, + { + "epoch": 1.049056603773585, + "grad_norm": 0.30001261830329895, + "learning_rate": 2.377358490566038e-05, + "log_odds_chosen": 20.2429256439209, + "log_odds_ratio": -4.4703490686970326e-08, + "logits/chosen": -0.8269245624542236, + "logits/rejected": -5.202162265777588, + "logps/chosen": -1.4923174381256104, + "logps/rejected": -21.42803192138672, + "loss": 1.3961, + "nll_loss": 1.3961377143859863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14923176169395447, + "rewards/margins": 1.9935715198516846, + "rewards/rejected": -2.142803192138672, + "step": 278 + }, + { + "epoch": 1.0528301886792453, + "grad_norm": 0.35864245891571045, + "learning_rate": 2.367924528301887e-05, + "log_odds_chosen": 18.86318588256836, + "log_odds_ratio": -0.06927872449159622, + "logits/chosen": -1.760351538658142, + "logits/rejected": -5.471229553222656, + "logps/chosen": -1.6287931203842163, + "logps/rejected": -20.220352172851562, + "loss": 1.4988, + "nll_loss": 1.4918767213821411, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1628793179988861, + "rewards/margins": 1.8591558933258057, + "rewards/rejected": -2.0220353603363037, + "step": 279 + }, + { + "epoch": 1.0566037735849056, + "grad_norm": 0.3526778519153595, + "learning_rate": 2.358490566037736e-05, + "log_odds_chosen": 21.42916488647461, + "log_odds_ratio": -1.3187654985813424e-06, + "logits/chosen": -1.282188057899475, + "logits/rejected": -4.82998514175415, + "logps/chosen": -1.2494144439697266, + "logps/rejected": -22.1539306640625, + "loss": 1.2298, + "nll_loss": 1.2297983169555664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12494143843650818, + "rewards/margins": 2.090451240539551, + "rewards/rejected": -2.21539306640625, + "step": 280 + }, + { + "epoch": 1.060377358490566, + "grad_norm": 0.28869158029556274, + "learning_rate": 2.3490566037735852e-05, + "log_odds_chosen": 20.201156616210938, + "log_odds_ratio": -1.1175878711355836e-07, + "logits/chosen": -1.7291052341461182, + "logits/rejected": -7.483429431915283, + "logps/chosen": -1.5107462406158447, + "logps/rejected": -21.40848159790039, + "loss": 1.4772, + "nll_loss": 1.4772439002990723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15107461810112, + "rewards/margins": 1.9897735118865967, + "rewards/rejected": -2.140848159790039, + "step": 281 + }, + { + "epoch": 1.0641509433962264, + "grad_norm": 0.31443092226982117, + "learning_rate": 2.339622641509434e-05, + "log_odds_chosen": 19.611129760742188, + "log_odds_ratio": -3.022684541065246e-05, + "logits/chosen": -1.9202224016189575, + "logits/rejected": -6.25022029876709, + "logps/chosen": -1.5923198461532593, + "logps/rejected": -20.92377471923828, + "loss": 1.5393, + "nll_loss": 1.539318561553955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15923196077346802, + "rewards/margins": 1.933145523071289, + "rewards/rejected": -2.0923774242401123, + "step": 282 + }, + { + "epoch": 1.0679245283018868, + "grad_norm": 0.33393925428390503, + "learning_rate": 2.330188679245283e-05, + "log_odds_chosen": 19.191036224365234, + "log_odds_ratio": -0.051419854164123535, + "logits/chosen": -1.4811030626296997, + "logits/rejected": -5.777919769287109, + "logps/chosen": -1.5946496725082397, + "logps/rejected": -20.5208797454834, + "loss": 1.6181, + "nll_loss": 1.6130021810531616, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1594649851322174, + "rewards/margins": 1.8926230669021606, + "rewards/rejected": -2.0520882606506348, + "step": 283 + }, + { + "epoch": 1.0716981132075472, + "grad_norm": 0.2892889082431793, + "learning_rate": 2.320754716981132e-05, + "log_odds_chosen": 20.433334350585938, + "log_odds_ratio": -2.980237638894323e-07, + "logits/chosen": -0.5262770652770996, + "logits/rejected": -5.781856536865234, + "logps/chosen": -1.421867847442627, + "logps/rejected": -21.404203414916992, + "loss": 1.409, + "nll_loss": 1.408989667892456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14218679070472717, + "rewards/margins": 1.998233675956726, + "rewards/rejected": -2.140420436859131, + "step": 284 + }, + { + "epoch": 1.0754716981132075, + "grad_norm": 0.3078497350215912, + "learning_rate": 2.3113207547169813e-05, + "log_odds_chosen": 18.807418823242188, + "log_odds_ratio": -0.0475982129573822, + "logits/chosen": -1.0839369297027588, + "logits/rejected": -3.862969398498535, + "logps/chosen": -1.431301474571228, + "logps/rejected": -19.818119049072266, + "loss": 1.5687, + "nll_loss": 1.563894510269165, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14313015341758728, + "rewards/margins": 1.838681936264038, + "rewards/rejected": -1.9818120002746582, + "step": 285 + }, + { + "epoch": 1.079245283018868, + "grad_norm": 0.9563864469528198, + "learning_rate": 2.3018867924528304e-05, + "log_odds_chosen": 19.715076446533203, + "log_odds_ratio": -7.935160283523146e-06, + "logits/chosen": -2.1578338146209717, + "logits/rejected": -4.625355243682861, + "logps/chosen": -1.4262417554855347, + "logps/rejected": -20.82624053955078, + "loss": 1.4746, + "nll_loss": 1.4745495319366455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14262418448925018, + "rewards/margins": 1.9399999380111694, + "rewards/rejected": -2.0826241970062256, + "step": 286 + }, + { + "epoch": 1.0830188679245283, + "grad_norm": 0.29822468757629395, + "learning_rate": 2.2924528301886795e-05, + "log_odds_chosen": 20.520158767700195, + "log_odds_ratio": -5.96046660916727e-08, + "logits/chosen": -1.959904670715332, + "logits/rejected": -5.203014850616455, + "logps/chosen": -1.3631935119628906, + "logps/rejected": -21.410646438598633, + "loss": 1.4288, + "nll_loss": 1.4287554025650024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1363193392753601, + "rewards/margins": 2.0047454833984375, + "rewards/rejected": -2.1410648822784424, + "step": 287 + }, + { + "epoch": 1.0867924528301887, + "grad_norm": 0.31314578652381897, + "learning_rate": 2.2830188679245286e-05, + "log_odds_chosen": 20.801912307739258, + "log_odds_ratio": 0.0, + "logits/chosen": -2.6818971633911133, + "logits/rejected": -7.3150763511657715, + "logps/chosen": -1.596503496170044, + "logps/rejected": -22.121469497680664, + "loss": 1.4044, + "nll_loss": 1.4043606519699097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15965035557746887, + "rewards/margins": 2.0524966716766357, + "rewards/rejected": -2.2121472358703613, + "step": 288 + }, + { + "epoch": 1.090566037735849, + "grad_norm": 0.3501436412334442, + "learning_rate": 2.2735849056603774e-05, + "log_odds_chosen": 19.989656448364258, + "log_odds_ratio": -1.385821519761521e-06, + "logits/chosen": -2.4570391178131104, + "logits/rejected": -6.362525463104248, + "logps/chosen": -1.6581473350524902, + "logps/rejected": -21.345970153808594, + "loss": 1.5318, + "nll_loss": 1.53177011013031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16581472754478455, + "rewards/margins": 1.9687824249267578, + "rewards/rejected": -2.134597063064575, + "step": 289 + }, + { + "epoch": 1.0943396226415094, + "grad_norm": 0.33854931592941284, + "learning_rate": 2.2641509433962265e-05, + "log_odds_chosen": 19.100032806396484, + "log_odds_ratio": -5.811464234284358e-07, + "logits/chosen": -0.7418297529220581, + "logits/rejected": -4.735363960266113, + "logps/chosen": -1.7482143640518188, + "logps/rejected": -20.59360694885254, + "loss": 1.581, + "nll_loss": 1.5810353755950928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17482145130634308, + "rewards/margins": 1.884539246559143, + "rewards/rejected": -2.0593605041503906, + "step": 290 + }, + { + "epoch": 1.0981132075471698, + "grad_norm": 0.3294488489627838, + "learning_rate": 2.2547169811320756e-05, + "log_odds_chosen": 17.665861129760742, + "log_odds_ratio": -0.08739493787288666, + "logits/chosen": -0.7531633377075195, + "logits/rejected": -5.106237888336182, + "logps/chosen": -1.710965871810913, + "logps/rejected": -19.166933059692383, + "loss": 1.5588, + "nll_loss": 1.5500967502593994, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1710965931415558, + "rewards/margins": 1.7455967664718628, + "rewards/rejected": -1.9166933298110962, + "step": 291 + }, + { + "epoch": 1.1018867924528302, + "grad_norm": 0.3396783769130707, + "learning_rate": 2.2452830188679247e-05, + "log_odds_chosen": 18.452682495117188, + "log_odds_ratio": -0.07296016067266464, + "logits/chosen": -0.9494550228118896, + "logits/rejected": -4.494377613067627, + "logps/chosen": -1.316884994506836, + "logps/rejected": -19.361968994140625, + "loss": 1.1822, + "nll_loss": 1.1749296188354492, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13168850541114807, + "rewards/margins": 1.8045084476470947, + "rewards/rejected": -1.9361971616744995, + "step": 292 + }, + { + "epoch": 1.1056603773584905, + "grad_norm": 0.3204915523529053, + "learning_rate": 2.2358490566037738e-05, + "log_odds_chosen": 21.57263946533203, + "log_odds_ratio": -3.62108698936936e-06, + "logits/chosen": -0.9738727807998657, + "logits/rejected": -5.303981304168701, + "logps/chosen": -1.5682296752929688, + "logps/rejected": -22.808374404907227, + "loss": 1.462, + "nll_loss": 1.462032437324524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15682296454906464, + "rewards/margins": 2.1240146160125732, + "rewards/rejected": -2.280837297439575, + "step": 293 + }, + { + "epoch": 1.109433962264151, + "grad_norm": 0.3384822607040405, + "learning_rate": 2.226415094339623e-05, + "log_odds_chosen": 18.951013565063477, + "log_odds_ratio": -5.208150469115935e-06, + "logits/chosen": -1.3006513118743896, + "logits/rejected": -4.634444236755371, + "logps/chosen": -1.5552959442138672, + "logps/rejected": -20.169574737548828, + "loss": 1.5539, + "nll_loss": 1.5539031028747559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15552960336208344, + "rewards/margins": 1.8614277839660645, + "rewards/rejected": -2.0169572830200195, + "step": 294 + }, + { + "epoch": 1.1132075471698113, + "grad_norm": 1.1054335832595825, + "learning_rate": 2.216981132075472e-05, + "log_odds_chosen": 18.913715362548828, + "log_odds_ratio": -4.693871460403898e-07, + "logits/chosen": -0.9914897680282593, + "logits/rejected": -5.061755180358887, + "logps/chosen": -1.4729012250900269, + "logps/rejected": -20.089599609375, + "loss": 1.3732, + "nll_loss": 1.373215913772583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14729014039039612, + "rewards/margins": 1.8616697788238525, + "rewards/rejected": -2.008960247039795, + "step": 295 + }, + { + "epoch": 1.1169811320754717, + "grad_norm": 0.41514357924461365, + "learning_rate": 2.2075471698113208e-05, + "log_odds_chosen": 20.548986434936523, + "log_odds_ratio": -5.2154071283894154e-08, + "logits/chosen": -1.4249317646026611, + "logits/rejected": -5.687395095825195, + "logps/chosen": -1.5955243110656738, + "logps/rejected": -21.793594360351562, + "loss": 1.4593, + "nll_loss": 1.4593462944030762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1595524251461029, + "rewards/margins": 2.0198073387145996, + "rewards/rejected": -2.1793594360351562, + "step": 296 + }, + { + "epoch": 1.120754716981132, + "grad_norm": 0.33038532733917236, + "learning_rate": 2.19811320754717e-05, + "log_odds_chosen": 19.50081443786621, + "log_odds_ratio": -1.6255449736490846e-05, + "logits/chosen": -2.909379482269287, + "logits/rejected": -7.22389030456543, + "logps/chosen": -1.4944963455200195, + "logps/rejected": -20.643218994140625, + "loss": 1.3455, + "nll_loss": 1.345502257347107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1494496464729309, + "rewards/margins": 1.914872169494629, + "rewards/rejected": -2.064321756362915, + "step": 297 + }, + { + "epoch": 1.1245283018867924, + "grad_norm": 0.29738759994506836, + "learning_rate": 2.188679245283019e-05, + "log_odds_chosen": 20.67082405090332, + "log_odds_ratio": -0.0003345193399582058, + "logits/chosen": -1.593409776687622, + "logits/rejected": -5.576838493347168, + "logps/chosen": -1.746903657913208, + "logps/rejected": -22.0710506439209, + "loss": 1.3993, + "nll_loss": 1.3992958068847656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1746903657913208, + "rewards/margins": 2.032414674758911, + "rewards/rejected": -2.2071051597595215, + "step": 298 + }, + { + "epoch": 1.1283018867924528, + "grad_norm": 0.4151991605758667, + "learning_rate": 2.179245283018868e-05, + "log_odds_chosen": 17.427101135253906, + "log_odds_ratio": -0.11041603237390518, + "logits/chosen": -0.4509122669696808, + "logits/rejected": -4.386507034301758, + "logps/chosen": -1.5307447910308838, + "logps/rejected": -18.620765686035156, + "loss": 1.4554, + "nll_loss": 1.4443206787109375, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1530744880437851, + "rewards/margins": 1.7090023756027222, + "rewards/rejected": -1.862076759338379, + "step": 299 + }, + { + "epoch": 1.1320754716981132, + "grad_norm": 0.327581524848938, + "learning_rate": 2.1698113207547172e-05, + "log_odds_chosen": 17.98784065246582, + "log_odds_ratio": -4.783316398970783e-06, + "logits/chosen": -1.5300605297088623, + "logits/rejected": -6.33521842956543, + "logps/chosen": -1.5090255737304688, + "logps/rejected": -19.206493377685547, + "loss": 1.401, + "nll_loss": 1.4010220766067505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15090256929397583, + "rewards/margins": 1.7697467803955078, + "rewards/rejected": -1.9206492900848389, + "step": 300 + }, + { + "epoch": 1.1358490566037736, + "grad_norm": 0.33648499846458435, + "learning_rate": 2.1603773584905663e-05, + "log_odds_chosen": 20.12273597717285, + "log_odds_ratio": -5.960480393696344e-07, + "logits/chosen": -0.9786512851715088, + "logits/rejected": -5.974287986755371, + "logps/chosen": -1.504534363746643, + "logps/rejected": -21.347797393798828, + "loss": 1.4384, + "nll_loss": 1.4384446144104004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15045343339443207, + "rewards/margins": 1.9843263626098633, + "rewards/rejected": -2.134779930114746, + "step": 301 + }, + { + "epoch": 1.139622641509434, + "grad_norm": 0.4433332681655884, + "learning_rate": 2.1509433962264154e-05, + "log_odds_chosen": 18.096084594726562, + "log_odds_ratio": -0.07683061808347702, + "logits/chosen": -1.3693748712539673, + "logits/rejected": -5.253786087036133, + "logps/chosen": -1.7420886754989624, + "logps/rejected": -19.588783264160156, + "loss": 1.4441, + "nll_loss": 1.4364526271820068, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1742088496685028, + "rewards/margins": 1.7846695184707642, + "rewards/rejected": -1.9588782787322998, + "step": 302 + }, + { + "epoch": 1.1433962264150943, + "grad_norm": 0.2902994155883789, + "learning_rate": 2.1415094339622642e-05, + "log_odds_chosen": 22.471710205078125, + "log_odds_ratio": -1.1175880842984043e-07, + "logits/chosen": -1.3224496841430664, + "logits/rejected": -6.421805381774902, + "logps/chosen": -1.4149394035339355, + "logps/rejected": -23.561216354370117, + "loss": 1.3612, + "nll_loss": 1.36124587059021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14149394631385803, + "rewards/margins": 2.214627742767334, + "rewards/rejected": -2.356121778488159, + "step": 303 + }, + { + "epoch": 1.1471698113207547, + "grad_norm": 0.35126540064811707, + "learning_rate": 2.1320754716981133e-05, + "log_odds_chosen": 20.562442779541016, + "log_odds_ratio": -9.716237400425598e-06, + "logits/chosen": -0.9620351791381836, + "logits/rejected": -6.237518787384033, + "logps/chosen": -1.6788274049758911, + "logps/rejected": -21.986478805541992, + "loss": 1.4306, + "nll_loss": 1.4306453466415405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1678827404975891, + "rewards/margins": 2.0307650566101074, + "rewards/rejected": -2.1986477375030518, + "step": 304 + }, + { + "epoch": 1.150943396226415, + "grad_norm": 0.3252897560596466, + "learning_rate": 2.1226415094339624e-05, + "log_odds_chosen": 20.130619049072266, + "log_odds_ratio": -0.00018702806846704334, + "logits/chosen": -1.5204761028289795, + "logits/rejected": -4.985358238220215, + "logps/chosen": -1.533634901046753, + "logps/rejected": -21.230838775634766, + "loss": 1.6996, + "nll_loss": 1.6996041536331177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15336349606513977, + "rewards/margins": 1.9697201251983643, + "rewards/rejected": -2.1230835914611816, + "step": 305 + }, + { + "epoch": 1.1547169811320754, + "grad_norm": 0.4213809072971344, + "learning_rate": 2.1132075471698115e-05, + "log_odds_chosen": 22.56460952758789, + "log_odds_ratio": -1.7881419012155675e-07, + "logits/chosen": -1.123805046081543, + "logits/rejected": -4.740581035614014, + "logps/chosen": -1.4182591438293457, + "logps/rejected": -23.586502075195312, + "loss": 1.3603, + "nll_loss": 1.360290288925171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14182589948177338, + "rewards/margins": 2.2168242931365967, + "rewards/rejected": -2.3586502075195312, + "step": 306 + }, + { + "epoch": 1.1584905660377358, + "grad_norm": 0.3281170725822449, + "learning_rate": 2.1037735849056606e-05, + "log_odds_chosen": 19.17583656311035, + "log_odds_ratio": -2.123439571732888e-06, + "logits/chosen": -0.20950892567634583, + "logits/rejected": -4.772949695587158, + "logps/chosen": -1.5309257507324219, + "logps/rejected": -20.371936798095703, + "loss": 1.4766, + "nll_loss": 1.476578950881958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15309256315231323, + "rewards/margins": 1.884101390838623, + "rewards/rejected": -2.037193775177002, + "step": 307 + }, + { + "epoch": 1.1622641509433962, + "grad_norm": 0.3546697497367859, + "learning_rate": 2.0943396226415098e-05, + "log_odds_chosen": 23.225772857666016, + "log_odds_ratio": -7.450581485102248e-09, + "logits/chosen": -0.4746635854244232, + "logits/rejected": -2.8694400787353516, + "logps/chosen": -1.2704362869262695, + "logps/rejected": -24.101713180541992, + "loss": 1.2967, + "nll_loss": 1.2966803312301636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12704363465309143, + "rewards/margins": 2.283127546310425, + "rewards/rejected": -2.4101712703704834, + "step": 308 + }, + { + "epoch": 1.1660377358490566, + "grad_norm": 1.0174925327301025, + "learning_rate": 2.0849056603773585e-05, + "log_odds_chosen": 19.03729248046875, + "log_odds_ratio": -0.10631541162729263, + "logits/chosen": -2.604809522628784, + "logits/rejected": -5.2874040603637695, + "logps/chosen": -2.420698642730713, + "logps/rejected": -21.103818893432617, + "loss": 1.6129, + "nll_loss": 1.6022355556488037, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.24206990003585815, + "rewards/margins": 1.8683120012283325, + "rewards/rejected": -2.110382080078125, + "step": 309 + }, + { + "epoch": 1.169811320754717, + "grad_norm": 0.29557734727859497, + "learning_rate": 2.0754716981132076e-05, + "log_odds_chosen": 16.802288055419922, + "log_odds_ratio": -0.02574881538748741, + "logits/chosen": -1.028839111328125, + "logits/rejected": -4.576976776123047, + "logps/chosen": -1.665901780128479, + "logps/rejected": -18.113601684570312, + "loss": 1.5644, + "nll_loss": 1.561858057975769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16659018397331238, + "rewards/margins": 1.6447699069976807, + "rewards/rejected": -1.8113601207733154, + "step": 310 + }, + { + "epoch": 1.1735849056603773, + "grad_norm": 0.3357307016849518, + "learning_rate": 2.0660377358490567e-05, + "log_odds_chosen": 18.195541381835938, + "log_odds_ratio": -0.05465611815452576, + "logits/chosen": -2.216845989227295, + "logits/rejected": -5.583156108856201, + "logps/chosen": -1.4862521886825562, + "logps/rejected": -19.355636596679688, + "loss": 1.4894, + "nll_loss": 1.4839730262756348, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1486252397298813, + "rewards/margins": 1.7869385480880737, + "rewards/rejected": -1.9355638027191162, + "step": 311 + }, + { + "epoch": 1.1773584905660377, + "grad_norm": 0.43319424986839294, + "learning_rate": 2.056603773584906e-05, + "log_odds_chosen": 20.049089431762695, + "log_odds_ratio": -8.195664804588887e-07, + "logits/chosen": -1.8693492412567139, + "logits/rejected": -6.614497184753418, + "logps/chosen": -1.4651036262512207, + "logps/rejected": -21.203433990478516, + "loss": 1.3849, + "nll_loss": 1.384904384613037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14651036262512207, + "rewards/margins": 1.9738330841064453, + "rewards/rejected": -2.1203434467315674, + "step": 312 + }, + { + "epoch": 1.181132075471698, + "grad_norm": 0.3610400855541229, + "learning_rate": 2.047169811320755e-05, + "log_odds_chosen": 18.725547790527344, + "log_odds_ratio": -0.0023514274507761, + "logits/chosen": -0.6479529738426208, + "logits/rejected": -3.6973776817321777, + "logps/chosen": -1.5272626876831055, + "logps/rejected": -19.948326110839844, + "loss": 1.4189, + "nll_loss": 1.4186826944351196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15272627770900726, + "rewards/margins": 1.8421063423156738, + "rewards/rejected": -1.9948326349258423, + "step": 313 + }, + { + "epoch": 1.1849056603773584, + "grad_norm": 0.3513402044773102, + "learning_rate": 2.037735849056604e-05, + "log_odds_chosen": 17.298198699951172, + "log_odds_ratio": -0.002320481464266777, + "logits/chosen": -1.4968432188034058, + "logits/rejected": -5.039670944213867, + "logps/chosen": -1.4913229942321777, + "logps/rejected": -18.48564910888672, + "loss": 1.4084, + "nll_loss": 1.4081535339355469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14913231134414673, + "rewards/margins": 1.6994324922561646, + "rewards/rejected": -1.848564863204956, + "step": 314 + }, + { + "epoch": 1.1886792452830188, + "grad_norm": 0.3477972447872162, + "learning_rate": 2.0283018867924532e-05, + "log_odds_chosen": 18.793106079101562, + "log_odds_ratio": -8.6095547885634e-05, + "logits/chosen": -2.6079301834106445, + "logits/rejected": -5.591516494750977, + "logps/chosen": -1.5191848278045654, + "logps/rejected": -19.985429763793945, + "loss": 1.5223, + "nll_loss": 1.5223308801651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1519184708595276, + "rewards/margins": 1.8466243743896484, + "rewards/rejected": -1.9985430240631104, + "step": 315 + }, + { + "epoch": 1.1924528301886792, + "grad_norm": 0.35549482703208923, + "learning_rate": 2.018867924528302e-05, + "log_odds_chosen": 18.749046325683594, + "log_odds_ratio": -5.386953034758335e-06, + "logits/chosen": -0.9891374707221985, + "logits/rejected": -5.465826988220215, + "logps/chosen": -1.3818237781524658, + "logps/rejected": -19.735633850097656, + "loss": 1.4975, + "nll_loss": 1.4974541664123535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1381823718547821, + "rewards/margins": 1.835381031036377, + "rewards/rejected": -1.973563313484192, + "step": 316 + }, + { + "epoch": 1.1962264150943396, + "grad_norm": 0.34496262669563293, + "learning_rate": 2.009433962264151e-05, + "log_odds_chosen": 15.491250038146973, + "log_odds_ratio": -0.20011255145072937, + "logits/chosen": -1.0274806022644043, + "logits/rejected": -5.168841361999512, + "logps/chosen": -1.7780976295471191, + "logps/rejected": -17.050220489501953, + "loss": 1.6287, + "nll_loss": 1.6086933612823486, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17780977487564087, + "rewards/margins": 1.527212142944336, + "rewards/rejected": -1.7050219774246216, + "step": 317 + }, + { + "epoch": 1.2, + "grad_norm": 0.32769209146499634, + "learning_rate": 2e-05, + "log_odds_chosen": 16.972900390625, + "log_odds_ratio": -0.00024161383043974638, + "logits/chosen": -0.6759935617446899, + "logits/rejected": -5.338271141052246, + "logps/chosen": -1.6053509712219238, + "logps/rejected": -18.34259796142578, + "loss": 1.4908, + "nll_loss": 1.4907457828521729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16053511202335358, + "rewards/margins": 1.6737247705459595, + "rewards/rejected": -1.8342599868774414, + "step": 318 + }, + { + "epoch": 1.2037735849056603, + "grad_norm": 0.33330532908439636, + "learning_rate": 1.9905660377358493e-05, + "log_odds_chosen": 18.765106201171875, + "log_odds_ratio": -0.007183433044701815, + "logits/chosen": -3.5232436656951904, + "logits/rejected": -7.270813465118408, + "logps/chosen": -1.494720697402954, + "logps/rejected": -19.954343795776367, + "loss": 1.4191, + "nll_loss": 1.4183340072631836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14947207272052765, + "rewards/margins": 1.8459622859954834, + "rewards/rejected": -1.9954345226287842, + "step": 319 + }, + { + "epoch": 1.2075471698113207, + "grad_norm": 0.35828787088394165, + "learning_rate": 1.9811320754716984e-05, + "log_odds_chosen": 15.424698829650879, + "log_odds_ratio": -0.006778358481824398, + "logits/chosen": -2.202425956726074, + "logits/rejected": -7.088006973266602, + "logps/chosen": -1.7607218027114868, + "logps/rejected": -16.957534790039062, + "loss": 1.6174, + "nll_loss": 1.616753339767456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17607218027114868, + "rewards/margins": 1.5196813344955444, + "rewards/rejected": -1.6957534551620483, + "step": 320 + }, + { + "epoch": 1.211320754716981, + "grad_norm": 0.34036463499069214, + "learning_rate": 1.9716981132075475e-05, + "log_odds_chosen": 18.791336059570312, + "log_odds_ratio": -3.852009740512585e-06, + "logits/chosen": -1.9099091291427612, + "logits/rejected": -7.520392417907715, + "logps/chosen": -1.561924934387207, + "logps/rejected": -19.94056510925293, + "loss": 1.471, + "nll_loss": 1.4709736108779907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15619248151779175, + "rewards/margins": 1.8378640413284302, + "rewards/rejected": -1.9940567016601562, + "step": 321 + }, + { + "epoch": 1.2150943396226415, + "grad_norm": 0.3238532841205597, + "learning_rate": 1.9622641509433966e-05, + "log_odds_chosen": 17.8562068939209, + "log_odds_ratio": -8.6430118244607e-06, + "logits/chosen": -0.658406138420105, + "logits/rejected": -4.410658836364746, + "logps/chosen": -1.2828052043914795, + "logps/rejected": -18.753049850463867, + "loss": 1.3437, + "nll_loss": 1.3436634540557861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12828052043914795, + "rewards/margins": 1.7470245361328125, + "rewards/rejected": -1.875304937362671, + "step": 322 + }, + { + "epoch": 1.2188679245283018, + "grad_norm": 0.3303399682044983, + "learning_rate": 1.9528301886792454e-05, + "log_odds_chosen": 14.796485900878906, + "log_odds_ratio": -0.055772680789232254, + "logits/chosen": -1.0683139562606812, + "logits/rejected": -7.066629409790039, + "logps/chosen": -1.8164430856704712, + "logps/rejected": -16.421266555786133, + "loss": 1.613, + "nll_loss": 1.6073920726776123, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.18164430558681488, + "rewards/margins": 1.4604823589324951, + "rewards/rejected": -1.6421265602111816, + "step": 323 + }, + { + "epoch": 1.2226415094339622, + "grad_norm": 0.35494038462638855, + "learning_rate": 1.9433962264150945e-05, + "log_odds_chosen": 17.69695472717285, + "log_odds_ratio": -0.005540680605918169, + "logits/chosen": -1.0188480615615845, + "logits/rejected": -4.988126277923584, + "logps/chosen": -1.4110499620437622, + "logps/rejected": -18.787839889526367, + "loss": 1.3428, + "nll_loss": 1.3422446250915527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14110499620437622, + "rewards/margins": 1.7376790046691895, + "rewards/rejected": -1.878783941268921, + "step": 324 + }, + { + "epoch": 1.2264150943396226, + "grad_norm": 0.35788360238075256, + "learning_rate": 1.9339622641509436e-05, + "log_odds_chosen": 14.982892990112305, + "log_odds_ratio": -0.049817949533462524, + "logits/chosen": -1.3258823156356812, + "logits/rejected": -4.249111652374268, + "logps/chosen": -1.2381442785263062, + "logps/rejected": -15.715821266174316, + "loss": 1.2877, + "nll_loss": 1.2827649116516113, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.12381443381309509, + "rewards/margins": 1.447767734527588, + "rewards/rejected": -1.5715820789337158, + "step": 325 + }, + { + "epoch": 1.230188679245283, + "grad_norm": 0.39687126874923706, + "learning_rate": 1.9245283018867927e-05, + "log_odds_chosen": 16.164493560791016, + "log_odds_ratio": -0.054912034422159195, + "logits/chosen": -1.1859130859375, + "logits/rejected": -4.106606960296631, + "logps/chosen": -1.3010371923446655, + "logps/rejected": -16.917049407958984, + "loss": 1.4605, + "nll_loss": 1.4549907445907593, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1301037073135376, + "rewards/margins": 1.561601161956787, + "rewards/rejected": -1.6917049884796143, + "step": 326 + }, + { + "epoch": 1.2339622641509433, + "grad_norm": 0.3798399865627289, + "learning_rate": 1.9150943396226418e-05, + "log_odds_chosen": 15.7042875289917, + "log_odds_ratio": -0.00472813518717885, + "logits/chosen": 0.0541728138923645, + "logits/rejected": -4.224884033203125, + "logps/chosen": -1.7171664237976074, + "logps/rejected": -17.207719802856445, + "loss": 1.5367, + "nll_loss": 1.5362149477005005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1717166304588318, + "rewards/margins": 1.5490553379058838, + "rewards/rejected": -1.7207720279693604, + "step": 327 + }, + { + "epoch": 1.2377358490566037, + "grad_norm": 0.3624114990234375, + "learning_rate": 1.905660377358491e-05, + "log_odds_chosen": 16.288440704345703, + "log_odds_ratio": -0.010537970811128616, + "logits/chosen": -0.5711084008216858, + "logits/rejected": -4.729043006896973, + "logps/chosen": -1.5029557943344116, + "logps/rejected": -17.498348236083984, + "loss": 1.3411, + "nll_loss": 1.340043544769287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15029558539390564, + "rewards/margins": 1.5995393991470337, + "rewards/rejected": -1.7498348951339722, + "step": 328 + }, + { + "epoch": 1.241509433962264, + "grad_norm": 0.3725604712963104, + "learning_rate": 1.8962264150943397e-05, + "log_odds_chosen": 15.354927062988281, + "log_odds_ratio": -0.02149188332259655, + "logits/chosen": -1.4398235082626343, + "logits/rejected": -4.620980739593506, + "logps/chosen": -1.4068002700805664, + "logps/rejected": -16.40656280517578, + "loss": 1.2986, + "nll_loss": 1.2964096069335938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14068001508712769, + "rewards/margins": 1.4999765157699585, + "rewards/rejected": -1.640656590461731, + "step": 329 + }, + { + "epoch": 1.2452830188679245, + "grad_norm": 0.370463490486145, + "learning_rate": 1.8867924528301888e-05, + "log_odds_chosen": 14.876998901367188, + "log_odds_ratio": -0.05089250206947327, + "logits/chosen": -1.1731348037719727, + "logits/rejected": -4.278902530670166, + "logps/chosen": -1.3039909601211548, + "logps/rejected": -15.608766555786133, + "loss": 1.4343, + "nll_loss": 1.4292408227920532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13039910793304443, + "rewards/margins": 1.4304776191711426, + "rewards/rejected": -1.5608766078948975, + "step": 330 + }, + { + "epoch": 1.2490566037735849, + "grad_norm": 0.3387150466442108, + "learning_rate": 1.877358490566038e-05, + "log_odds_chosen": 15.262411117553711, + "log_odds_ratio": -6.615633174078539e-05, + "logits/chosen": -2.305337905883789, + "logits/rejected": -5.992312431335449, + "logps/chosen": -1.5741474628448486, + "logps/rejected": -16.528202056884766, + "loss": 1.4408, + "nll_loss": 1.440836787223816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1574147343635559, + "rewards/margins": 1.4954053163528442, + "rewards/rejected": -1.652820110321045, + "step": 331 + }, + { + "epoch": 1.2528301886792452, + "grad_norm": 0.37555763125419617, + "learning_rate": 1.8679245283018867e-05, + "log_odds_chosen": 18.441608428955078, + "log_odds_ratio": -1.5244633686961606e-05, + "logits/chosen": -2.3576619625091553, + "logits/rejected": -4.850790500640869, + "logps/chosen": -1.4474366903305054, + "logps/rejected": -19.480161666870117, + "loss": 1.397, + "nll_loss": 1.3970084190368652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1447436660528183, + "rewards/margins": 1.8032724857330322, + "rewards/rejected": -1.9480161666870117, + "step": 332 + }, + { + "epoch": 1.2566037735849056, + "grad_norm": 0.37432780861854553, + "learning_rate": 1.8584905660377358e-05, + "log_odds_chosen": 16.72200584411621, + "log_odds_ratio": -2.3494829292758368e-05, + "logits/chosen": -0.13491854071617126, + "logits/rejected": -3.951087474822998, + "logps/chosen": -1.4685932397842407, + "logps/rejected": -17.866477966308594, + "loss": 1.5172, + "nll_loss": 1.5171722173690796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1468593329191208, + "rewards/margins": 1.6397886276245117, + "rewards/rejected": -1.7866477966308594, + "step": 333 + }, + { + "epoch": 1.260377358490566, + "grad_norm": 0.3993586301803589, + "learning_rate": 1.849056603773585e-05, + "log_odds_chosen": 15.771561622619629, + "log_odds_ratio": -3.5672157537192106e-05, + "logits/chosen": -1.266805648803711, + "logits/rejected": -4.964008808135986, + "logps/chosen": -1.478631854057312, + "logps/rejected": -16.92867088317871, + "loss": 1.5646, + "nll_loss": 1.5645837783813477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14786317944526672, + "rewards/margins": 1.545003890991211, + "rewards/rejected": -1.6928670406341553, + "step": 334 + }, + { + "epoch": 1.2641509433962264, + "grad_norm": 0.34949737787246704, + "learning_rate": 1.839622641509434e-05, + "log_odds_chosen": 14.615917205810547, + "log_odds_ratio": -0.18080249428749084, + "logits/chosen": -2.7243032455444336, + "logits/rejected": -6.512376308441162, + "logps/chosen": -1.9120750427246094, + "logps/rejected": -16.35055160522461, + "loss": 1.6374, + "nll_loss": 1.6193275451660156, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19120751321315765, + "rewards/margins": 1.44384765625, + "rewards/rejected": -1.6350551843643188, + "step": 335 + }, + { + "epoch": 1.2679245283018867, + "grad_norm": 0.3900119960308075, + "learning_rate": 1.830188679245283e-05, + "log_odds_chosen": 15.152379035949707, + "log_odds_ratio": -0.07246612012386322, + "logits/chosen": -2.0148720741271973, + "logits/rejected": -5.148480415344238, + "logps/chosen": -1.7054778337478638, + "logps/rejected": -16.5804443359375, + "loss": 1.4763, + "nll_loss": 1.4690625667572021, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.17054779827594757, + "rewards/margins": 1.4874964952468872, + "rewards/rejected": -1.6580443382263184, + "step": 336 + }, + { + "epoch": 1.271698113207547, + "grad_norm": 0.3045012354850769, + "learning_rate": 1.820754716981132e-05, + "log_odds_chosen": 14.322192192077637, + "log_odds_ratio": -0.0033934221137315035, + "logits/chosen": -1.8886581659317017, + "logits/rejected": -7.217465877532959, + "logps/chosen": -1.5194714069366455, + "logps/rejected": -15.524861335754395, + "loss": 1.6423, + "nll_loss": 1.6419386863708496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15194714069366455, + "rewards/margins": 1.4005389213562012, + "rewards/rejected": -1.5524861812591553, + "step": 337 + }, + { + "epoch": 1.2754716981132075, + "grad_norm": 0.27202707529067993, + "learning_rate": 1.811320754716981e-05, + "log_odds_chosen": 17.09416389465332, + "log_odds_ratio": -0.01481825951486826, + "logits/chosen": -2.577331066131592, + "logits/rejected": -6.886340141296387, + "logps/chosen": -1.345436930656433, + "logps/rejected": -17.985553741455078, + "loss": 1.3331, + "nll_loss": 1.3316359519958496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13454370200634003, + "rewards/margins": 1.6640119552612305, + "rewards/rejected": -1.7985554933547974, + "step": 338 + }, + { + "epoch": 1.2792452830188679, + "grad_norm": 0.35674479603767395, + "learning_rate": 1.80188679245283e-05, + "log_odds_chosen": 17.09935760498047, + "log_odds_ratio": -0.04254509136080742, + "logits/chosen": -1.4332383871078491, + "logits/rejected": -6.181375503540039, + "logps/chosen": -1.9294726848602295, + "logps/rejected": -18.842205047607422, + "loss": 1.6018, + "nll_loss": 1.597574234008789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19294726848602295, + "rewards/margins": 1.6912733316421509, + "rewards/rejected": -1.8842206001281738, + "step": 339 + }, + { + "epoch": 1.2830188679245282, + "grad_norm": 0.33350715041160583, + "learning_rate": 1.7924528301886792e-05, + "log_odds_chosen": 17.224140167236328, + "log_odds_ratio": -2.8834060685767327e-06, + "logits/chosen": -1.3092684745788574, + "logits/rejected": -7.616753578186035, + "logps/chosen": -1.7400907278060913, + "logps/rejected": -18.713537216186523, + "loss": 1.5776, + "nll_loss": 1.5776221752166748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1740090698003769, + "rewards/margins": 1.6973445415496826, + "rewards/rejected": -1.8713536262512207, + "step": 340 + }, + { + "epoch": 1.2867924528301886, + "grad_norm": 0.31082576513290405, + "learning_rate": 1.7830188679245283e-05, + "log_odds_chosen": 16.050865173339844, + "log_odds_ratio": -0.00012872874503955245, + "logits/chosen": -1.133547067642212, + "logits/rejected": -5.591988563537598, + "logps/chosen": -1.5776842832565308, + "logps/rejected": -17.315185546875, + "loss": 1.3479, + "nll_loss": 1.347907543182373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15776842832565308, + "rewards/margins": 1.5737502574920654, + "rewards/rejected": -1.7315186262130737, + "step": 341 + }, + { + "epoch": 1.290566037735849, + "grad_norm": 0.37589412927627563, + "learning_rate": 1.7735849056603774e-05, + "log_odds_chosen": 15.594026565551758, + "log_odds_ratio": -0.08674325048923492, + "logits/chosen": -1.7124016284942627, + "logits/rejected": -4.819745063781738, + "logps/chosen": -1.672520637512207, + "logps/rejected": -17.02138328552246, + "loss": 1.4996, + "nll_loss": 1.4909459352493286, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1672520637512207, + "rewards/margins": 1.534886360168457, + "rewards/rejected": -1.7021384239196777, + "step": 342 + }, + { + "epoch": 1.2943396226415094, + "grad_norm": 0.3507995307445526, + "learning_rate": 1.7641509433962265e-05, + "log_odds_chosen": 16.04661750793457, + "log_odds_ratio": -0.007493563462048769, + "logits/chosen": -1.3305596113204956, + "logits/rejected": -5.7259111404418945, + "logps/chosen": -1.474043846130371, + "logps/rejected": -17.168848037719727, + "loss": 1.4017, + "nll_loss": 1.4009082317352295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14740438759326935, + "rewards/margins": 1.569480538368225, + "rewards/rejected": -1.7168848514556885, + "step": 343 + }, + { + "epoch": 1.2981132075471697, + "grad_norm": 0.37658241391181946, + "learning_rate": 1.7547169811320753e-05, + "log_odds_chosen": 17.842830657958984, + "log_odds_ratio": -1.3475509149429854e-05, + "logits/chosen": -2.2428817749023438, + "logits/rejected": -5.751044273376465, + "logps/chosen": -1.5199084281921387, + "logps/rejected": -19.044391632080078, + "loss": 1.4837, + "nll_loss": 1.4836554527282715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1519908457994461, + "rewards/margins": 1.752448320388794, + "rewards/rejected": -1.9044389724731445, + "step": 344 + }, + { + "epoch": 1.3018867924528301, + "grad_norm": 0.35167789459228516, + "learning_rate": 1.7452830188679244e-05, + "log_odds_chosen": 15.047775268554688, + "log_odds_ratio": -0.008871389552950859, + "logits/chosen": -1.8355504274368286, + "logits/rejected": -6.161797046661377, + "logps/chosen": -1.6795439720153809, + "logps/rejected": -16.46615982055664, + "loss": 1.488, + "nll_loss": 1.4871084690093994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16795440018177032, + "rewards/margins": 1.4786615371704102, + "rewards/rejected": -1.646615982055664, + "step": 345 + }, + { + "epoch": 1.3056603773584905, + "grad_norm": 0.35509005188941956, + "learning_rate": 1.7358490566037735e-05, + "log_odds_chosen": 13.43126106262207, + "log_odds_ratio": -0.05844378471374512, + "logits/chosen": -0.840483546257019, + "logits/rejected": -5.20952844619751, + "logps/chosen": -1.282634973526001, + "logps/rejected": -14.154840469360352, + "loss": 1.3724, + "nll_loss": 1.3665363788604736, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.12826348841190338, + "rewards/margins": 1.2872204780578613, + "rewards/rejected": -1.4154839515686035, + "step": 346 + }, + { + "epoch": 1.3094339622641509, + "grad_norm": 0.3561452031135559, + "learning_rate": 1.7264150943396226e-05, + "log_odds_chosen": 18.20964813232422, + "log_odds_ratio": -4.6193684966056026e-07, + "logits/chosen": -2.4437973499298096, + "logits/rejected": -6.393451690673828, + "logps/chosen": -1.564558982849121, + "logps/rejected": -19.447908401489258, + "loss": 1.6323, + "nll_loss": 1.6323482990264893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1564558893442154, + "rewards/margins": 1.788334846496582, + "rewards/rejected": -1.9447907209396362, + "step": 347 + }, + { + "epoch": 1.3132075471698113, + "grad_norm": 0.2806137204170227, + "learning_rate": 1.7169811320754717e-05, + "log_odds_chosen": 18.3279972076416, + "log_odds_ratio": -1.1324946171953343e-06, + "logits/chosen": -1.2502247095108032, + "logits/rejected": -6.719517230987549, + "logps/chosen": -1.4687795639038086, + "logps/rejected": -19.442962646484375, + "loss": 1.2923, + "nll_loss": 1.2922688722610474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1468779593706131, + "rewards/margins": 1.7974183559417725, + "rewards/rejected": -1.9442962408065796, + "step": 348 + }, + { + "epoch": 1.3169811320754716, + "grad_norm": 0.3450721502304077, + "learning_rate": 1.707547169811321e-05, + "log_odds_chosen": 14.217653274536133, + "log_odds_ratio": -0.10679548978805542, + "logits/chosen": -1.2525098323822021, + "logits/rejected": -4.723179340362549, + "logps/chosen": -1.651379942893982, + "logps/rejected": -15.504240036010742, + "loss": 1.5352, + "nll_loss": 1.5244849920272827, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16513800621032715, + "rewards/margins": 1.3852860927581787, + "rewards/rejected": -1.5504240989685059, + "step": 349 + }, + { + "epoch": 1.320754716981132, + "grad_norm": 0.33943700790405273, + "learning_rate": 1.69811320754717e-05, + "log_odds_chosen": 16.3641300201416, + "log_odds_ratio": -0.061000462621450424, + "logits/chosen": -1.4788792133331299, + "logits/rejected": -5.779600143432617, + "logps/chosen": -1.4309313297271729, + "logps/rejected": -17.400362014770508, + "loss": 1.4527, + "nll_loss": 1.4466229677200317, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14309315383434296, + "rewards/margins": 1.5969431400299072, + "rewards/rejected": -1.7400362491607666, + "step": 350 + }, + { + "epoch": 1.3245283018867924, + "grad_norm": 0.32729095220565796, + "learning_rate": 1.6886792452830187e-05, + "log_odds_chosen": 15.319132804870605, + "log_odds_ratio": -0.056552521884441376, + "logits/chosen": -0.8255969285964966, + "logits/rejected": -5.052517890930176, + "logps/chosen": -1.3465955257415771, + "logps/rejected": -16.29346466064453, + "loss": 1.4911, + "nll_loss": 1.4854259490966797, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1346595585346222, + "rewards/margins": 1.4946870803833008, + "rewards/rejected": -1.6293466091156006, + "step": 351 + }, + { + "epoch": 1.3283018867924528, + "grad_norm": 0.3329434394836426, + "learning_rate": 1.6792452830188678e-05, + "log_odds_chosen": 16.610496520996094, + "log_odds_ratio": -3.859699427266605e-05, + "logits/chosen": -2.8449089527130127, + "logits/rejected": -7.928000450134277, + "logps/chosen": -1.596476435661316, + "logps/rejected": -17.940351486206055, + "loss": 1.4448, + "nll_loss": 1.4447648525238037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15964765846729279, + "rewards/margins": 1.634387493133545, + "rewards/rejected": -1.7940351963043213, + "step": 352 + }, + { + "epoch": 1.3320754716981131, + "grad_norm": 0.3026202321052551, + "learning_rate": 1.669811320754717e-05, + "log_odds_chosen": 16.932235717773438, + "log_odds_ratio": -0.03685789927840233, + "logits/chosen": -1.5450692176818848, + "logits/rejected": -4.212255001068115, + "logps/chosen": -1.2493813037872314, + "logps/rejected": -17.693649291992188, + "loss": 1.4363, + "nll_loss": 1.4326279163360596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12493812292814255, + "rewards/margins": 1.644426941871643, + "rewards/rejected": -1.7693649530410767, + "step": 353 + }, + { + "epoch": 1.3358490566037735, + "grad_norm": 0.2926734685897827, + "learning_rate": 1.660377358490566e-05, + "log_odds_chosen": 17.678836822509766, + "log_odds_ratio": -8.680287464812864e-06, + "logits/chosen": -2.915510654449463, + "logits/rejected": -7.7731733322143555, + "logps/chosen": -1.7902599573135376, + "logps/rejected": -19.27121353149414, + "loss": 1.5991, + "nll_loss": 1.5991185903549194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17902600765228271, + "rewards/margins": 1.7480952739715576, + "rewards/rejected": -1.9271214008331299, + "step": 354 + }, + { + "epoch": 1.3396226415094339, + "grad_norm": 0.340572327375412, + "learning_rate": 1.650943396226415e-05, + "log_odds_chosen": 17.665685653686523, + "log_odds_ratio": -0.02697843872010708, + "logits/chosen": -1.601323127746582, + "logits/rejected": -5.377262592315674, + "logps/chosen": -1.6611402034759521, + "logps/rejected": -19.03213119506836, + "loss": 1.4539, + "nll_loss": 1.4511959552764893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16611401736736298, + "rewards/margins": 1.7370991706848145, + "rewards/rejected": -1.9032131433486938, + "step": 355 + }, + { + "epoch": 1.3433962264150943, + "grad_norm": 1.6760241985321045, + "learning_rate": 1.6415094339622643e-05, + "log_odds_chosen": 18.934751510620117, + "log_odds_ratio": -1.7136345320523105e-07, + "logits/chosen": -2.097600221633911, + "logits/rejected": -6.444354057312012, + "logps/chosen": -1.4015324115753174, + "logps/rejected": -19.90370750427246, + "loss": 1.4334, + "nll_loss": 1.4333903789520264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14015324413776398, + "rewards/margins": 1.850217580795288, + "rewards/rejected": -1.990370750427246, + "step": 356 + }, + { + "epoch": 1.3471698113207546, + "grad_norm": 0.3295755386352539, + "learning_rate": 1.6320754716981134e-05, + "log_odds_chosen": 19.07089614868164, + "log_odds_ratio": -5.215408194203519e-08, + "logits/chosen": -0.8209649324417114, + "logits/rejected": -4.625163555145264, + "logps/chosen": -1.615844964981079, + "logps/rejected": -20.342763900756836, + "loss": 1.7516, + "nll_loss": 1.7515586614608765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1615844964981079, + "rewards/margins": 1.8726916313171387, + "rewards/rejected": -2.034276247024536, + "step": 357 + }, + { + "epoch": 1.350943396226415, + "grad_norm": 0.3472559154033661, + "learning_rate": 1.622641509433962e-05, + "log_odds_chosen": 16.93330955505371, + "log_odds_ratio": -0.0761246532201767, + "logits/chosen": -1.1531541347503662, + "logits/rejected": -4.757190704345703, + "logps/chosen": -1.6295312643051147, + "logps/rejected": -18.269256591796875, + "loss": 1.452, + "nll_loss": 1.4443397521972656, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16295313835144043, + "rewards/margins": 1.6639723777770996, + "rewards/rejected": -1.82692551612854, + "step": 358 + }, + { + "epoch": 1.3547169811320754, + "grad_norm": 0.3795447051525116, + "learning_rate": 1.6132075471698112e-05, + "log_odds_chosen": 18.87713623046875, + "log_odds_ratio": -2.4379320166190155e-05, + "logits/chosen": -3.349454402923584, + "logits/rejected": -7.033996105194092, + "logps/chosen": -1.5019011497497559, + "logps/rejected": -20.02358055114746, + "loss": 1.4321, + "nll_loss": 1.4321367740631104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15019011497497559, + "rewards/margins": 1.8521679639816284, + "rewards/rejected": -2.0023581981658936, + "step": 359 + }, + { + "epoch": 1.3584905660377358, + "grad_norm": 0.40565335750579834, + "learning_rate": 1.6037735849056604e-05, + "log_odds_chosen": 15.8294095993042, + "log_odds_ratio": -0.08895085752010345, + "logits/chosen": -1.62678861618042, + "logits/rejected": -4.0340166091918945, + "logps/chosen": -1.596952199935913, + "logps/rejected": -17.054447174072266, + "loss": 1.4759, + "nll_loss": 1.4669734239578247, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15969522297382355, + "rewards/margins": 1.5457494258880615, + "rewards/rejected": -1.705444574356079, + "step": 360 + }, + { + "epoch": 1.3622641509433961, + "grad_norm": 0.363148033618927, + "learning_rate": 1.5943396226415095e-05, + "log_odds_chosen": 18.350400924682617, + "log_odds_ratio": -2.0042255073349224e-06, + "logits/chosen": -0.9768545031547546, + "logits/rejected": -5.307037353515625, + "logps/chosen": -1.459285020828247, + "logps/rejected": -19.410093307495117, + "loss": 1.3574, + "nll_loss": 1.3574293851852417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1459285020828247, + "rewards/margins": 1.7950809001922607, + "rewards/rejected": -1.941009283065796, + "step": 361 + }, + { + "epoch": 1.3660377358490565, + "grad_norm": 0.3766542673110962, + "learning_rate": 1.5849056603773586e-05, + "log_odds_chosen": 16.924793243408203, + "log_odds_ratio": -6.675824806734454e-06, + "logits/chosen": -2.368605136871338, + "logits/rejected": -7.107510566711426, + "logps/chosen": -1.487027883529663, + "logps/rejected": -18.08290672302246, + "loss": 1.4416, + "nll_loss": 1.4416375160217285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14870277047157288, + "rewards/margins": 1.6595878601074219, + "rewards/rejected": -1.8082906007766724, + "step": 362 + }, + { + "epoch": 1.369811320754717, + "grad_norm": 0.36386638879776, + "learning_rate": 1.5754716981132077e-05, + "log_odds_chosen": 18.357254028320312, + "log_odds_ratio": -3.6732189983013086e-06, + "logits/chosen": -1.288124680519104, + "logits/rejected": -5.227755546569824, + "logps/chosen": -1.0662007331848145, + "logps/rejected": -18.76520538330078, + "loss": 1.0875, + "nll_loss": 1.0875431299209595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10662007331848145, + "rewards/margins": 1.7699006795883179, + "rewards/rejected": -1.8765207529067993, + "step": 363 + }, + { + "epoch": 1.3735849056603773, + "grad_norm": 0.3877186179161072, + "learning_rate": 1.5660377358490564e-05, + "log_odds_chosen": 17.02109718322754, + "log_odds_ratio": -9.321229299530387e-06, + "logits/chosen": -1.9352566003799438, + "logits/rejected": -5.730966567993164, + "logps/chosen": -1.414484977722168, + "logps/rejected": -18.040632247924805, + "loss": 1.3428, + "nll_loss": 1.3428069353103638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1414484977722168, + "rewards/margins": 1.6626148223876953, + "rewards/rejected": -1.804063320159912, + "step": 364 + }, + { + "epoch": 1.3773584905660377, + "grad_norm": 1.2752561569213867, + "learning_rate": 1.5566037735849056e-05, + "log_odds_chosen": 16.804956436157227, + "log_odds_ratio": -0.0012535701971501112, + "logits/chosen": -1.5871500968933105, + "logits/rejected": -5.9648027420043945, + "logps/chosen": -1.3970187902450562, + "logps/rejected": -17.712099075317383, + "loss": 1.5305, + "nll_loss": 1.5304073095321655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13970187306404114, + "rewards/margins": 1.6315079927444458, + "rewards/rejected": -1.7712098360061646, + "step": 365 + }, + { + "epoch": 1.3811320754716983, + "grad_norm": 0.33747872710227966, + "learning_rate": 1.5471698113207547e-05, + "log_odds_chosen": 18.296247482299805, + "log_odds_ratio": -6.034980515323696e-07, + "logits/chosen": -0.8064440488815308, + "logits/rejected": -5.531889915466309, + "logps/chosen": -1.6278749704360962, + "logps/rejected": -19.650760650634766, + "loss": 1.5717, + "nll_loss": 1.5716900825500488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16278749704360962, + "rewards/margins": 1.8022886514663696, + "rewards/rejected": -1.965076208114624, + "step": 366 + }, + { + "epoch": 1.3849056603773584, + "grad_norm": 0.4021959900856018, + "learning_rate": 1.5377358490566038e-05, + "log_odds_chosen": 17.49240493774414, + "log_odds_ratio": -0.0005192816024646163, + "logits/chosen": -1.245084285736084, + "logits/rejected": -5.5846710205078125, + "logps/chosen": -1.4274266958236694, + "logps/rejected": -18.540454864501953, + "loss": 1.3852, + "nll_loss": 1.3851439952850342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14274267852306366, + "rewards/margins": 1.7113028764724731, + "rewards/rejected": -1.8540456295013428, + "step": 367 + }, + { + "epoch": 1.388679245283019, + "grad_norm": 0.4979332685470581, + "learning_rate": 1.528301886792453e-05, + "log_odds_chosen": 16.063011169433594, + "log_odds_ratio": -0.06267835944890976, + "logits/chosen": -1.0869340896606445, + "logits/rejected": -4.391514778137207, + "logps/chosen": -1.4439592361450195, + "logps/rejected": -17.10696029663086, + "loss": 1.4131, + "nll_loss": 1.406846046447754, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14439593255519867, + "rewards/margins": 1.5663002729415894, + "rewards/rejected": -1.7106962203979492, + "step": 368 + }, + { + "epoch": 1.3924528301886792, + "grad_norm": 1.9730045795440674, + "learning_rate": 1.5188679245283018e-05, + "log_odds_chosen": 15.107033729553223, + "log_odds_ratio": -0.06132856011390686, + "logits/chosen": -1.2891849279403687, + "logits/rejected": -4.657969951629639, + "logps/chosen": -1.6434388160705566, + "logps/rejected": -16.488523483276367, + "loss": 1.4781, + "nll_loss": 1.4719549417495728, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16434389352798462, + "rewards/margins": 1.4845083951950073, + "rewards/rejected": -1.6488523483276367, + "step": 369 + }, + { + "epoch": 1.3962264150943398, + "grad_norm": 0.3606973886489868, + "learning_rate": 1.509433962264151e-05, + "log_odds_chosen": 18.26697540283203, + "log_odds_ratio": -2.7567176630327594e-07, + "logits/chosen": -0.31442150473594666, + "logits/rejected": -4.0157999992370605, + "logps/chosen": -1.3492426872253418, + "logps/rejected": -19.13711929321289, + "loss": 1.4501, + "nll_loss": 1.4500954151153564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1349242627620697, + "rewards/margins": 1.778787612915039, + "rewards/rejected": -1.9137119054794312, + "step": 370 + }, + { + "epoch": 1.4, + "grad_norm": 0.7839713096618652, + "learning_rate": 1.5e-05, + "log_odds_chosen": 18.16492462158203, + "log_odds_ratio": -0.06902754306793213, + "logits/chosen": -2.4219627380371094, + "logits/rejected": -5.2284698486328125, + "logps/chosen": -1.5058531761169434, + "logps/rejected": -19.259458541870117, + "loss": 1.5453, + "nll_loss": 1.5383750200271606, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1505853235721588, + "rewards/margins": 1.7753607034683228, + "rewards/rejected": -1.9259459972381592, + "step": 371 + }, + { + "epoch": 1.4037735849056605, + "grad_norm": 0.3838679790496826, + "learning_rate": 1.4905660377358491e-05, + "log_odds_chosen": 16.6832218170166, + "log_odds_ratio": -0.06244910508394241, + "logits/chosen": -1.3226251602172852, + "logits/rejected": -6.231893539428711, + "logps/chosen": -1.6935502290725708, + "logps/rejected": -18.128742218017578, + "loss": 1.6526, + "nll_loss": 1.6463897228240967, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16935503482818604, + "rewards/margins": 1.6435191631317139, + "rewards/rejected": -1.8128741979599, + "step": 372 + }, + { + "epoch": 1.4075471698113207, + "grad_norm": 0.38915058970451355, + "learning_rate": 1.4811320754716981e-05, + "log_odds_chosen": 16.009296417236328, + "log_odds_ratio": -1.8557118892204016e-05, + "logits/chosen": -1.1935677528381348, + "logits/rejected": -5.60360050201416, + "logps/chosen": -1.3822619915008545, + "logps/rejected": -17.003089904785156, + "loss": 1.3057, + "nll_loss": 1.305704951286316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1382262110710144, + "rewards/margins": 1.5620828866958618, + "rewards/rejected": -1.7003090381622314, + "step": 373 + }, + { + "epoch": 1.4113207547169813, + "grad_norm": 0.31976625323295593, + "learning_rate": 1.4716981132075472e-05, + "log_odds_chosen": 18.23406982421875, + "log_odds_ratio": -0.0007746726041659713, + "logits/chosen": -2.0351643562316895, + "logits/rejected": -5.288931846618652, + "logps/chosen": -1.3592875003814697, + "logps/rejected": -19.12387466430664, + "loss": 1.4888, + "nll_loss": 1.4887058734893799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13592875003814697, + "rewards/margins": 1.776458978652954, + "rewards/rejected": -1.9123876094818115, + "step": 374 + }, + { + "epoch": 1.4150943396226414, + "grad_norm": 0.3839375376701355, + "learning_rate": 1.4622641509433963e-05, + "log_odds_chosen": 16.633087158203125, + "log_odds_ratio": -0.18126872181892395, + "logits/chosen": -1.4143149852752686, + "logits/rejected": -4.575836658477783, + "logps/chosen": -1.3354686498641968, + "logps/rejected": -17.673622131347656, + "loss": 1.4257, + "nll_loss": 1.4075796604156494, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1335468739271164, + "rewards/margins": 1.6338152885437012, + "rewards/rejected": -1.7673622369766235, + "step": 375 + }, + { + "epoch": 1.418867924528302, + "grad_norm": 0.3899994194507599, + "learning_rate": 1.4528301886792452e-05, + "log_odds_chosen": 18.611719131469727, + "log_odds_ratio": -5.587950795415964e-07, + "logits/chosen": -1.2232400178909302, + "logits/rejected": -5.466405868530273, + "logps/chosen": -1.512157917022705, + "logps/rejected": -19.77882194519043, + "loss": 1.5621, + "nll_loss": 1.5620990991592407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1512157917022705, + "rewards/margins": 1.8266665935516357, + "rewards/rejected": -1.9778823852539062, + "step": 376 + }, + { + "epoch": 1.4226415094339622, + "grad_norm": 0.31554582715034485, + "learning_rate": 1.4433962264150944e-05, + "log_odds_chosen": 17.60826301574707, + "log_odds_ratio": -3.2186994758376386e-06, + "logits/chosen": -1.3368791341781616, + "logits/rejected": -6.721653938293457, + "logps/chosen": -1.4264721870422363, + "logps/rejected": -18.68645477294922, + "loss": 1.37, + "nll_loss": 1.3699793815612793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14264723658561707, + "rewards/margins": 1.7259981632232666, + "rewards/rejected": -1.868645429611206, + "step": 377 + }, + { + "epoch": 1.4264150943396228, + "grad_norm": 0.3350052535533905, + "learning_rate": 1.4339622641509435e-05, + "log_odds_chosen": 15.84263801574707, + "log_odds_ratio": -0.05588989332318306, + "logits/chosen": -2.5817272663116455, + "logits/rejected": -7.034815311431885, + "logps/chosen": -1.5793471336364746, + "logps/rejected": -17.11503791809082, + "loss": 1.4305, + "nll_loss": 1.4249083995819092, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15793471038341522, + "rewards/margins": 1.5535691976547241, + "rewards/rejected": -1.7115037441253662, + "step": 378 + }, + { + "epoch": 1.430188679245283, + "grad_norm": 0.3305111825466156, + "learning_rate": 1.4245283018867924e-05, + "log_odds_chosen": 17.508865356445312, + "log_odds_ratio": -4.04572256229585e-06, + "logits/chosen": -1.8427374362945557, + "logits/rejected": -7.116032600402832, + "logps/chosen": -1.6888805627822876, + "logps/rejected": -18.881450653076172, + "loss": 1.4913, + "nll_loss": 1.491302490234375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16888806223869324, + "rewards/margins": 1.719257116317749, + "rewards/rejected": -1.888145089149475, + "step": 379 + }, + { + "epoch": 1.4339622641509435, + "grad_norm": 0.5024957656860352, + "learning_rate": 1.4150943396226415e-05, + "log_odds_chosen": 16.263195037841797, + "log_odds_ratio": -0.0004213673819322139, + "logits/chosen": -1.309700608253479, + "logits/rejected": -5.588892459869385, + "logps/chosen": -1.4294378757476807, + "logps/rejected": -17.363752365112305, + "loss": 1.3392, + "nll_loss": 1.339144229888916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14294379949569702, + "rewards/margins": 1.5934314727783203, + "rewards/rejected": -1.736375331878662, + "step": 380 + }, + { + "epoch": 1.4377358490566037, + "grad_norm": 0.3632482588291168, + "learning_rate": 1.4056603773584906e-05, + "log_odds_chosen": 17.019535064697266, + "log_odds_ratio": -5.90098215980106e-06, + "logits/chosen": -1.0069024562835693, + "logits/rejected": -5.128624439239502, + "logps/chosen": -1.4965219497680664, + "logps/rejected": -18.188318252563477, + "loss": 1.4404, + "nll_loss": 1.440355896949768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14965218305587769, + "rewards/margins": 1.6691796779632568, + "rewards/rejected": -1.8188319206237793, + "step": 381 + }, + { + "epoch": 1.4415094339622643, + "grad_norm": 0.3996204435825348, + "learning_rate": 1.3962264150943397e-05, + "log_odds_chosen": 16.09657096862793, + "log_odds_ratio": -0.050253380089998245, + "logits/chosen": -1.884864330291748, + "logits/rejected": -5.274215221405029, + "logps/chosen": -1.6420609951019287, + "logps/rejected": -17.452035903930664, + "loss": 1.556, + "nll_loss": 1.5509570837020874, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1642061024904251, + "rewards/margins": 1.5809974670410156, + "rewards/rejected": -1.7452034950256348, + "step": 382 + }, + { + "epoch": 1.4452830188679244, + "grad_norm": 0.38239994645118713, + "learning_rate": 1.3867924528301887e-05, + "log_odds_chosen": 17.446556091308594, + "log_odds_ratio": -0.021241456270217896, + "logits/chosen": -1.7642720937728882, + "logits/rejected": -4.676743507385254, + "logps/chosen": -1.4043127298355103, + "logps/rejected": -18.422576904296875, + "loss": 1.4152, + "nll_loss": 1.4130536317825317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14043128490447998, + "rewards/margins": 1.7018264532089233, + "rewards/rejected": -1.8422577381134033, + "step": 383 + }, + { + "epoch": 1.449056603773585, + "grad_norm": 0.32839640974998474, + "learning_rate": 1.3773584905660378e-05, + "log_odds_chosen": 16.299285888671875, + "log_odds_ratio": -4.187269496469526e-06, + "logits/chosen": -2.137349843978882, + "logits/rejected": -8.301512718200684, + "logps/chosen": -1.6436606645584106, + "logps/rejected": -17.667858123779297, + "loss": 1.5086, + "nll_loss": 1.5085557699203491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16436606645584106, + "rewards/margins": 1.6024198532104492, + "rewards/rejected": -1.7667858600616455, + "step": 384 + }, + { + "epoch": 1.4528301886792452, + "grad_norm": 0.3542121648788452, + "learning_rate": 1.3679245283018869e-05, + "log_odds_chosen": 15.682723999023438, + "log_odds_ratio": -2.753584340098314e-05, + "logits/chosen": -1.6206883192062378, + "logits/rejected": -5.171188831329346, + "logps/chosen": -1.3008809089660645, + "logps/rejected": -16.44762420654297, + "loss": 1.2725, + "nll_loss": 1.2724624872207642, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13008809089660645, + "rewards/margins": 1.514674425125122, + "rewards/rejected": -1.644762396812439, + "step": 385 + }, + { + "epoch": 1.4566037735849058, + "grad_norm": 0.3344227373600006, + "learning_rate": 1.3584905660377358e-05, + "log_odds_chosen": 16.07890510559082, + "log_odds_ratio": -0.00016838790907058865, + "logits/chosen": -1.1785922050476074, + "logits/rejected": -5.77630090713501, + "logps/chosen": -1.60475492477417, + "logps/rejected": -17.42005157470703, + "loss": 1.4546, + "nll_loss": 1.4545769691467285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.160475492477417, + "rewards/margins": 1.5815297365188599, + "rewards/rejected": -1.7420052289962769, + "step": 386 + }, + { + "epoch": 1.460377358490566, + "grad_norm": 0.3509989380836487, + "learning_rate": 1.349056603773585e-05, + "log_odds_chosen": 17.025386810302734, + "log_odds_ratio": -4.601534237735905e-05, + "logits/chosen": -1.3022335767745972, + "logits/rejected": -5.336873531341553, + "logps/chosen": -1.3712674379348755, + "logps/rejected": -17.987762451171875, + "loss": 1.2749, + "nll_loss": 1.2749412059783936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13712672889232635, + "rewards/margins": 1.6616493463516235, + "rewards/rejected": -1.7987761497497559, + "step": 387 + }, + { + "epoch": 1.4641509433962265, + "grad_norm": 0.35176974534988403, + "learning_rate": 1.339622641509434e-05, + "log_odds_chosen": 18.190950393676758, + "log_odds_ratio": -1.4410341464099474e-05, + "logits/chosen": -2.399728298187256, + "logits/rejected": -6.07908296585083, + "logps/chosen": -1.1239242553710938, + "logps/rejected": -18.555187225341797, + "loss": 1.3573, + "nll_loss": 1.3572728633880615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11239242553710938, + "rewards/margins": 1.7431262731552124, + "rewards/rejected": -1.8555186986923218, + "step": 388 + }, + { + "epoch": 1.4679245283018867, + "grad_norm": 0.35888710618019104, + "learning_rate": 1.3301886792452831e-05, + "log_odds_chosen": 16.425899505615234, + "log_odds_ratio": -0.062047071754932404, + "logits/chosen": -2.126614570617676, + "logits/rejected": -6.522840976715088, + "logps/chosen": -1.5465141534805298, + "logps/rejected": -17.640609741210938, + "loss": 1.4328, + "nll_loss": 1.4265483617782593, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15465140342712402, + "rewards/margins": 1.6094096899032593, + "rewards/rejected": -1.7640612125396729, + "step": 389 + }, + { + "epoch": 1.4716981132075473, + "grad_norm": 0.4034729301929474, + "learning_rate": 1.320754716981132e-05, + "log_odds_chosen": 15.568371772766113, + "log_odds_ratio": -3.7845486076548696e-05, + "logits/chosen": -0.5623266696929932, + "logits/rejected": -4.619795799255371, + "logps/chosen": -1.4479458332061768, + "logps/rejected": -16.667530059814453, + "loss": 1.3254, + "nll_loss": 1.3253822326660156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14479456841945648, + "rewards/margins": 1.5219584703445435, + "rewards/rejected": -1.6667530536651611, + "step": 390 + }, + { + "epoch": 1.4754716981132074, + "grad_norm": 0.3520585596561432, + "learning_rate": 1.3113207547169812e-05, + "log_odds_chosen": 14.612020492553711, + "log_odds_ratio": -0.0023431943263858557, + "logits/chosen": -2.2221946716308594, + "logits/rejected": -6.539764881134033, + "logps/chosen": -1.4834494590759277, + "logps/rejected": -15.739931106567383, + "loss": 1.4064, + "nll_loss": 1.4061977863311768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.148344948887825, + "rewards/margins": 1.4256480932235718, + "rewards/rejected": -1.5739930868148804, + "step": 391 + }, + { + "epoch": 1.479245283018868, + "grad_norm": 0.36251071095466614, + "learning_rate": 1.3018867924528303e-05, + "log_odds_chosen": 16.438549041748047, + "log_odds_ratio": -0.09116589277982712, + "logits/chosen": -2.7520642280578613, + "logits/rejected": -7.930850028991699, + "logps/chosen": -1.535467267036438, + "logps/rejected": -17.66228485107422, + "loss": 1.4605, + "nll_loss": 1.4513870477676392, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15354672074317932, + "rewards/margins": 1.6126817464828491, + "rewards/rejected": -1.766228437423706, + "step": 392 + }, + { + "epoch": 1.4830188679245282, + "grad_norm": 0.34293168783187866, + "learning_rate": 1.2924528301886792e-05, + "log_odds_chosen": 16.164066314697266, + "log_odds_ratio": -0.0003289695887360722, + "logits/chosen": -1.1001029014587402, + "logits/rejected": -6.432461738586426, + "logps/chosen": -1.488477349281311, + "logps/rejected": -17.351869583129883, + "loss": 1.3264, + "nll_loss": 1.3263657093048096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14884772896766663, + "rewards/margins": 1.5863392353057861, + "rewards/rejected": -1.73518705368042, + "step": 393 + }, + { + "epoch": 1.4867924528301888, + "grad_norm": 0.3245314359664917, + "learning_rate": 1.2830188679245283e-05, + "log_odds_chosen": 17.266613006591797, + "log_odds_ratio": -0.00029518589144572616, + "logits/chosen": -1.3167364597320557, + "logits/rejected": -5.948409080505371, + "logps/chosen": -1.1578724384307861, + "logps/rejected": -17.86254119873047, + "loss": 1.2429, + "nll_loss": 1.2429192066192627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11578723788261414, + "rewards/margins": 1.6704668998718262, + "rewards/rejected": -1.7862541675567627, + "step": 394 + }, + { + "epoch": 1.490566037735849, + "grad_norm": 0.3447877764701843, + "learning_rate": 1.2735849056603775e-05, + "log_odds_chosen": 15.028793334960938, + "log_odds_ratio": -0.1531548947095871, + "logits/chosen": -2.7501590251922607, + "logits/rejected": -6.11824369430542, + "logps/chosen": -1.720342993736267, + "logps/rejected": -16.537107467651367, + "loss": 1.6015, + "nll_loss": 1.5861958265304565, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17203430831432343, + "rewards/margins": 1.481676459312439, + "rewards/rejected": -1.6537107229232788, + "step": 395 + }, + { + "epoch": 1.4943396226415095, + "grad_norm": 0.3362826704978943, + "learning_rate": 1.2641509433962264e-05, + "log_odds_chosen": 17.786401748657227, + "log_odds_ratio": -3.323038754388108e-06, + "logits/chosen": -1.742743730545044, + "logits/rejected": -5.260643482208252, + "logps/chosen": -1.4009883403778076, + "logps/rejected": -18.64501190185547, + "loss": 1.4321, + "nll_loss": 1.4321409463882446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14009883999824524, + "rewards/margins": 1.7244024276733398, + "rewards/rejected": -1.8645012378692627, + "step": 396 + }, + { + "epoch": 1.4981132075471697, + "grad_norm": 0.3690617084503174, + "learning_rate": 1.2547169811320755e-05, + "log_odds_chosen": 18.10134506225586, + "log_odds_ratio": -0.050154730677604675, + "logits/chosen": -1.2822532653808594, + "logits/rejected": -4.944076061248779, + "logps/chosen": -1.3214470148086548, + "logps/rejected": -19.015644073486328, + "loss": 1.219, + "nll_loss": 1.2139716148376465, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13214470446109772, + "rewards/margins": 1.7694196701049805, + "rewards/rejected": -1.9015644788742065, + "step": 397 + }, + { + "epoch": 1.5018867924528303, + "grad_norm": 0.3674098551273346, + "learning_rate": 1.2452830188679246e-05, + "log_odds_chosen": 15.48218059539795, + "log_odds_ratio": -0.06490848958492279, + "logits/chosen": -1.954714298248291, + "logits/rejected": -6.481346130371094, + "logps/chosen": -1.3896689414978027, + "logps/rejected": -16.480030059814453, + "loss": 1.4439, + "nll_loss": 1.4373944997787476, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1389668881893158, + "rewards/margins": 1.5090359449386597, + "rewards/rejected": -1.6480028629302979, + "step": 398 + }, + { + "epoch": 1.5056603773584905, + "grad_norm": 0.36629459261894226, + "learning_rate": 1.2358490566037737e-05, + "log_odds_chosen": 16.291677474975586, + "log_odds_ratio": -0.08093234151601791, + "logits/chosen": -0.5040303468704224, + "logits/rejected": -4.219124794006348, + "logps/chosen": -1.6667414903640747, + "logps/rejected": -17.61298370361328, + "loss": 1.6038, + "nll_loss": 1.5956670045852661, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1666741520166397, + "rewards/margins": 1.594624400138855, + "rewards/rejected": -1.761298418045044, + "step": 399 + }, + { + "epoch": 1.509433962264151, + "grad_norm": 0.4143602252006531, + "learning_rate": 1.2264150943396227e-05, + "log_odds_chosen": 16.119110107421875, + "log_odds_ratio": -0.0009257096680812538, + "logits/chosen": -0.8463224172592163, + "logits/rejected": -4.586696624755859, + "logps/chosen": -1.3971197605133057, + "logps/rejected": -17.024677276611328, + "loss": 1.4237, + "nll_loss": 1.423575758934021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13971199095249176, + "rewards/margins": 1.5627559423446655, + "rewards/rejected": -1.702467918395996, + "step": 400 + }, + { + "epoch": 1.5132075471698112, + "grad_norm": 0.3812906742095947, + "learning_rate": 1.2169811320754718e-05, + "log_odds_chosen": 14.85096549987793, + "log_odds_ratio": -0.05318630486726761, + "logits/chosen": -0.3447520136833191, + "logits/rejected": -4.831925868988037, + "logps/chosen": -1.568985939025879, + "logps/rejected": -16.13321304321289, + "loss": 1.4211, + "nll_loss": 1.4157638549804688, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1568985879421234, + "rewards/margins": 1.4564226865768433, + "rewards/rejected": -1.613321304321289, + "step": 401 + }, + { + "epoch": 1.5169811320754718, + "grad_norm": 0.3796207010746002, + "learning_rate": 1.2075471698113209e-05, + "log_odds_chosen": 14.776948928833008, + "log_odds_ratio": -0.13772796094417572, + "logits/chosen": -2.6085329055786133, + "logits/rejected": -5.985441207885742, + "logps/chosen": -1.564119815826416, + "logps/rejected": -16.096309661865234, + "loss": 1.3687, + "nll_loss": 1.3549602031707764, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15641197562217712, + "rewards/margins": 1.4532190561294556, + "rewards/rejected": -1.6096309423446655, + "step": 402 + }, + { + "epoch": 1.520754716981132, + "grad_norm": 0.6695427894592285, + "learning_rate": 1.1981132075471698e-05, + "log_odds_chosen": 15.497125625610352, + "log_odds_ratio": -7.294305760296993e-06, + "logits/chosen": -0.9591398239135742, + "logits/rejected": -5.608551979064941, + "logps/chosen": -1.536940097808838, + "logps/rejected": -16.734256744384766, + "loss": 1.3521, + "nll_loss": 1.3521382808685303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1536940038204193, + "rewards/margins": 1.5197317600250244, + "rewards/rejected": -1.6734257936477661, + "step": 403 + }, + { + "epoch": 1.5245283018867926, + "grad_norm": 0.5477868318557739, + "learning_rate": 1.188679245283019e-05, + "log_odds_chosen": 16.6223087310791, + "log_odds_ratio": -0.0006986000225879252, + "logits/chosen": -1.419918179512024, + "logits/rejected": -4.885513782501221, + "logps/chosen": -1.3331007957458496, + "logps/rejected": -17.54778480529785, + "loss": 1.3093, + "nll_loss": 1.3091806173324585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13331007957458496, + "rewards/margins": 1.621468424797058, + "rewards/rejected": -1.7547786235809326, + "step": 404 + }, + { + "epoch": 1.5283018867924527, + "grad_norm": 0.333292156457901, + "learning_rate": 1.179245283018868e-05, + "log_odds_chosen": 18.347660064697266, + "log_odds_ratio": -1.3806706192553975e-05, + "logits/chosen": -1.4701156616210938, + "logits/rejected": -6.684134483337402, + "logps/chosen": -1.4482415914535522, + "logps/rejected": -19.408504486083984, + "loss": 1.3793, + "nll_loss": 1.3793381452560425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14482416212558746, + "rewards/margins": 1.7960264682769775, + "rewards/rejected": -1.9408507347106934, + "step": 405 + }, + { + "epoch": 1.5320754716981133, + "grad_norm": 0.3505614399909973, + "learning_rate": 1.169811320754717e-05, + "log_odds_chosen": 18.066612243652344, + "log_odds_ratio": -6.847452823421918e-06, + "logits/chosen": -2.2876837253570557, + "logits/rejected": -6.302707195281982, + "logps/chosen": -1.4648969173431396, + "logps/rejected": -19.111392974853516, + "loss": 1.6152, + "nll_loss": 1.6151999235153198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1464896947145462, + "rewards/margins": 1.7646493911743164, + "rewards/rejected": -1.9111392498016357, + "step": 406 + }, + { + "epoch": 1.5358490566037735, + "grad_norm": 0.356839120388031, + "learning_rate": 1.160377358490566e-05, + "log_odds_chosen": 15.59557819366455, + "log_odds_ratio": -0.0001072017039405182, + "logits/chosen": -0.30015936493873596, + "logits/rejected": -6.0860114097595215, + "logps/chosen": -1.6691813468933105, + "logps/rejected": -17.017396926879883, + "loss": 1.4345, + "nll_loss": 1.434523344039917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16691814363002777, + "rewards/margins": 1.534821629524231, + "rewards/rejected": -1.70173978805542, + "step": 407 + }, + { + "epoch": 1.539622641509434, + "grad_norm": 0.36700817942619324, + "learning_rate": 1.1509433962264152e-05, + "log_odds_chosen": 17.510787963867188, + "log_odds_ratio": -1.4975793192206766e-06, + "logits/chosen": -1.014905571937561, + "logits/rejected": -5.991207122802734, + "logps/chosen": -1.4787532091140747, + "logps/rejected": -18.62135887145996, + "loss": 1.4582, + "nll_loss": 1.4582459926605225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1478753238916397, + "rewards/margins": 1.7142606973648071, + "rewards/rejected": -1.8621360063552856, + "step": 408 + }, + { + "epoch": 1.5433962264150942, + "grad_norm": 0.355266273021698, + "learning_rate": 1.1415094339622643e-05, + "log_odds_chosen": 16.35395050048828, + "log_odds_ratio": -7.43008786230348e-05, + "logits/chosen": -1.7440268993377686, + "logits/rejected": -6.251463890075684, + "logps/chosen": -1.4440710544586182, + "logps/rejected": -17.370180130004883, + "loss": 1.6528, + "nll_loss": 1.6527677774429321, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14440712332725525, + "rewards/margins": 1.5926108360290527, + "rewards/rejected": -1.73701810836792, + "step": 409 + }, + { + "epoch": 1.5471698113207548, + "grad_norm": 0.3182680904865265, + "learning_rate": 1.1320754716981132e-05, + "log_odds_chosen": 15.64585018157959, + "log_odds_ratio": -0.013405690900981426, + "logits/chosen": -1.3935030698776245, + "logits/rejected": -5.763398170471191, + "logps/chosen": -1.366743564605713, + "logps/rejected": -16.644275665283203, + "loss": 1.4451, + "nll_loss": 1.443767786026001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13667435944080353, + "rewards/margins": 1.5277531147003174, + "rewards/rejected": -1.6644275188446045, + "step": 410 + }, + { + "epoch": 1.550943396226415, + "grad_norm": 0.3500687777996063, + "learning_rate": 1.1226415094339623e-05, + "log_odds_chosen": 16.229408264160156, + "log_odds_ratio": -5.9309946664143354e-05, + "logits/chosen": -2.4652421474456787, + "logits/rejected": -6.496399879455566, + "logps/chosen": -1.3401199579238892, + "logps/rejected": -17.225467681884766, + "loss": 1.5363, + "nll_loss": 1.5363225936889648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13401198387145996, + "rewards/margins": 1.5885348320007324, + "rewards/rejected": -1.7225468158721924, + "step": 411 + }, + { + "epoch": 1.5547169811320756, + "grad_norm": 0.427716463804245, + "learning_rate": 1.1132075471698115e-05, + "log_odds_chosen": 16.30535888671875, + "log_odds_ratio": -0.02768136002123356, + "logits/chosen": -1.3295129537582397, + "logits/rejected": -5.655584335327148, + "logps/chosen": -1.5316812992095947, + "logps/rejected": -17.553970336914062, + "loss": 1.3546, + "nll_loss": 1.3517988920211792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15316811203956604, + "rewards/margins": 1.602229118347168, + "rewards/rejected": -1.7553972005844116, + "step": 412 + }, + { + "epoch": 1.5584905660377357, + "grad_norm": 0.3918018639087677, + "learning_rate": 1.1037735849056604e-05, + "log_odds_chosen": 17.692913055419922, + "log_odds_ratio": -9.760309467310435e-07, + "logits/chosen": -0.8427505493164062, + "logits/rejected": -4.809689521789551, + "logps/chosen": -1.7556877136230469, + "logps/rejected": -19.217164993286133, + "loss": 1.3681, + "nll_loss": 1.368148684501648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17556877434253693, + "rewards/margins": 1.746147632598877, + "rewards/rejected": -1.9217164516448975, + "step": 413 + }, + { + "epoch": 1.5622641509433963, + "grad_norm": 0.3670799434185028, + "learning_rate": 1.0943396226415095e-05, + "log_odds_chosen": 17.20905303955078, + "log_odds_ratio": -0.020199574530124664, + "logits/chosen": -1.5989339351654053, + "logits/rejected": -5.965342044830322, + "logps/chosen": -1.5377042293548584, + "logps/rejected": -18.371013641357422, + "loss": 1.5964, + "nll_loss": 1.5944006443023682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15377041697502136, + "rewards/margins": 1.68333101272583, + "rewards/rejected": -1.8371014595031738, + "step": 414 + }, + { + "epoch": 1.5660377358490565, + "grad_norm": 0.35576295852661133, + "learning_rate": 1.0849056603773586e-05, + "log_odds_chosen": 15.496426582336426, + "log_odds_ratio": -0.049891576170921326, + "logits/chosen": -0.8893330097198486, + "logits/rejected": -5.53910493850708, + "logps/chosen": -1.553781270980835, + "logps/rejected": -16.68655776977539, + "loss": 1.5559, + "nll_loss": 1.5509579181671143, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15537814795970917, + "rewards/margins": 1.5132777690887451, + "rewards/rejected": -1.6686559915542603, + "step": 415 + }, + { + "epoch": 1.569811320754717, + "grad_norm": 0.32513803243637085, + "learning_rate": 1.0754716981132077e-05, + "log_odds_chosen": 17.47341537475586, + "log_odds_ratio": -0.0002301803178852424, + "logits/chosen": -1.110384225845337, + "logits/rejected": -6.549045562744141, + "logps/chosen": -1.5312076807022095, + "logps/rejected": -18.674665451049805, + "loss": 1.5338, + "nll_loss": 1.5337331295013428, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15312077105045319, + "rewards/margins": 1.7143458127975464, + "rewards/rejected": -1.867466688156128, + "step": 416 + }, + { + "epoch": 1.5735849056603772, + "grad_norm": 0.3317815959453583, + "learning_rate": 1.0660377358490567e-05, + "log_odds_chosen": 17.113929748535156, + "log_odds_ratio": -0.00808227900415659, + "logits/chosen": -1.527684211730957, + "logits/rejected": -6.576870918273926, + "logps/chosen": -1.5478432178497314, + "logps/rejected": -18.27701187133789, + "loss": 1.5447, + "nll_loss": 1.5438923835754395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15478432178497314, + "rewards/margins": 1.6729168891906738, + "rewards/rejected": -1.8277010917663574, + "step": 417 + }, + { + "epoch": 1.5773584905660378, + "grad_norm": 0.3883110582828522, + "learning_rate": 1.0566037735849058e-05, + "log_odds_chosen": 16.38488006591797, + "log_odds_ratio": -0.01739910989999771, + "logits/chosen": -2.0036072731018066, + "logits/rejected": -6.386971473693848, + "logps/chosen": -1.778263807296753, + "logps/rejected": -17.913105010986328, + "loss": 1.5031, + "nll_loss": 1.5013105869293213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1778264045715332, + "rewards/margins": 1.6134843826293945, + "rewards/rejected": -1.7913107872009277, + "step": 418 + }, + { + "epoch": 1.581132075471698, + "grad_norm": 0.3066483736038208, + "learning_rate": 1.0471698113207549e-05, + "log_odds_chosen": 19.433700561523438, + "log_odds_ratio": -1.0430817098949774e-07, + "logits/chosen": -1.0177987813949585, + "logits/rejected": -6.801183700561523, + "logps/chosen": -1.4599207639694214, + "logps/rejected": -20.431859970092773, + "loss": 1.3882, + "nll_loss": 1.3881503343582153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14599208533763885, + "rewards/margins": 1.8971937894821167, + "rewards/rejected": -2.0431859493255615, + "step": 419 + }, + { + "epoch": 1.5849056603773586, + "grad_norm": 0.40567484498023987, + "learning_rate": 1.0377358490566038e-05, + "log_odds_chosen": 13.711181640625, + "log_odds_ratio": -0.05775582417845726, + "logits/chosen": -0.9496514797210693, + "logits/rejected": -4.353882789611816, + "logps/chosen": -1.6069025993347168, + "logps/rejected": -15.056055068969727, + "loss": 1.3888, + "nll_loss": 1.3829797506332397, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16069024801254272, + "rewards/margins": 1.3449151515960693, + "rewards/rejected": -1.5056054592132568, + "step": 420 + }, + { + "epoch": 1.5886792452830187, + "grad_norm": 0.34252166748046875, + "learning_rate": 1.028301886792453e-05, + "log_odds_chosen": 19.151784896850586, + "log_odds_ratio": -1.4499668395728804e-05, + "logits/chosen": -1.0097556114196777, + "logits/rejected": -5.88823127746582, + "logps/chosen": -1.4318840503692627, + "logps/rejected": -20.140625, + "loss": 1.4247, + "nll_loss": 1.4247350692749023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14318840205669403, + "rewards/margins": 1.870874285697937, + "rewards/rejected": -2.0140626430511475, + "step": 421 + }, + { + "epoch": 1.5924528301886793, + "grad_norm": 0.4356919527053833, + "learning_rate": 1.018867924528302e-05, + "log_odds_chosen": 18.37000274658203, + "log_odds_ratio": -4.2022656998597085e-06, + "logits/chosen": -2.5176637172698975, + "logits/rejected": -5.485119819641113, + "logps/chosen": -1.699413776397705, + "logps/rejected": -19.841552734375, + "loss": 1.4642, + "nll_loss": 1.4642114639282227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16994138062000275, + "rewards/margins": 1.8142141103744507, + "rewards/rejected": -1.9841554164886475, + "step": 422 + }, + { + "epoch": 1.5962264150943395, + "grad_norm": 0.3340418338775635, + "learning_rate": 1.009433962264151e-05, + "log_odds_chosen": 17.76876449584961, + "log_odds_ratio": -1.0319773537048604e-05, + "logits/chosen": -1.0354773998260498, + "logits/rejected": -6.589682579040527, + "logps/chosen": -1.570968508720398, + "logps/rejected": -19.032516479492188, + "loss": 1.5785, + "nll_loss": 1.578470230102539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15709686279296875, + "rewards/margins": 1.74615478515625, + "rewards/rejected": -1.9032516479492188, + "step": 423 + }, + { + "epoch": 1.6, + "grad_norm": 0.3202284574508667, + "learning_rate": 1e-05, + "log_odds_chosen": 17.717674255371094, + "log_odds_ratio": -2.1941097656963393e-05, + "logits/chosen": -1.6594294309616089, + "logits/rejected": -5.430027484893799, + "logps/chosen": -1.4836103916168213, + "logps/rejected": -18.895004272460938, + "loss": 1.6185, + "nll_loss": 1.6185247898101807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14836102724075317, + "rewards/margins": 1.7411394119262695, + "rewards/rejected": -1.889500379562378, + "step": 424 + }, + { + "epoch": 1.6037735849056602, + "grad_norm": 0.3735036849975586, + "learning_rate": 9.905660377358492e-06, + "log_odds_chosen": 17.9097843170166, + "log_odds_ratio": -2.078727447951678e-06, + "logits/chosen": -1.3803160190582275, + "logits/rejected": -5.703221797943115, + "logps/chosen": -1.2858457565307617, + "logps/rejected": -18.800107955932617, + "loss": 1.1804, + "nll_loss": 1.1804119348526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1285845786333084, + "rewards/margins": 1.751426339149475, + "rewards/rejected": -1.880010962486267, + "step": 425 + }, + { + "epoch": 1.6075471698113208, + "grad_norm": 0.5746561288833618, + "learning_rate": 9.811320754716983e-06, + "log_odds_chosen": 17.22057342529297, + "log_odds_ratio": -3.956294222007273e-06, + "logits/chosen": -1.1769520044326782, + "logits/rejected": -5.837000846862793, + "logps/chosen": -1.6268696784973145, + "logps/rejected": -18.550504684448242, + "loss": 1.653, + "nll_loss": 1.6529629230499268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16268697381019592, + "rewards/margins": 1.6923635005950928, + "rewards/rejected": -1.8550504446029663, + "step": 426 + }, + { + "epoch": 1.611320754716981, + "grad_norm": 0.363587349653244, + "learning_rate": 9.716981132075472e-06, + "log_odds_chosen": 14.084954261779785, + "log_odds_ratio": -0.07172351330518723, + "logits/chosen": -1.2698285579681396, + "logits/rejected": -4.79845666885376, + "logps/chosen": -1.500381350517273, + "logps/rejected": -15.104291915893555, + "loss": 1.4545, + "nll_loss": 1.4473347663879395, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15003815293312073, + "rewards/margins": 1.3603911399841309, + "rewards/rejected": -1.5104291439056396, + "step": 427 + }, + { + "epoch": 1.6150943396226416, + "grad_norm": 0.3944726884365082, + "learning_rate": 9.622641509433963e-06, + "log_odds_chosen": 16.034841537475586, + "log_odds_ratio": -2.1783518604934216e-05, + "logits/chosen": -0.33273932337760925, + "logits/rejected": -4.981014251708984, + "logps/chosen": -1.6068644523620605, + "logps/rejected": -17.390186309814453, + "loss": 1.5603, + "nll_loss": 1.5603405237197876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1606864631175995, + "rewards/margins": 1.5783324241638184, + "rewards/rejected": -1.7390189170837402, + "step": 428 + }, + { + "epoch": 1.6188679245283017, + "grad_norm": 0.3169495463371277, + "learning_rate": 9.528301886792455e-06, + "log_odds_chosen": 17.651002883911133, + "log_odds_ratio": -0.08082529902458191, + "logits/chosen": -1.4152789115905762, + "logits/rejected": -5.411661148071289, + "logps/chosen": -1.3260101079940796, + "logps/rejected": -18.322998046875, + "loss": 1.4476, + "nll_loss": 1.439566731452942, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13260100781917572, + "rewards/margins": 1.6996989250183105, + "rewards/rejected": -1.8322997093200684, + "step": 429 + }, + { + "epoch": 1.6226415094339623, + "grad_norm": 0.3842892348766327, + "learning_rate": 9.433962264150944e-06, + "log_odds_chosen": 18.826631546020508, + "log_odds_ratio": -5.513446694749291e-07, + "logits/chosen": -1.7618896961212158, + "logits/rejected": -6.005032062530518, + "logps/chosen": -1.2967678308486938, + "logps/rejected": -19.6664981842041, + "loss": 1.4705, + "nll_loss": 1.4704593420028687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12967678904533386, + "rewards/margins": 1.8369731903076172, + "rewards/rejected": -1.9666498899459839, + "step": 430 + }, + { + "epoch": 1.6264150943396225, + "grad_norm": 0.38966941833496094, + "learning_rate": 9.339622641509433e-06, + "log_odds_chosen": 13.728066444396973, + "log_odds_ratio": -0.15279138088226318, + "logits/chosen": -1.5197663307189941, + "logits/rejected": -5.018875598907471, + "logps/chosen": -1.4149266481399536, + "logps/rejected": -14.751510620117188, + "loss": 1.2838, + "nll_loss": 1.2685606479644775, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14149266481399536, + "rewards/margins": 1.3336584568023682, + "rewards/rejected": -1.4751511812210083, + "step": 431 + }, + { + "epoch": 1.630188679245283, + "grad_norm": 0.3509092330932617, + "learning_rate": 9.245283018867924e-06, + "log_odds_chosen": 16.643827438354492, + "log_odds_ratio": -0.06135103851556778, + "logits/chosen": -2.0793323516845703, + "logits/rejected": -6.901341915130615, + "logps/chosen": -1.5519651174545288, + "logps/rejected": -17.924978256225586, + "loss": 1.3948, + "nll_loss": 1.3886274099349976, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15519651770591736, + "rewards/margins": 1.6373014450073242, + "rewards/rejected": -1.7924977540969849, + "step": 432 + }, + { + "epoch": 1.6339622641509433, + "grad_norm": 0.374100923538208, + "learning_rate": 9.150943396226416e-06, + "log_odds_chosen": 16.738018035888672, + "log_odds_ratio": -9.723278708406724e-06, + "logits/chosen": -0.8103672862052917, + "logits/rejected": -5.695781230926514, + "logps/chosen": -1.61466646194458, + "logps/rejected": -18.09328269958496, + "loss": 1.5884, + "nll_loss": 1.5884230136871338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16146664321422577, + "rewards/margins": 1.6478617191314697, + "rewards/rejected": -1.809328317642212, + "step": 433 + }, + { + "epoch": 1.6377358490566039, + "grad_norm": 0.348984956741333, + "learning_rate": 9.056603773584905e-06, + "log_odds_chosen": 15.502889633178711, + "log_odds_ratio": -0.0006235900218598545, + "logits/chosen": -0.9150213003158569, + "logits/rejected": -5.924428939819336, + "logps/chosen": -1.5006028413772583, + "logps/rejected": -16.678489685058594, + "loss": 1.3731, + "nll_loss": 1.3730108737945557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15006029605865479, + "rewards/margins": 1.5177886486053467, + "rewards/rejected": -1.6678489446640015, + "step": 434 + }, + { + "epoch": 1.641509433962264, + "grad_norm": 0.3935360312461853, + "learning_rate": 8.962264150943396e-06, + "log_odds_chosen": 15.421186447143555, + "log_odds_ratio": -4.3803676817333326e-05, + "logits/chosen": -2.2583491802215576, + "logits/rejected": -6.483213424682617, + "logps/chosen": -1.748963475227356, + "logps/rejected": -16.925840377807617, + "loss": 1.528, + "nll_loss": 1.52796471118927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17489634454250336, + "rewards/margins": 1.5176876783370972, + "rewards/rejected": -1.6925840377807617, + "step": 435 + }, + { + "epoch": 1.6452830188679246, + "grad_norm": 0.3647741973400116, + "learning_rate": 8.867924528301887e-06, + "log_odds_chosen": 18.231021881103516, + "log_odds_ratio": -1.4775437193748076e-05, + "logits/chosen": -1.7286620140075684, + "logits/rejected": -3.7592124938964844, + "logps/chosen": -1.3811477422714233, + "logps/rejected": -19.279529571533203, + "loss": 1.5674, + "nll_loss": 1.5673856735229492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1381147801876068, + "rewards/margins": 1.7898383140563965, + "rewards/rejected": -1.9279532432556152, + "step": 436 + }, + { + "epoch": 1.6490566037735848, + "grad_norm": 0.3980993330478668, + "learning_rate": 8.773584905660376e-06, + "log_odds_chosen": 18.004159927368164, + "log_odds_ratio": -9.220154606737196e-05, + "logits/chosen": -0.7007821202278137, + "logits/rejected": -4.726859092712402, + "logps/chosen": -1.5681556463241577, + "logps/rejected": -19.279773712158203, + "loss": 1.4785, + "nll_loss": 1.478508472442627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1568155735731125, + "rewards/margins": 1.7711620330810547, + "rewards/rejected": -1.9279775619506836, + "step": 437 + }, + { + "epoch": 1.6528301886792454, + "grad_norm": 0.38771992921829224, + "learning_rate": 8.679245283018868e-06, + "log_odds_chosen": 15.956310272216797, + "log_odds_ratio": -0.08997757732868195, + "logits/chosen": -2.8503222465515137, + "logits/rejected": -6.6901421546936035, + "logps/chosen": -1.7365132570266724, + "logps/rejected": -17.479639053344727, + "loss": 1.4619, + "nll_loss": 1.4529235363006592, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.173651322722435, + "rewards/margins": 1.574312448501587, + "rewards/rejected": -1.747963786125183, + "step": 438 + }, + { + "epoch": 1.6566037735849055, + "grad_norm": 0.40687695145606995, + "learning_rate": 8.584905660377359e-06, + "log_odds_chosen": 17.718862533569336, + "log_odds_ratio": -3.934121923521161e-05, + "logits/chosen": -1.7964673042297363, + "logits/rejected": -5.379039764404297, + "logps/chosen": -1.6427710056304932, + "logps/rejected": -19.057796478271484, + "loss": 1.3497, + "nll_loss": 1.349698781967163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1642771065235138, + "rewards/margins": 1.7415024042129517, + "rewards/rejected": -1.9057796001434326, + "step": 439 + }, + { + "epoch": 1.6603773584905661, + "grad_norm": 0.41751906275749207, + "learning_rate": 8.49056603773585e-06, + "log_odds_chosen": 17.8037109375, + "log_odds_ratio": -0.0002321999636478722, + "logits/chosen": -1.1646761894226074, + "logits/rejected": -5.253706455230713, + "logps/chosen": -1.5499441623687744, + "logps/rejected": -18.97268295288086, + "loss": 1.4037, + "nll_loss": 1.403627872467041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1549944281578064, + "rewards/margins": 1.7422740459442139, + "rewards/rejected": -1.897268533706665, + "step": 440 + }, + { + "epoch": 1.6641509433962263, + "grad_norm": 0.3891851007938385, + "learning_rate": 8.396226415094339e-06, + "log_odds_chosen": 15.996545791625977, + "log_odds_ratio": -0.0381770022213459, + "logits/chosen": -0.9191466569900513, + "logits/rejected": -4.70245361328125, + "logps/chosen": -1.4315601587295532, + "logps/rejected": -17.08255386352539, + "loss": 1.4039, + "nll_loss": 1.4000357389450073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1431560218334198, + "rewards/margins": 1.5650992393493652, + "rewards/rejected": -1.7082552909851074, + "step": 441 + }, + { + "epoch": 1.6679245283018869, + "grad_norm": 0.3916653096675873, + "learning_rate": 8.30188679245283e-06, + "log_odds_chosen": 16.95732879638672, + "log_odds_ratio": -0.040689367800951004, + "logits/chosen": -1.9281044006347656, + "logits/rejected": -6.584669589996338, + "logps/chosen": -1.4779269695281982, + "logps/rejected": -18.117380142211914, + "loss": 1.3331, + "nll_loss": 1.3289951086044312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14779271185398102, + "rewards/margins": 1.6639453172683716, + "rewards/rejected": -1.8117380142211914, + "step": 442 + }, + { + "epoch": 1.671698113207547, + "grad_norm": 0.4993988275527954, + "learning_rate": 8.207547169811321e-06, + "log_odds_chosen": 12.434319496154785, + "log_odds_ratio": -0.25445252656936646, + "logits/chosen": -1.8510253429412842, + "logits/rejected": -4.986669540405273, + "logps/chosen": -1.862165927886963, + "logps/rejected": -14.14360237121582, + "loss": 1.7, + "nll_loss": 1.674567461013794, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1862165927886963, + "rewards/margins": 1.2281436920166016, + "rewards/rejected": -1.4143602848052979, + "step": 443 + }, + { + "epoch": 1.6754716981132076, + "grad_norm": 0.37089723348617554, + "learning_rate": 8.11320754716981e-06, + "log_odds_chosen": 18.173118591308594, + "log_odds_ratio": -1.9893154785677325e-06, + "logits/chosen": -2.578376054763794, + "logits/rejected": -6.764125823974609, + "logps/chosen": -1.4696475267410278, + "logps/rejected": -19.300800323486328, + "loss": 1.4572, + "nll_loss": 1.4572360515594482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14696475863456726, + "rewards/margins": 1.783115267753601, + "rewards/rejected": -1.9300800561904907, + "step": 444 + }, + { + "epoch": 1.6792452830188678, + "grad_norm": 0.35598012804985046, + "learning_rate": 8.018867924528302e-06, + "log_odds_chosen": 16.349313735961914, + "log_odds_ratio": -4.495953908190131e-05, + "logits/chosen": -1.1268545389175415, + "logits/rejected": -6.885201454162598, + "logps/chosen": -1.6435253620147705, + "logps/rejected": -17.746475219726562, + "loss": 1.5433, + "nll_loss": 1.5432648658752441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16435253620147705, + "rewards/margins": 1.610295057296753, + "rewards/rejected": -1.77464759349823, + "step": 445 + }, + { + "epoch": 1.6830188679245284, + "grad_norm": 0.413074791431427, + "learning_rate": 7.924528301886793e-06, + "log_odds_chosen": 17.380868911743164, + "log_odds_ratio": -0.023090748116374016, + "logits/chosen": -0.9112629890441895, + "logits/rejected": -4.611825942993164, + "logps/chosen": -1.3334120512008667, + "logps/rejected": -18.372407913208008, + "loss": 1.3489, + "nll_loss": 1.3465766906738281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13334119319915771, + "rewards/margins": 1.703899621963501, + "rewards/rejected": -1.8372408151626587, + "step": 446 + }, + { + "epoch": 1.6867924528301885, + "grad_norm": 0.3995283246040344, + "learning_rate": 7.830188679245282e-06, + "log_odds_chosen": 19.585247039794922, + "log_odds_ratio": -8.114101547107566e-06, + "logits/chosen": -0.4880484342575073, + "logits/rejected": -4.196340084075928, + "logps/chosen": -1.2747726440429688, + "logps/rejected": -20.202425003051758, + "loss": 1.3193, + "nll_loss": 1.3193085193634033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1274772584438324, + "rewards/margins": 1.8927652835845947, + "rewards/rejected": -2.020242691040039, + "step": 447 + }, + { + "epoch": 1.6905660377358491, + "grad_norm": 0.3784193992614746, + "learning_rate": 7.735849056603773e-06, + "log_odds_chosen": 18.052352905273438, + "log_odds_ratio": -3.911668954970082e-06, + "logits/chosen": -1.333806037902832, + "logits/rejected": -6.3108344078063965, + "logps/chosen": -1.7495572566986084, + "logps/rejected": -19.464826583862305, + "loss": 1.5705, + "nll_loss": 1.5704904794692993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17495572566986084, + "rewards/margins": 1.7715270519256592, + "rewards/rejected": -1.94648277759552, + "step": 448 + }, + { + "epoch": 1.6943396226415093, + "grad_norm": 0.37193769216537476, + "learning_rate": 7.641509433962264e-06, + "log_odds_chosen": 15.382280349731445, + "log_odds_ratio": -0.00010735265095718205, + "logits/chosen": -1.019675374031067, + "logits/rejected": -5.034616470336914, + "logps/chosen": -1.6141021251678467, + "logps/rejected": -16.724472045898438, + "loss": 1.5671, + "nll_loss": 1.5670870542526245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16141021251678467, + "rewards/margins": 1.511036992073059, + "rewards/rejected": -1.6724472045898438, + "step": 449 + }, + { + "epoch": 1.6981132075471699, + "grad_norm": 0.3708736300468445, + "learning_rate": 7.547169811320755e-06, + "log_odds_chosen": 17.476974487304688, + "log_odds_ratio": -0.018685974180698395, + "logits/chosen": -1.8253635168075562, + "logits/rejected": -5.652876853942871, + "logps/chosen": -1.1619367599487305, + "logps/rejected": -18.148921966552734, + "loss": 1.23, + "nll_loss": 1.228140115737915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11619366705417633, + "rewards/margins": 1.69869863986969, + "rewards/rejected": -1.814892292022705, + "step": 450 + }, + { + "epoch": 1.70188679245283, + "grad_norm": 0.4171108603477478, + "learning_rate": 7.452830188679246e-06, + "log_odds_chosen": 18.037120819091797, + "log_odds_ratio": -4.321344704294461e-07, + "logits/chosen": -2.2186176776885986, + "logits/rejected": -7.5618438720703125, + "logps/chosen": -1.7550156116485596, + "logps/rejected": -19.57200050354004, + "loss": 1.4835, + "nll_loss": 1.4834994077682495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17550158500671387, + "rewards/margins": 1.7816987037658691, + "rewards/rejected": -1.957200050354004, + "step": 451 + }, + { + "epoch": 1.7056603773584906, + "grad_norm": 0.40649089217185974, + "learning_rate": 7.358490566037736e-06, + "log_odds_chosen": 17.768571853637695, + "log_odds_ratio": -3.509271664370317e-06, + "logits/chosen": -1.5206046104431152, + "logits/rejected": -6.697060585021973, + "logps/chosen": -1.5520222187042236, + "logps/rejected": -19.050880432128906, + "loss": 1.4503, + "nll_loss": 1.4502997398376465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1552022248506546, + "rewards/margins": 1.7498859167099, + "rewards/rejected": -1.9050880670547485, + "step": 452 + }, + { + "epoch": 1.7094339622641508, + "grad_norm": 0.379649817943573, + "learning_rate": 7.264150943396226e-06, + "log_odds_chosen": 18.14077377319336, + "log_odds_ratio": -0.002329126000404358, + "logits/chosen": -1.208418846130371, + "logits/rejected": -4.804601669311523, + "logps/chosen": -1.673492431640625, + "logps/rejected": -19.51561737060547, + "loss": 1.5746, + "nll_loss": 1.5743178129196167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16734924912452698, + "rewards/margins": 1.7842124700546265, + "rewards/rejected": -1.9515619277954102, + "step": 453 + }, + { + "epoch": 1.7132075471698114, + "grad_norm": 0.37834683060646057, + "learning_rate": 7.169811320754717e-06, + "log_odds_chosen": 17.196395874023438, + "log_odds_ratio": -8.45340036903508e-05, + "logits/chosen": -1.6690289974212646, + "logits/rejected": -5.095861434936523, + "logps/chosen": -1.4087400436401367, + "logps/rejected": -18.211074829101562, + "loss": 1.4106, + "nll_loss": 1.410545825958252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1408740133047104, + "rewards/margins": 1.6802334785461426, + "rewards/rejected": -1.8211076259613037, + "step": 454 + }, + { + "epoch": 1.7169811320754715, + "grad_norm": 0.3911416530609131, + "learning_rate": 7.0754716981132075e-06, + "log_odds_chosen": 14.482208251953125, + "log_odds_ratio": -0.18219685554504395, + "logits/chosen": -1.772078514099121, + "logits/rejected": -4.952263832092285, + "logps/chosen": -1.774475336074829, + "logps/rejected": -16.061283111572266, + "loss": 1.6118, + "nll_loss": 1.5935897827148438, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17744752764701843, + "rewards/margins": 1.4286808967590332, + "rewards/rejected": -1.6061283349990845, + "step": 455 + }, + { + "epoch": 1.7207547169811321, + "grad_norm": 0.3706720471382141, + "learning_rate": 6.981132075471699e-06, + "log_odds_chosen": 15.722382545471191, + "log_odds_ratio": -0.003935753367841244, + "logits/chosen": -2.7362916469573975, + "logits/rejected": -6.182605743408203, + "logps/chosen": -1.4580714702606201, + "logps/rejected": -16.79517364501953, + "loss": 1.4383, + "nll_loss": 1.4378933906555176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.145807147026062, + "rewards/margins": 1.533710241317749, + "rewards/rejected": -1.6795175075531006, + "step": 456 + }, + { + "epoch": 1.7245283018867923, + "grad_norm": 0.40554314851760864, + "learning_rate": 6.886792452830189e-06, + "log_odds_chosen": 18.29325294494629, + "log_odds_ratio": -8.650338713778183e-06, + "logits/chosen": -0.5920654535293579, + "logits/rejected": -4.844081401824951, + "logps/chosen": -1.4798316955566406, + "logps/rejected": -19.450267791748047, + "loss": 1.2259, + "nll_loss": 1.2258515357971191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14798316359519958, + "rewards/margins": 1.7970435619354248, + "rewards/rejected": -1.9450266361236572, + "step": 457 + }, + { + "epoch": 1.728301886792453, + "grad_norm": 0.42811644077301025, + "learning_rate": 6.792452830188679e-06, + "log_odds_chosen": 17.65870475769043, + "log_odds_ratio": -1.3024438885622658e-05, + "logits/chosen": -1.6862386465072632, + "logits/rejected": -4.607665061950684, + "logps/chosen": -1.463801383972168, + "logps/rejected": -18.766820907592773, + "loss": 1.473, + "nll_loss": 1.472968578338623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14638014137744904, + "rewards/margins": 1.7303019762039185, + "rewards/rejected": -1.8766820430755615, + "step": 458 + }, + { + "epoch": 1.732075471698113, + "grad_norm": 0.34931331872940063, + "learning_rate": 6.69811320754717e-06, + "log_odds_chosen": 18.853530883789062, + "log_odds_ratio": -1.6465862699988065e-06, + "logits/chosen": -1.1972850561141968, + "logits/rejected": -5.578787326812744, + "logps/chosen": -1.192124366760254, + "logps/rejected": -19.51374626159668, + "loss": 1.2305, + "nll_loss": 1.2305028438568115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11921243369579315, + "rewards/margins": 1.8321621417999268, + "rewards/rejected": -1.9513745307922363, + "step": 459 + }, + { + "epoch": 1.7358490566037736, + "grad_norm": 0.38165482878685, + "learning_rate": 6.60377358490566e-06, + "log_odds_chosen": 17.29033851623535, + "log_odds_ratio": -0.003331738291308284, + "logits/chosen": -0.4877992570400238, + "logits/rejected": -6.1247406005859375, + "logps/chosen": -1.6295177936553955, + "logps/rejected": -18.68689727783203, + "loss": 1.559, + "nll_loss": 1.5586189031600952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1629517823457718, + "rewards/margins": 1.7057380676269531, + "rewards/rejected": -1.8686898946762085, + "step": 460 + }, + { + "epoch": 1.7396226415094338, + "grad_norm": 0.372586727142334, + "learning_rate": 6.5094339622641515e-06, + "log_odds_chosen": 16.901809692382812, + "log_odds_ratio": -0.0549037791788578, + "logits/chosen": -0.6773020625114441, + "logits/rejected": -4.9583740234375, + "logps/chosen": -1.572051763534546, + "logps/rejected": -18.240394592285156, + "loss": 1.5153, + "nll_loss": 1.509831428527832, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15720517933368683, + "rewards/margins": 1.6668341159820557, + "rewards/rejected": -1.8240394592285156, + "step": 461 + }, + { + "epoch": 1.7433962264150944, + "grad_norm": 0.3676230013370514, + "learning_rate": 6.415094339622642e-06, + "log_odds_chosen": 17.052555084228516, + "log_odds_ratio": -0.0029449001885950565, + "logits/chosen": -1.5178229808807373, + "logits/rejected": -6.080841541290283, + "logps/chosen": -1.308215618133545, + "logps/rejected": -17.94374656677246, + "loss": 1.4127, + "nll_loss": 1.4124376773834229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1308215707540512, + "rewards/margins": 1.663553237915039, + "rewards/rejected": -1.794374704360962, + "step": 462 + }, + { + "epoch": 1.7471698113207546, + "grad_norm": 0.3529452085494995, + "learning_rate": 6.320754716981132e-06, + "log_odds_chosen": 17.932321548461914, + "log_odds_ratio": -0.034649647772312164, + "logits/chosen": -0.5845901966094971, + "logits/rejected": -5.674325466156006, + "logps/chosen": -1.6330788135528564, + "logps/rejected": -19.286094665527344, + "loss": 1.5689, + "nll_loss": 1.5654515027999878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16330787539482117, + "rewards/margins": 1.7653017044067383, + "rewards/rejected": -1.9286096096038818, + "step": 463 + }, + { + "epoch": 1.7509433962264151, + "grad_norm": 0.5019151568412781, + "learning_rate": 6.226415094339623e-06, + "log_odds_chosen": 14.161150932312012, + "log_odds_ratio": -0.0002686173829715699, + "logits/chosen": -1.4875158071517944, + "logits/rejected": -5.882111549377441, + "logps/chosen": -1.6882565021514893, + "logps/rejected": -15.569530487060547, + "loss": 1.6722, + "nll_loss": 1.6721980571746826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1688256412744522, + "rewards/margins": 1.388127326965332, + "rewards/rejected": -1.556952953338623, + "step": 464 + }, + { + "epoch": 1.7547169811320755, + "grad_norm": 0.32335349917411804, + "learning_rate": 6.132075471698113e-06, + "log_odds_chosen": 19.670560836791992, + "log_odds_ratio": -9.08325964701362e-05, + "logits/chosen": -1.4163541793823242, + "logits/rejected": -6.159734725952148, + "logps/chosen": -1.548288106918335, + "logps/rejected": -20.88932991027832, + "loss": 1.4723, + "nll_loss": 1.4722576141357422, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15482881665229797, + "rewards/margins": 1.9341042041778564, + "rewards/rejected": -2.088933229446411, + "step": 465 + }, + { + "epoch": 1.758490566037736, + "grad_norm": 0.4526292681694031, + "learning_rate": 6.037735849056604e-06, + "log_odds_chosen": 15.761448860168457, + "log_odds_ratio": -0.07157308608293533, + "logits/chosen": -2.5339674949645996, + "logits/rejected": -6.691962242126465, + "logps/chosen": -1.6193218231201172, + "logps/rejected": -17.078277587890625, + "loss": 1.5394, + "nll_loss": 1.5322078466415405, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16193217039108276, + "rewards/margins": 1.5458955764770508, + "rewards/rejected": -1.7078275680541992, + "step": 466 + }, + { + "epoch": 1.7622641509433963, + "grad_norm": 0.3781883120536804, + "learning_rate": 5.943396226415095e-06, + "log_odds_chosen": 17.7440128326416, + "log_odds_ratio": -2.7567359666136326e-06, + "logits/chosen": -0.3988521695137024, + "logits/rejected": -3.945779800415039, + "logps/chosen": -1.4811797142028809, + "logps/rejected": -18.924360275268555, + "loss": 1.2401, + "nll_loss": 1.2400920391082764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14811795949935913, + "rewards/margins": 1.7443180084228516, + "rewards/rejected": -1.8924360275268555, + "step": 467 + }, + { + "epoch": 1.7660377358490567, + "grad_norm": 0.33292269706726074, + "learning_rate": 5.849056603773585e-06, + "log_odds_chosen": 17.42066192626953, + "log_odds_ratio": -0.016988694667816162, + "logits/chosen": -0.5932101011276245, + "logits/rejected": -6.109551429748535, + "logps/chosen": -1.484053611755371, + "logps/rejected": -18.59859275817871, + "loss": 1.4429, + "nll_loss": 1.4412463903427124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14840535819530487, + "rewards/margins": 1.711454153060913, + "rewards/rejected": -1.8598594665527344, + "step": 468 + }, + { + "epoch": 1.769811320754717, + "grad_norm": 0.33539214730262756, + "learning_rate": 5.754716981132076e-06, + "log_odds_chosen": 18.23859977722168, + "log_odds_ratio": -0.058486614376306534, + "logits/chosen": -0.5683273077011108, + "logits/rejected": -6.111762046813965, + "logps/chosen": -1.6735715866088867, + "logps/rejected": -19.64827537536621, + "loss": 1.5931, + "nll_loss": 1.5872840881347656, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1673571765422821, + "rewards/margins": 1.7974703311920166, + "rewards/rejected": -1.9648274183273315, + "step": 469 + }, + { + "epoch": 1.7735849056603774, + "grad_norm": 0.3669726252555847, + "learning_rate": 5.660377358490566e-06, + "log_odds_chosen": 16.35857391357422, + "log_odds_ratio": -0.0035181858111172915, + "logits/chosen": -2.055553913116455, + "logits/rejected": -6.248283386230469, + "logps/chosen": -1.3732891082763672, + "logps/rejected": -17.36093521118164, + "loss": 1.3191, + "nll_loss": 1.318752408027649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13732890784740448, + "rewards/margins": 1.5987646579742432, + "rewards/rejected": -1.736093521118164, + "step": 470 + }, + { + "epoch": 1.7773584905660378, + "grad_norm": 0.3617085814476013, + "learning_rate": 5.566037735849057e-06, + "log_odds_chosen": 16.787891387939453, + "log_odds_ratio": -9.142341696133371e-06, + "logits/chosen": -1.4523394107818604, + "logits/rejected": -7.092175006866455, + "logps/chosen": -1.681269884109497, + "logps/rejected": -18.210691452026367, + "loss": 1.5766, + "nll_loss": 1.5765758752822876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16812700033187866, + "rewards/margins": 1.652942180633545, + "rewards/rejected": -1.8210691213607788, + "step": 471 + }, + { + "epoch": 1.7811320754716982, + "grad_norm": 0.3479359447956085, + "learning_rate": 5.4716981132075475e-06, + "log_odds_chosen": 16.96417808532715, + "log_odds_ratio": -0.00023999404220376164, + "logits/chosen": -1.1192617416381836, + "logits/rejected": -5.284356117248535, + "logps/chosen": -1.3302626609802246, + "logps/rejected": -17.81631088256836, + "loss": 1.3527, + "nll_loss": 1.3526952266693115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13302627205848694, + "rewards/margins": 1.6486049890518188, + "rewards/rejected": -1.7816312313079834, + "step": 472 + }, + { + "epoch": 1.7849056603773585, + "grad_norm": 0.41884541511535645, + "learning_rate": 5.377358490566039e-06, + "log_odds_chosen": 18.43604278564453, + "log_odds_ratio": -3.442222805460915e-06, + "logits/chosen": -1.2943966388702393, + "logits/rejected": -5.607792377471924, + "logps/chosen": -1.663355827331543, + "logps/rejected": -19.86046600341797, + "loss": 1.4899, + "nll_loss": 1.4899128675460815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1663355678319931, + "rewards/margins": 1.8197109699249268, + "rewards/rejected": -1.986046552658081, + "step": 473 + }, + { + "epoch": 1.788679245283019, + "grad_norm": 0.39448869228363037, + "learning_rate": 5.283018867924529e-06, + "log_odds_chosen": 16.650920867919922, + "log_odds_ratio": -0.0011353573063388467, + "logits/chosen": -3.2340540885925293, + "logits/rejected": -6.999566078186035, + "logps/chosen": -1.4379664659500122, + "logps/rejected": -17.618440628051758, + "loss": 1.4977, + "nll_loss": 1.4976294040679932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1437966525554657, + "rewards/margins": 1.6180473566055298, + "rewards/rejected": -1.7618440389633179, + "step": 474 + }, + { + "epoch": 1.7924528301886793, + "grad_norm": 0.4289974570274353, + "learning_rate": 5.188679245283019e-06, + "log_odds_chosen": 17.761314392089844, + "log_odds_ratio": -0.03405510634183884, + "logits/chosen": -1.8382487297058105, + "logits/rejected": -6.273689270019531, + "logps/chosen": -1.5025469064712524, + "logps/rejected": -18.807750701904297, + "loss": 1.3993, + "nll_loss": 1.3959349393844604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15025469660758972, + "rewards/margins": 1.7305206060409546, + "rewards/rejected": -1.8807752132415771, + "step": 475 + }, + { + "epoch": 1.7962264150943397, + "grad_norm": 0.3701404333114624, + "learning_rate": 5.09433962264151e-06, + "log_odds_chosen": 18.288116455078125, + "log_odds_ratio": -0.08911100029945374, + "logits/chosen": -2.354094982147217, + "logits/rejected": -4.444386959075928, + "logps/chosen": -1.5296881198883057, + "logps/rejected": -19.461259841918945, + "loss": 1.4101, + "nll_loss": 1.401210069656372, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15296879410743713, + "rewards/margins": 1.7931572198867798, + "rewards/rejected": -1.946125864982605, + "step": 476 + }, + { + "epoch": 1.8, + "grad_norm": 0.3996965289115906, + "learning_rate": 5e-06, + "log_odds_chosen": 18.797988891601562, + "log_odds_ratio": -3.426304101594724e-05, + "logits/chosen": -0.7937726378440857, + "logits/rejected": -4.634369373321533, + "logps/chosen": -1.5706990957260132, + "logps/rejected": -20.071020126342773, + "loss": 1.4157, + "nll_loss": 1.4157121181488037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15706990659236908, + "rewards/margins": 1.850032091140747, + "rewards/rejected": -2.0071020126342773, + "step": 477 + }, + { + "epoch": 1.8037735849056604, + "grad_norm": 0.35529616475105286, + "learning_rate": 4.9056603773584915e-06, + "log_odds_chosen": 18.047014236450195, + "log_odds_ratio": -0.0028157131746411324, + "logits/chosen": -2.256852149963379, + "logits/rejected": -5.895351886749268, + "logps/chosen": -1.6656599044799805, + "logps/rejected": -19.412460327148438, + "loss": 1.448, + "nll_loss": 1.4476892948150635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16656598448753357, + "rewards/margins": 1.7746801376342773, + "rewards/rejected": -1.9412461519241333, + "step": 478 + }, + { + "epoch": 1.8075471698113208, + "grad_norm": 0.4262011647224426, + "learning_rate": 4.811320754716982e-06, + "log_odds_chosen": 18.934986114501953, + "log_odds_ratio": -2.272450956297689e-06, + "logits/chosen": -3.147275924682617, + "logits/rejected": -7.059483528137207, + "logps/chosen": -1.4018633365631104, + "logps/rejected": -19.886085510253906, + "loss": 1.3952, + "nll_loss": 1.395203948020935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14018632471561432, + "rewards/margins": 1.8484222888946533, + "rewards/rejected": -1.988608479499817, + "step": 479 + }, + { + "epoch": 1.8113207547169812, + "grad_norm": 0.47693830728530884, + "learning_rate": 4.716981132075472e-06, + "log_odds_chosen": 19.023094177246094, + "log_odds_ratio": -9.76030378296855e-07, + "logits/chosen": -2.5737247467041016, + "logits/rejected": -6.724173545837402, + "logps/chosen": -1.2855720520019531, + "logps/rejected": -19.879941940307617, + "loss": 1.3567, + "nll_loss": 1.3567023277282715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1285572052001953, + "rewards/margins": 1.8594369888305664, + "rewards/rejected": -1.9879940748214722, + "step": 480 + }, + { + "epoch": 1.8150943396226416, + "grad_norm": 0.33889421820640564, + "learning_rate": 4.622641509433962e-06, + "log_odds_chosen": 18.900848388671875, + "log_odds_ratio": -2.5331991082566674e-07, + "logits/chosen": -1.0899468660354614, + "logits/rejected": -6.098740100860596, + "logps/chosen": -1.4805465936660767, + "logps/rejected": -20.047718048095703, + "loss": 1.3192, + "nll_loss": 1.3191674947738647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14805465936660767, + "rewards/margins": 1.8567172288894653, + "rewards/rejected": -2.0047719478607178, + "step": 481 + }, + { + "epoch": 1.818867924528302, + "grad_norm": 0.4076652526855469, + "learning_rate": 4.5283018867924524e-06, + "log_odds_chosen": 17.234874725341797, + "log_odds_ratio": -2.1215018932707608e-05, + "logits/chosen": -0.29048505425453186, + "logits/rejected": -5.432765007019043, + "logps/chosen": -1.6638712882995605, + "logps/rejected": -18.638235092163086, + "loss": 1.631, + "nll_loss": 1.631001591682434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.166387140750885, + "rewards/margins": 1.6974364519119263, + "rewards/rejected": -1.863823413848877, + "step": 482 + }, + { + "epoch": 1.8226415094339623, + "grad_norm": 0.35861098766326904, + "learning_rate": 4.4339622641509435e-06, + "log_odds_chosen": 18.069866180419922, + "log_odds_ratio": -2.9603910661535338e-05, + "logits/chosen": -1.9246575832366943, + "logits/rejected": -4.993512153625488, + "logps/chosen": -1.2558917999267578, + "logps/rejected": -18.758874893188477, + "loss": 1.5623, + "nll_loss": 1.5623277425765991, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12558917701244354, + "rewards/margins": 1.750298261642456, + "rewards/rejected": -1.8758875131607056, + "step": 483 + }, + { + "epoch": 1.8264150943396227, + "grad_norm": 0.374891459941864, + "learning_rate": 4.339622641509434e-06, + "log_odds_chosen": 17.780948638916016, + "log_odds_ratio": -7.957565685501322e-06, + "logits/chosen": -2.0383100509643555, + "logits/rejected": -5.80636739730835, + "logps/chosen": -1.346254825592041, + "logps/rejected": -18.59038543701172, + "loss": 1.2654, + "nll_loss": 1.2653898000717163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13462547957897186, + "rewards/margins": 1.7244129180908203, + "rewards/rejected": -1.8590385913848877, + "step": 484 + }, + { + "epoch": 1.830188679245283, + "grad_norm": 0.37461966276168823, + "learning_rate": 4.245283018867925e-06, + "log_odds_chosen": 16.527099609375, + "log_odds_ratio": -0.003529702080413699, + "logits/chosen": -3.2636947631835938, + "logits/rejected": -5.3443403244018555, + "logps/chosen": -1.5424690246582031, + "logps/rejected": -17.72702407836914, + "loss": 1.532, + "nll_loss": 1.5316277742385864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15424689650535583, + "rewards/margins": 1.6184556484222412, + "rewards/rejected": -1.7727024555206299, + "step": 485 + }, + { + "epoch": 1.8339622641509434, + "grad_norm": 0.42353299260139465, + "learning_rate": 4.150943396226415e-06, + "log_odds_chosen": 17.351669311523438, + "log_odds_ratio": -0.02633853070437908, + "logits/chosen": -1.5371365547180176, + "logits/rejected": -6.403836250305176, + "logps/chosen": -1.605021357536316, + "logps/rejected": -18.7138671875, + "loss": 1.4473, + "nll_loss": 1.4446651935577393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1605021357536316, + "rewards/margins": 1.7108845710754395, + "rewards/rejected": -1.8713867664337158, + "step": 486 + }, + { + "epoch": 1.8377358490566038, + "grad_norm": 0.34952792525291443, + "learning_rate": 4.056603773584905e-06, + "log_odds_chosen": 15.732725143432617, + "log_odds_ratio": -0.01928078942000866, + "logits/chosen": -3.1891517639160156, + "logits/rejected": -6.666206359863281, + "logps/chosen": -1.8744854927062988, + "logps/rejected": -17.41217041015625, + "loss": 1.664, + "nll_loss": 1.6620932817459106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18744854629039764, + "rewards/margins": 1.553768515586853, + "rewards/rejected": -1.741217017173767, + "step": 487 + }, + { + "epoch": 1.8415094339622642, + "grad_norm": 0.36892521381378174, + "learning_rate": 3.962264150943396e-06, + "log_odds_chosen": 19.160884857177734, + "log_odds_ratio": -9.086857608053833e-05, + "logits/chosen": -3.0095064640045166, + "logits/rejected": -5.858531475067139, + "logps/chosen": -1.4025964736938477, + "logps/rejected": -20.25691032409668, + "loss": 1.5139, + "nll_loss": 1.513940691947937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14025965332984924, + "rewards/margins": 1.8854316473007202, + "rewards/rejected": -2.025691270828247, + "step": 488 + }, + { + "epoch": 1.8452830188679246, + "grad_norm": 0.4228150546550751, + "learning_rate": 3.867924528301887e-06, + "log_odds_chosen": 12.711286544799805, + "log_odds_ratio": -0.03767332434654236, + "logits/chosen": -0.7741556763648987, + "logits/rejected": -4.395710468292236, + "logps/chosen": -1.6399070024490356, + "logps/rejected": -14.126274108886719, + "loss": 1.4961, + "nll_loss": 1.492356538772583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16399070620536804, + "rewards/margins": 1.2486367225646973, + "rewards/rejected": -1.4126274585723877, + "step": 489 + }, + { + "epoch": 1.849056603773585, + "grad_norm": 0.3612087666988373, + "learning_rate": 3.7735849056603773e-06, + "log_odds_chosen": 20.139719009399414, + "log_odds_ratio": -2.384188633186568e-07, + "logits/chosen": -1.845539927482605, + "logits/rejected": -6.139976978302002, + "logps/chosen": -1.3304532766342163, + "logps/rejected": -21.026758193969727, + "loss": 1.3543, + "nll_loss": 1.3543174266815186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13304531574249268, + "rewards/margins": 1.969630479812622, + "rewards/rejected": -2.1026759147644043, + "step": 490 + }, + { + "epoch": 1.8528301886792453, + "grad_norm": 0.33791664242744446, + "learning_rate": 3.679245283018868e-06, + "log_odds_chosen": 17.015119552612305, + "log_odds_ratio": -0.00011756140156649053, + "logits/chosen": -0.6114660501480103, + "logits/rejected": -5.934682369232178, + "logps/chosen": -1.4735393524169922, + "logps/rejected": -18.142919540405273, + "loss": 1.493, + "nll_loss": 1.4929838180541992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14735394716262817, + "rewards/margins": 1.666938066482544, + "rewards/rejected": -1.814292073249817, + "step": 491 + }, + { + "epoch": 1.8566037735849057, + "grad_norm": 0.42551860213279724, + "learning_rate": 3.5849056603773586e-06, + "log_odds_chosen": 15.526764869689941, + "log_odds_ratio": -0.06459427624940872, + "logits/chosen": -0.3648894727230072, + "logits/rejected": -5.722745418548584, + "logps/chosen": -1.61395263671875, + "logps/rejected": -16.911762237548828, + "loss": 1.4662, + "nll_loss": 1.4597229957580566, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16139525175094604, + "rewards/margins": 1.5297811031341553, + "rewards/rejected": -1.691176414489746, + "step": 492 + }, + { + "epoch": 1.860377358490566, + "grad_norm": 0.3618764579296112, + "learning_rate": 3.4905660377358493e-06, + "log_odds_chosen": 17.239789962768555, + "log_odds_ratio": -1.4826684946456226e-06, + "logits/chosen": -0.21888774633407593, + "logits/rejected": -4.190219879150391, + "logps/chosen": -1.3131389617919922, + "logps/rejected": -18.119144439697266, + "loss": 1.271, + "nll_loss": 1.270983099937439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13131389021873474, + "rewards/margins": 1.6806005239486694, + "rewards/rejected": -1.8119144439697266, + "step": 493 + }, + { + "epoch": 1.8641509433962264, + "grad_norm": 0.4203263223171234, + "learning_rate": 3.3962264150943395e-06, + "log_odds_chosen": 17.666906356811523, + "log_odds_ratio": -4.238808833179064e-05, + "logits/chosen": -2.081303119659424, + "logits/rejected": -6.445936679840088, + "logps/chosen": -1.6142175197601318, + "logps/rejected": -18.978389739990234, + "loss": 1.6097, + "nll_loss": 1.6096843481063843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1614217460155487, + "rewards/margins": 1.736417293548584, + "rewards/rejected": -1.897839069366455, + "step": 494 + }, + { + "epoch": 1.8679245283018868, + "grad_norm": 0.32720765471458435, + "learning_rate": 3.30188679245283e-06, + "log_odds_chosen": 18.920677185058594, + "log_odds_ratio": -1.1772006018873071e-06, + "logits/chosen": 0.03428873419761658, + "logits/rejected": -4.708007335662842, + "logps/chosen": -1.2015572786331177, + "logps/rejected": -19.660449981689453, + "loss": 1.3305, + "nll_loss": 1.3305293321609497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12015574425458908, + "rewards/margins": 1.8458893299102783, + "rewards/rejected": -1.9660451412200928, + "step": 495 + }, + { + "epoch": 1.8716981132075472, + "grad_norm": 0.4030371308326721, + "learning_rate": 3.207547169811321e-06, + "log_odds_chosen": 17.407262802124023, + "log_odds_ratio": -9.635530295781791e-05, + "logits/chosen": -1.2139384746551514, + "logits/rejected": -5.052234172821045, + "logps/chosen": -1.58628511428833, + "logps/rejected": -18.708999633789062, + "loss": 1.4232, + "nll_loss": 1.4231928586959839, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15862850844860077, + "rewards/margins": 1.7122715711593628, + "rewards/rejected": -1.87090003490448, + "step": 496 + }, + { + "epoch": 1.8754716981132076, + "grad_norm": 0.3452160954475403, + "learning_rate": 3.1132075471698115e-06, + "log_odds_chosen": 19.35751724243164, + "log_odds_ratio": -2.719517624427681e-06, + "logits/chosen": -1.754660725593567, + "logits/rejected": -6.674939155578613, + "logps/chosen": -1.4923593997955322, + "logps/rejected": -20.52996826171875, + "loss": 1.3139, + "nll_loss": 1.3138580322265625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14923594892024994, + "rewards/margins": 1.9037607908248901, + "rewards/rejected": -2.0529966354370117, + "step": 497 + }, + { + "epoch": 1.879245283018868, + "grad_norm": 0.31435737013816833, + "learning_rate": 3.018867924528302e-06, + "log_odds_chosen": 17.633760452270508, + "log_odds_ratio": -0.07248206436634064, + "logits/chosen": -1.433829665184021, + "logits/rejected": -6.44128942489624, + "logps/chosen": -1.4513802528381348, + "logps/rejected": -18.749958038330078, + "loss": 1.5574, + "nll_loss": 1.5501039028167725, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14513802528381348, + "rewards/margins": 1.7298578023910522, + "rewards/rejected": -1.8749958276748657, + "step": 498 + }, + { + "epoch": 1.8830188679245283, + "grad_norm": 0.415445476770401, + "learning_rate": 2.9245283018867924e-06, + "log_odds_chosen": 16.431304931640625, + "log_odds_ratio": -0.002142214449122548, + "logits/chosen": -0.9190161824226379, + "logits/rejected": -5.215124130249023, + "logps/chosen": -1.5988054275512695, + "logps/rejected": -17.76313018798828, + "loss": 1.5031, + "nll_loss": 1.5028951168060303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15988054871559143, + "rewards/margins": 1.6164326667785645, + "rewards/rejected": -1.776313066482544, + "step": 499 + }, + { + "epoch": 1.8867924528301887, + "grad_norm": 0.34781375527381897, + "learning_rate": 2.830188679245283e-06, + "log_odds_chosen": 16.619857788085938, + "log_odds_ratio": -0.0440763421356678, + "logits/chosen": -1.902965784072876, + "logits/rejected": -7.038515090942383, + "logps/chosen": -1.5125619173049927, + "logps/rejected": -17.874229431152344, + "loss": 1.45, + "nll_loss": 1.4455456733703613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15125618875026703, + "rewards/margins": 1.6361666917800903, + "rewards/rejected": -1.7874228954315186, + "step": 500 + }, + { + "epoch": 1.890566037735849, + "grad_norm": 0.4033926725387573, + "learning_rate": 2.7358490566037738e-06, + "log_odds_chosen": 18.919782638549805, + "log_odds_ratio": -0.04001227393746376, + "logits/chosen": -1.2150888442993164, + "logits/rejected": -4.071748733520508, + "logps/chosen": -1.1845972537994385, + "logps/rejected": -19.4738712310791, + "loss": 1.2712, + "nll_loss": 1.2671959400177002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11845972388982773, + "rewards/margins": 1.8289273977279663, + "rewards/rejected": -1.9473869800567627, + "step": 501 + }, + { + "epoch": 1.8943396226415095, + "grad_norm": 0.4240841865539551, + "learning_rate": 2.6415094339622644e-06, + "log_odds_chosen": 18.493478775024414, + "log_odds_ratio": -5.438935772872355e-07, + "logits/chosen": -1.6389120817184448, + "logits/rejected": -5.454832077026367, + "logps/chosen": -1.660611629486084, + "logps/rejected": -19.89701271057129, + "loss": 1.6124, + "nll_loss": 1.6123967170715332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1660611629486084, + "rewards/margins": 1.8236401081085205, + "rewards/rejected": -1.989701271057129, + "step": 502 + }, + { + "epoch": 1.8981132075471698, + "grad_norm": 0.379930317401886, + "learning_rate": 2.547169811320755e-06, + "log_odds_chosen": 18.02582550048828, + "log_odds_ratio": -5.4391039157053456e-06, + "logits/chosen": -0.07657934725284576, + "logits/rejected": -4.238829135894775, + "logps/chosen": -1.2943615913391113, + "logps/rejected": -18.893966674804688, + "loss": 1.2938, + "nll_loss": 1.2937697172164917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12943615019321442, + "rewards/margins": 1.759960412979126, + "rewards/rejected": -1.8893966674804688, + "step": 503 + }, + { + "epoch": 1.9018867924528302, + "grad_norm": 0.4327671527862549, + "learning_rate": 2.4528301886792457e-06, + "log_odds_chosen": 17.240550994873047, + "log_odds_ratio": -0.08509234338998795, + "logits/chosen": -0.8051078915596008, + "logits/rejected": -4.25758695602417, + "logps/chosen": -1.6059653759002686, + "logps/rejected": -18.553573608398438, + "loss": 1.4932, + "nll_loss": 1.4846444129943848, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16059653460979462, + "rewards/margins": 1.694760799407959, + "rewards/rejected": -1.85535728931427, + "step": 504 + }, + { + "epoch": 1.9056603773584906, + "grad_norm": 0.3554707169532776, + "learning_rate": 2.358490566037736e-06, + "log_odds_chosen": 15.833232879638672, + "log_odds_ratio": -0.008360692299902439, + "logits/chosen": -2.8660082817077637, + "logits/rejected": -5.381879806518555, + "logps/chosen": -1.5323365926742554, + "logps/rejected": -17.071136474609375, + "loss": 1.5256, + "nll_loss": 1.5248125791549683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15323364734649658, + "rewards/margins": 1.553879976272583, + "rewards/rejected": -1.70711350440979, + "step": 505 + }, + { + "epoch": 1.909433962264151, + "grad_norm": 0.40750032663345337, + "learning_rate": 2.2641509433962262e-06, + "log_odds_chosen": 18.917707443237305, + "log_odds_ratio": -1.3187664080760442e-06, + "logits/chosen": -2.4501075744628906, + "logits/rejected": -4.550923824310303, + "logps/chosen": -1.6083648204803467, + "logps/rejected": -20.180192947387695, + "loss": 1.422, + "nll_loss": 1.4219931364059448, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16083648800849915, + "rewards/margins": 1.8571827411651611, + "rewards/rejected": -2.018019199371338, + "step": 506 + }, + { + "epoch": 1.9132075471698113, + "grad_norm": 0.3912452459335327, + "learning_rate": 2.169811320754717e-06, + "log_odds_chosen": 16.282840728759766, + "log_odds_ratio": -0.0001460638886783272, + "logits/chosen": -1.1887915134429932, + "logits/rejected": -5.86929988861084, + "logps/chosen": -1.3933725357055664, + "logps/rejected": -17.320636749267578, + "loss": 1.3639, + "nll_loss": 1.3638955354690552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13933727145195007, + "rewards/margins": 1.5927265882492065, + "rewards/rejected": -1.7320637702941895, + "step": 507 + }, + { + "epoch": 1.9169811320754717, + "grad_norm": 0.3855321705341339, + "learning_rate": 2.0754716981132075e-06, + "log_odds_chosen": 17.623600006103516, + "log_odds_ratio": -4.701381840277463e-06, + "logits/chosen": -0.25701072812080383, + "logits/rejected": -5.022630214691162, + "logps/chosen": -1.5275490283966064, + "logps/rejected": -18.818309783935547, + "loss": 1.3883, + "nll_loss": 1.3883423805236816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15275491774082184, + "rewards/margins": 1.7290761470794678, + "rewards/rejected": -1.8818310499191284, + "step": 508 + }, + { + "epoch": 1.920754716981132, + "grad_norm": 0.4456048607826233, + "learning_rate": 1.981132075471698e-06, + "log_odds_chosen": 19.397144317626953, + "log_odds_ratio": -0.08647830039262772, + "logits/chosen": -2.1884102821350098, + "logits/rejected": -4.864468574523926, + "logps/chosen": -1.1446378231048584, + "logps/rejected": -19.82569694519043, + "loss": 1.1951, + "nll_loss": 1.1864542961120605, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.11446378380060196, + "rewards/margins": 1.8681057691574097, + "rewards/rejected": -1.982569694519043, + "step": 509 + }, + { + "epoch": 1.9245283018867925, + "grad_norm": 0.38309693336486816, + "learning_rate": 1.8867924528301887e-06, + "log_odds_chosen": 17.333568572998047, + "log_odds_ratio": -0.0018029811326414347, + "logits/chosen": -1.732748031616211, + "logits/rejected": -5.647907733917236, + "logps/chosen": -1.6717514991760254, + "logps/rejected": -18.71475601196289, + "loss": 1.6, + "nll_loss": 1.5998656749725342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16717512905597687, + "rewards/margins": 1.7043005228042603, + "rewards/rejected": -1.8714755773544312, + "step": 510 + }, + { + "epoch": 1.9283018867924528, + "grad_norm": 0.42033860087394714, + "learning_rate": 1.7924528301886793e-06, + "log_odds_chosen": 18.408260345458984, + "log_odds_ratio": -0.07517862319946289, + "logits/chosen": -1.3036704063415527, + "logits/rejected": -6.073916435241699, + "logps/chosen": -1.500823736190796, + "logps/rejected": -19.579364776611328, + "loss": 1.3745, + "nll_loss": 1.3670070171356201, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.15008236467838287, + "rewards/margins": 1.807854175567627, + "rewards/rejected": -1.957936406135559, + "step": 511 + }, + { + "epoch": 1.9320754716981132, + "grad_norm": 0.43365299701690674, + "learning_rate": 1.6981132075471698e-06, + "log_odds_chosen": 16.9204158782959, + "log_odds_ratio": -0.010048388503491879, + "logits/chosen": -1.4353740215301514, + "logits/rejected": -4.844537734985352, + "logps/chosen": -1.4958455562591553, + "logps/rejected": -18.103038787841797, + "loss": 1.2225, + "nll_loss": 1.2214566469192505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14958456158638, + "rewards/margins": 1.6607191562652588, + "rewards/rejected": -1.810303807258606, + "step": 512 + }, + { + "epoch": 1.9358490566037736, + "grad_norm": 0.41454991698265076, + "learning_rate": 1.6037735849056604e-06, + "log_odds_chosen": 17.356592178344727, + "log_odds_ratio": -0.02275564707815647, + "logits/chosen": -2.385758399963379, + "logits/rejected": -6.4152421951293945, + "logps/chosen": -1.799842119216919, + "logps/rejected": -18.904577255249023, + "loss": 1.6663, + "nll_loss": 1.664039134979248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1799842119216919, + "rewards/margins": 1.7104735374450684, + "rewards/rejected": -1.8904577493667603, + "step": 513 + }, + { + "epoch": 1.939622641509434, + "grad_norm": 0.3880457878112793, + "learning_rate": 1.509433962264151e-06, + "log_odds_chosen": 15.238203048706055, + "log_odds_ratio": -0.01757655292749405, + "logits/chosen": -1.1916530132293701, + "logits/rejected": -5.214702606201172, + "logps/chosen": -1.4446446895599365, + "logps/rejected": -16.36115264892578, + "loss": 1.5477, + "nll_loss": 1.5459623336791992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14446447789669037, + "rewards/margins": 1.4916508197784424, + "rewards/rejected": -1.636115312576294, + "step": 514 + }, + { + "epoch": 1.9433962264150944, + "grad_norm": 0.47617748379707336, + "learning_rate": 1.4150943396226415e-06, + "log_odds_chosen": 16.34947967529297, + "log_odds_ratio": -0.0004449410189408809, + "logits/chosen": -2.4440126419067383, + "logits/rejected": -5.788022994995117, + "logps/chosen": -1.6996843814849854, + "logps/rejected": -17.788955688476562, + "loss": 1.4768, + "nll_loss": 1.4767930507659912, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16996845602989197, + "rewards/margins": 1.6089271306991577, + "rewards/rejected": -1.778895616531372, + "step": 515 + }, + { + "epoch": 1.9471698113207547, + "grad_norm": 0.40238529443740845, + "learning_rate": 1.3207547169811322e-06, + "log_odds_chosen": 18.627809524536133, + "log_odds_ratio": -4.4853818508272525e-06, + "logits/chosen": -1.0439683198928833, + "logits/rejected": -5.652813911437988, + "logps/chosen": -1.4960782527923584, + "logps/rejected": -19.751792907714844, + "loss": 1.3031, + "nll_loss": 1.3030521869659424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1496078073978424, + "rewards/margins": 1.8255715370178223, + "rewards/rejected": -1.9751791954040527, + "step": 516 + }, + { + "epoch": 1.950943396226415, + "grad_norm": 0.41020315885543823, + "learning_rate": 1.2264150943396229e-06, + "log_odds_chosen": 17.56089973449707, + "log_odds_ratio": -0.049375876784324646, + "logits/chosen": -0.404629647731781, + "logits/rejected": -5.214582443237305, + "logps/chosen": -1.3766056299209595, + "logps/rejected": -18.530118942260742, + "loss": 1.2775, + "nll_loss": 1.2725828886032104, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13766056299209595, + "rewards/margins": 1.7153512239456177, + "rewards/rejected": -1.8530117273330688, + "step": 517 + }, + { + "epoch": 1.9547169811320755, + "grad_norm": 0.5101109147071838, + "learning_rate": 1.1320754716981131e-06, + "log_odds_chosen": 19.110702514648438, + "log_odds_ratio": -1.9727249309653416e-05, + "logits/chosen": -2.067298412322998, + "logits/rejected": -5.900681018829346, + "logps/chosen": -1.4211063385009766, + "logps/rejected": -20.093542098999023, + "loss": 1.5492, + "nll_loss": 1.5492104291915894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14211063086986542, + "rewards/margins": 1.867243766784668, + "rewards/rejected": -2.00935435295105, + "step": 518 + }, + { + "epoch": 1.9584905660377359, + "grad_norm": 0.37587234377861023, + "learning_rate": 1.0377358490566038e-06, + "log_odds_chosen": 17.110137939453125, + "log_odds_ratio": -0.0024836428929120302, + "logits/chosen": -1.4974017143249512, + "logits/rejected": -4.80449914932251, + "logps/chosen": -1.6387498378753662, + "logps/rejected": -18.401721954345703, + "loss": 1.3765, + "nll_loss": 1.376255989074707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16387496888637543, + "rewards/margins": 1.6762971878051758, + "rewards/rejected": -1.8401721715927124, + "step": 519 + }, + { + "epoch": 1.9622641509433962, + "grad_norm": 0.35912734270095825, + "learning_rate": 9.433962264150943e-07, + "log_odds_chosen": 14.829183578491211, + "log_odds_ratio": -0.15696510672569275, + "logits/chosen": -1.3042566776275635, + "logits/rejected": -6.660241603851318, + "logps/chosen": -1.6939544677734375, + "logps/rejected": -16.27517318725586, + "loss": 1.5792, + "nll_loss": 1.5635175704956055, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.16939544677734375, + "rewards/margins": 1.4581220149993896, + "rewards/rejected": -1.6275173425674438, + "step": 520 + }, + { + "epoch": 1.9660377358490566, + "grad_norm": 0.35668516159057617, + "learning_rate": 8.490566037735849e-07, + "log_odds_chosen": 18.270919799804688, + "log_odds_ratio": -0.09477733820676804, + "logits/chosen": -1.378293752670288, + "logits/rejected": -4.495629787445068, + "logps/chosen": -1.3341569900512695, + "logps/rejected": -19.200576782226562, + "loss": 1.3796, + "nll_loss": 1.3701521158218384, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.13341571390628815, + "rewards/margins": 1.786642074584961, + "rewards/rejected": -1.9200576543807983, + "step": 521 + }, + { + "epoch": 1.969811320754717, + "grad_norm": 0.40886542201042175, + "learning_rate": 7.547169811320755e-07, + "log_odds_chosen": 17.687740325927734, + "log_odds_ratio": -0.02107013203203678, + "logits/chosen": -1.3524720668792725, + "logits/rejected": -5.940802574157715, + "logps/chosen": -1.4188027381896973, + "logps/rejected": -18.724929809570312, + "loss": 1.2466, + "nll_loss": 1.2445337772369385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14188028872013092, + "rewards/margins": 1.730612874031067, + "rewards/rejected": -1.8724932670593262, + "step": 522 + }, + { + "epoch": 1.9735849056603774, + "grad_norm": 0.4124062657356262, + "learning_rate": 6.603773584905661e-07, + "log_odds_chosen": 15.467195510864258, + "log_odds_ratio": -0.046547312289476395, + "logits/chosen": -1.658618450164795, + "logits/rejected": -6.218527793884277, + "logps/chosen": -1.8022215366363525, + "logps/rejected": -16.846336364746094, + "loss": 1.6649, + "nll_loss": 1.660265326499939, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.18022215366363525, + "rewards/margins": 1.5044115781784058, + "rewards/rejected": -1.684633731842041, + "step": 523 + }, + { + "epoch": 1.9773584905660377, + "grad_norm": 0.38052645325660706, + "learning_rate": 5.660377358490566e-07, + "log_odds_chosen": 19.262311935424805, + "log_odds_ratio": -5.662463422595465e-07, + "logits/chosen": -2.7431163787841797, + "logits/rejected": -7.38714075088501, + "logps/chosen": -1.8242018222808838, + "logps/rejected": -20.831439971923828, + "loss": 1.4274, + "nll_loss": 1.4274204969406128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18242016434669495, + "rewards/margins": 1.9007238149642944, + "rewards/rejected": -2.083144187927246, + "step": 524 + }, + { + "epoch": 1.9811320754716981, + "grad_norm": 0.37092652916908264, + "learning_rate": 4.7169811320754717e-07, + "log_odds_chosen": 18.2442626953125, + "log_odds_ratio": -0.06912226229906082, + "logits/chosen": -2.5067853927612305, + "logits/rejected": -7.028432369232178, + "logps/chosen": -1.737499475479126, + "logps/rejected": -19.720117568969727, + "loss": 1.5818, + "nll_loss": 1.5748425722122192, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1737499237060547, + "rewards/margins": 1.7982620000839233, + "rewards/rejected": -1.9720118045806885, + "step": 525 + }, + { + "epoch": 1.9849056603773585, + "grad_norm": 0.39420637488365173, + "learning_rate": 3.773584905660378e-07, + "log_odds_chosen": 16.574909210205078, + "log_odds_ratio": -0.07863669842481613, + "logits/chosen": -1.2284739017486572, + "logits/rejected": -4.368472099304199, + "logps/chosen": -1.423435926437378, + "logps/rejected": -17.645301818847656, + "loss": 1.3318, + "nll_loss": 1.323962688446045, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14234358072280884, + "rewards/margins": 1.6221866607666016, + "rewards/rejected": -1.7645303010940552, + "step": 526 + }, + { + "epoch": 1.9886792452830189, + "grad_norm": 0.39243245124816895, + "learning_rate": 2.830188679245283e-07, + "log_odds_chosen": 17.68594741821289, + "log_odds_ratio": -3.010060481756227e-06, + "logits/chosen": -2.5417604446411133, + "logits/rejected": -5.660012722015381, + "logps/chosen": -1.545115351676941, + "logps/rejected": -18.896028518676758, + "loss": 1.6222, + "nll_loss": 1.6222350597381592, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15451154112815857, + "rewards/margins": 1.735091209411621, + "rewards/rejected": -1.889602780342102, + "step": 527 + }, + { + "epoch": 1.9924528301886792, + "grad_norm": 0.37130141258239746, + "learning_rate": 1.886792452830189e-07, + "log_odds_chosen": 19.309329986572266, + "log_odds_ratio": -3.0204251743271016e-05, + "logits/chosen": -1.1784321069717407, + "logits/rejected": -4.860876083374023, + "logps/chosen": -1.4668664932250977, + "logps/rejected": -20.424339294433594, + "loss": 1.4702, + "nll_loss": 1.47021484375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1466866433620453, + "rewards/margins": 1.895747184753418, + "rewards/rejected": -2.042433977127075, + "step": 528 + }, + { + "epoch": 1.9962264150943396, + "grad_norm": 0.3702877461910248, + "learning_rate": 9.433962264150944e-08, + "log_odds_chosen": 20.07143783569336, + "log_odds_ratio": -8.866221037351352e-07, + "logits/chosen": -0.7111749649047852, + "logits/rejected": -4.296523094177246, + "logps/chosen": -1.6536825895309448, + "logps/rejected": -21.461524963378906, + "loss": 1.5899, + "nll_loss": 1.5898981094360352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16536825895309448, + "rewards/margins": 1.9807841777801514, + "rewards/rejected": -2.1461524963378906, + "step": 529 + }, + { + "epoch": 2.0, + "grad_norm": 0.5285991430282593, + "learning_rate": 0.0, + "log_odds_chosen": 17.8989315032959, + "log_odds_ratio": -0.0035720046143978834, + "logits/chosen": -1.8316022157669067, + "logits/rejected": -4.168578147888184, + "logps/chosen": -1.5039995908737183, + "logps/rejected": -19.07382583618164, + "loss": 1.3711, + "nll_loss": 1.3707059621810913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15039995312690735, + "rewards/margins": 1.7569825649261475, + "rewards/rejected": -1.9073827266693115, + "step": 530 + } + ], + "logging_steps": 1, + "max_steps": 530, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}