diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7590 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9989289539450197, + "eval_steps": 500, + "global_step": 4200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 6.3125, + "learning_rate": 4.997501873438867e-06, + "log_odds_chosen": 0.027651220560073853, + "log_odds_ratio": -0.7304251194000244, + "logits/chosen": -3.0225749015808105, + "logits/rejected": -3.0150020122528076, + "logps/chosen": -0.815263569355011, + "logps/rejected": -0.8314116597175598, + "loss": 1.153, + "nll_loss": 1.0209743976593018, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1630527228116989, + "rewards/margins": 0.0032296099234372377, + "rewards/rejected": -0.16628232598304749, + "step": 10 + }, + { + "epoch": 0.01, + "grad_norm": 5.40625, + "learning_rate": 4.995007487521836e-06, + "log_odds_chosen": 0.12160005420446396, + "log_odds_ratio": -0.6930473446846008, + "logits/chosen": -3.121931791305542, + "logits/rejected": -3.1250596046447754, + "logps/chosen": -0.7088677883148193, + "logps/rejected": -0.7795363664627075, + "loss": 0.6108, + "nll_loss": 0.46274280548095703, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.14177358150482178, + "rewards/margins": 0.014133691787719727, + "rewards/rejected": -0.15590724349021912, + "step": 20 + }, + { + "epoch": 0.02, + "grad_norm": 6.9375, + "learning_rate": 4.992516832922945e-06, + "log_odds_chosen": 0.03587382286787033, + "log_odds_ratio": -0.7343233227729797, + "logits/chosen": -3.130347728729248, + "logits/rejected": -3.114506244659424, + "logps/chosen": -0.7532758116722107, + "logps/rejected": -0.7803520560264587, + "loss": 0.6097, + "nll_loss": 0.462548166513443, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -0.15065516531467438, + "rewards/margins": 0.005415262188762426, + "rewards/rejected": -0.15607044100761414, + "step": 30 + }, + { + "epoch": 0.03, + "grad_norm": 5.65625, + "learning_rate": 4.990029900348746e-06, + "log_odds_chosen": 0.06624144315719604, + "log_odds_ratio": -0.7134107351303101, + "logits/chosen": -3.125946521759033, + "logits/rejected": -3.1197519302368164, + "logps/chosen": -0.7543585896492004, + "logps/rejected": -0.8100128173828125, + "loss": 0.595, + "nll_loss": 0.47750869393348694, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.15087170898914337, + "rewards/margins": 0.011130847968161106, + "rewards/rejected": -0.1620025634765625, + "step": 40 + }, + { + "epoch": 0.04, + "grad_norm": 5.21875, + "learning_rate": 4.987546680538165e-06, + "log_odds_chosen": 0.14277830719947815, + "log_odds_ratio": -0.6764869093894958, + "logits/chosen": -3.073333263397217, + "logits/rejected": -3.081392288208008, + "logps/chosen": -0.7409430146217346, + "logps/rejected": -0.831874668598175, + "loss": 0.6027, + "nll_loss": 0.4561688005924225, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.14818862080574036, + "rewards/margins": 0.018186338245868683, + "rewards/rejected": -0.16637495160102844, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 5.65625, + "learning_rate": 4.985067164262359e-06, + "log_odds_chosen": 0.09204573929309845, + "log_odds_ratio": -0.7011358737945557, + "logits/chosen": -3.172628879547119, + "logits/rejected": -3.173214912414551, + "logps/chosen": -0.7226709127426147, + "logps/rejected": -0.7736715078353882, + "loss": 0.5972, + "nll_loss": 0.4593353867530823, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1445341557264328, + "rewards/margins": 0.010200141929090023, + "rewards/rejected": -0.15473432838916779, + "step": 60 + }, + { + "epoch": 0.05, + "grad_norm": 5.59375, + "learning_rate": 4.98259134232457e-06, + "log_odds_chosen": 0.09953074157238007, + "log_odds_ratio": -0.6920744776725769, + "logits/chosen": -3.1694746017456055, + "logits/rejected": -3.185354471206665, + "logps/chosen": -0.7267014980316162, + "logps/rejected": -0.7774870991706848, + "loss": 0.6119, + "nll_loss": 0.46830782294273376, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.14534030854701996, + "rewards/margins": 0.010157106444239616, + "rewards/rejected": -0.15549740195274353, + "step": 70 + }, + { + "epoch": 0.06, + "grad_norm": 5.53125, + "learning_rate": 4.980119205559974e-06, + "log_odds_chosen": 0.07614605873823166, + "log_odds_ratio": -0.7038400769233704, + "logits/chosen": -3.1640992164611816, + "logits/rejected": -3.1609697341918945, + "logps/chosen": -0.7791558504104614, + "logps/rejected": -0.8135568499565125, + "loss": 0.6066, + "nll_loss": 0.461737722158432, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.15583118796348572, + "rewards/margins": 0.006880161818116903, + "rewards/rejected": -0.16271135210990906, + "step": 80 + }, + { + "epoch": 0.06, + "grad_norm": 5.3125, + "learning_rate": 4.977650744835555e-06, + "log_odds_chosen": 0.07529701292514801, + "log_odds_ratio": -0.6971887946128845, + "logits/chosen": -3.1437430381774902, + "logits/rejected": -3.14058518409729, + "logps/chosen": -0.7142292261123657, + "logps/rejected": -0.7589999437332153, + "loss": 0.5784, + "nll_loss": 0.4407345652580261, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.14284583926200867, + "rewards/margins": 0.008954131975769997, + "rewards/rejected": -0.1517999768257141, + "step": 90 + }, + { + "epoch": 0.07, + "grad_norm": 5.375, + "learning_rate": 4.975185951049947e-06, + "log_odds_chosen": 0.06737220287322998, + "log_odds_ratio": -0.7220064401626587, + "logits/chosen": -3.1887149810791016, + "logits/rejected": -3.1891226768493652, + "logps/chosen": -0.7800450921058655, + "logps/rejected": -0.8231565356254578, + "loss": 0.551, + "nll_loss": 0.4563975930213928, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.1560090035200119, + "rewards/margins": 0.008622287772595882, + "rewards/rejected": -0.16463130712509155, + "step": 100 + }, + { + "epoch": 0.08, + "grad_norm": 5.9375, + "learning_rate": 4.972724815133302e-06, + "log_odds_chosen": 0.09414532780647278, + "log_odds_ratio": -0.7080024480819702, + "logits/chosen": -3.1148273944854736, + "logits/rejected": -3.1315975189208984, + "logps/chosen": -0.7162569761276245, + "logps/rejected": -0.7553704977035522, + "loss": 0.5325, + "nll_loss": 0.43158403038978577, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14325138926506042, + "rewards/margins": 0.00782269798219204, + "rewards/rejected": -0.1510740965604782, + "step": 110 + }, + { + "epoch": 0.09, + "grad_norm": 5.8125, + "learning_rate": 4.970267328047151e-06, + "log_odds_chosen": 0.10901321470737457, + "log_odds_ratio": -0.6950778961181641, + "logits/chosen": -3.14462947845459, + "logits/rejected": -3.137329578399658, + "logps/chosen": -0.7042136192321777, + "logps/rejected": -0.7720807194709778, + "loss": 0.5508, + "nll_loss": 0.40320760011672974, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.1408427208662033, + "rewards/margins": 0.0135734211653471, + "rewards/rejected": -0.15441615879535675, + "step": 120 + }, + { + "epoch": 0.09, + "grad_norm": 5.65625, + "learning_rate": 4.9678134807842575e-06, + "log_odds_chosen": 0.07895953953266144, + "log_odds_ratio": -0.7133638262748718, + "logits/chosen": -3.125828266143799, + "logits/rejected": -3.1391282081604004, + "logps/chosen": -0.7350977063179016, + "logps/rejected": -0.7833020091056824, + "loss": 0.5682, + "nll_loss": 0.41112464666366577, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.14701953530311584, + "rewards/margins": 0.009640867821872234, + "rewards/rejected": -0.15666040778160095, + "step": 130 + }, + { + "epoch": 0.1, + "grad_norm": 6.84375, + "learning_rate": 4.965363264368484e-06, + "log_odds_chosen": 0.12207289040088654, + "log_odds_ratio": -0.6769169569015503, + "logits/chosen": -3.14564847946167, + "logits/rejected": -3.1439242362976074, + "logps/chosen": -0.6801249980926514, + "logps/rejected": -0.7512098550796509, + "loss": 0.5738, + "nll_loss": 0.4280276298522949, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.13602499663829803, + "rewards/margins": 0.014216972514986992, + "rewards/rejected": -0.15024198591709137, + "step": 140 + }, + { + "epoch": 0.11, + "grad_norm": 5.71875, + "learning_rate": 4.962916669854652e-06, + "log_odds_chosen": 0.1224343553185463, + "log_odds_ratio": -0.6913517713546753, + "logits/chosen": -3.1227478981018066, + "logits/rejected": -3.1305549144744873, + "logps/chosen": -0.7392982840538025, + "logps/rejected": -0.8002193570137024, + "loss": 0.5956, + "nll_loss": 0.43565186858177185, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.14785966277122498, + "rewards/margins": 0.01218421570956707, + "rewards/rejected": -0.1600438803434372, + "step": 150 + }, + { + "epoch": 0.11, + "grad_norm": 5.96875, + "learning_rate": 4.960473688328407e-06, + "log_odds_chosen": 0.0026047558058053255, + "log_odds_ratio": -0.7348340153694153, + "logits/chosen": -3.0425188541412354, + "logits/rejected": -3.0503063201904297, + "logps/chosen": -0.7252613306045532, + "logps/rejected": -0.7286208271980286, + "loss": 0.5688, + "nll_loss": 0.41286152601242065, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1450522541999817, + "rewards/margins": 0.0006719084340147674, + "rewards/rejected": -0.14572417736053467, + "step": 160 + }, + { + "epoch": 0.12, + "grad_norm": 5.1875, + "learning_rate": 4.95803431090608e-06, + "log_odds_chosen": 0.16818314790725708, + "log_odds_ratio": -0.6584939956665039, + "logits/chosen": -3.142646074295044, + "logits/rejected": -3.161475419998169, + "logps/chosen": -0.7002934217453003, + "logps/rejected": -0.7836463451385498, + "loss": 0.5958, + "nll_loss": 0.4563008248806, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.14005866646766663, + "rewards/margins": 0.016670597717165947, + "rewards/rejected": -0.15672926604747772, + "step": 170 + }, + { + "epoch": 0.13, + "grad_norm": 5.4375, + "learning_rate": 4.955598528734554e-06, + "log_odds_chosen": 0.1538088172674179, + "log_odds_ratio": -0.6675732135772705, + "logits/chosen": -3.100310802459717, + "logits/rejected": -3.117645740509033, + "logps/chosen": -0.6987863183021545, + "logps/rejected": -0.7786335945129395, + "loss": 0.5698, + "nll_loss": 0.45257002115249634, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.13975724577903748, + "rewards/margins": 0.01596945896744728, + "rewards/rejected": -0.15572671592235565, + "step": 180 + }, + { + "epoch": 0.14, + "grad_norm": 6.28125, + "learning_rate": 4.953166332991125e-06, + "log_odds_chosen": 0.12078976631164551, + "log_odds_ratio": -0.6859411001205444, + "logits/chosen": -3.134028434753418, + "logits/rejected": -3.137901782989502, + "logps/chosen": -0.6982489824295044, + "logps/rejected": -0.7687091827392578, + "loss": 0.5676, + "nll_loss": 0.43058595061302185, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.13964977860450745, + "rewards/margins": 0.014092043042182922, + "rewards/rejected": -0.15374182164669037, + "step": 190 + }, + { + "epoch": 0.14, + "grad_norm": 6.28125, + "learning_rate": 4.950737714883372e-06, + "log_odds_chosen": 0.17639026045799255, + "log_odds_ratio": -0.655586838722229, + "logits/chosen": -3.102017641067505, + "logits/rejected": -3.1128413677215576, + "logps/chosen": -0.6888889670372009, + "logps/rejected": -0.7917199730873108, + "loss": 0.5431, + "nll_loss": 0.39507898688316345, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.13777779042720795, + "rewards/margins": 0.020566195249557495, + "rewards/rejected": -0.15834400057792664, + "step": 200 + }, + { + "epoch": 0.15, + "grad_norm": 6.21875, + "learning_rate": 4.948312665649022e-06, + "log_odds_chosen": 0.00438351184129715, + "log_odds_ratio": -0.731238603591919, + "logits/chosen": -3.072948932647705, + "logits/rejected": -3.0700042247772217, + "logps/chosen": -0.7648705244064331, + "logps/rejected": -0.7703164219856262, + "loss": 0.5744, + "nll_loss": 0.41652363538742065, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.15297409892082214, + "rewards/margins": 0.001089166384190321, + "rewards/rejected": -0.15406326949596405, + "step": 210 + }, + { + "epoch": 0.16, + "grad_norm": 5.0, + "learning_rate": 4.945891176555817e-06, + "log_odds_chosen": 0.09887596219778061, + "log_odds_ratio": -0.6929382085800171, + "logits/chosen": -3.109269618988037, + "logits/rejected": -3.0964112281799316, + "logps/chosen": -0.7023528218269348, + "logps/rejected": -0.7567712068557739, + "loss": 0.5629, + "nll_loss": 0.4561777710914612, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.14047057926654816, + "rewards/margins": 0.010883677750825882, + "rewards/rejected": -0.15135423839092255, + "step": 220 + }, + { + "epoch": 0.16, + "grad_norm": 6.5, + "learning_rate": 4.943473238901383e-06, + "log_odds_chosen": 0.12996645271778107, + "log_odds_ratio": -0.6799687147140503, + "logits/chosen": -3.0217673778533936, + "logits/rejected": -3.0064945220947266, + "logps/chosen": -0.6958892345428467, + "logps/rejected": -0.7603468298912048, + "loss": 0.5877, + "nll_loss": 0.43383994698524475, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1391778290271759, + "rewards/margins": 0.012891518883407116, + "rewards/rejected": -0.15206937491893768, + "step": 230 + }, + { + "epoch": 0.17, + "grad_norm": 6.40625, + "learning_rate": 4.941058844013094e-06, + "log_odds_chosen": 0.15019556879997253, + "log_odds_ratio": -0.665575385093689, + "logits/chosen": -3.062437057495117, + "logits/rejected": -3.067084789276123, + "logps/chosen": -0.7049697041511536, + "logps/rejected": -0.7816177606582642, + "loss": 0.6147, + "nll_loss": 0.48391756415367126, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.14099393784999847, + "rewards/margins": 0.015329602174460888, + "rewards/rejected": -0.15632352232933044, + "step": 240 + }, + { + "epoch": 0.18, + "grad_norm": 6.9375, + "learning_rate": 4.938647983247949e-06, + "log_odds_chosen": 0.0968349426984787, + "log_odds_ratio": -0.6998175382614136, + "logits/chosen": -2.946869134902954, + "logits/rejected": -2.954613208770752, + "logps/chosen": -0.7149877548217773, + "logps/rejected": -0.7582697868347168, + "loss": 0.592, + "nll_loss": 0.4492533206939697, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.14299754798412323, + "rewards/margins": 0.008656383492052555, + "rewards/rejected": -0.1516539454460144, + "step": 250 + }, + { + "epoch": 0.19, + "grad_norm": 5.4375, + "learning_rate": 4.936240647992436e-06, + "log_odds_chosen": 0.13414113223552704, + "log_odds_ratio": -0.670508861541748, + "logits/chosen": -3.0646378993988037, + "logits/rejected": -3.0543510913848877, + "logps/chosen": -0.686463475227356, + "logps/rejected": -0.7646616697311401, + "loss": 0.577, + "nll_loss": 0.4495324194431305, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.13729269802570343, + "rewards/margins": 0.015639634802937508, + "rewards/rejected": -0.1529323160648346, + "step": 260 + }, + { + "epoch": 0.19, + "grad_norm": 5.65625, + "learning_rate": 4.933836829662409e-06, + "log_odds_chosen": 0.11647520214319229, + "log_odds_ratio": -0.6881713271141052, + "logits/chosen": -2.9797616004943848, + "logits/rejected": -2.9717679023742676, + "logps/chosen": -0.6882697939872742, + "logps/rejected": -0.7442315220832825, + "loss": 0.5775, + "nll_loss": 0.42454642057418823, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.13765396177768707, + "rewards/margins": 0.01119234412908554, + "rewards/rejected": -0.14884629845619202, + "step": 270 + }, + { + "epoch": 0.2, + "grad_norm": 5.40625, + "learning_rate": 4.9314365197029475e-06, + "log_odds_chosen": 0.12243346869945526, + "log_odds_ratio": -0.6820363998413086, + "logits/chosen": -3.0487539768218994, + "logits/rejected": -3.0383288860321045, + "logps/chosen": -0.6958305239677429, + "logps/rejected": -0.7610360980033875, + "loss": 0.5734, + "nll_loss": 0.4472366273403168, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.13916611671447754, + "rewards/margins": 0.013041108846664429, + "rewards/rejected": -0.15220721065998077, + "step": 280 + }, + { + "epoch": 0.21, + "grad_norm": 5.46875, + "learning_rate": 4.9290397095882446e-06, + "log_odds_chosen": 0.02017252705991268, + "log_odds_ratio": -0.7348771095275879, + "logits/chosen": -2.9625377655029297, + "logits/rejected": -2.9633870124816895, + "logps/chosen": -0.7289597988128662, + "logps/rejected": -0.7401062250137329, + "loss": 0.5777, + "nll_loss": 0.47131404280662537, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.14579197764396667, + "rewards/margins": 0.0022292803041636944, + "rewards/rejected": -0.14802125096321106, + "step": 290 + }, + { + "epoch": 0.21, + "grad_norm": 5.625, + "learning_rate": 4.9266463908214664e-06, + "log_odds_chosen": 0.13380487263202667, + "log_odds_ratio": -0.6815378069877625, + "logits/chosen": -3.0238821506500244, + "logits/rejected": -3.012429714202881, + "logps/chosen": -0.6980360746383667, + "logps/rejected": -0.772484540939331, + "loss": 0.5416, + "nll_loss": 0.3923744261264801, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.13960722088813782, + "rewards/margins": 0.014889700338244438, + "rewards/rejected": -0.1544969230890274, + "step": 300 + }, + { + "epoch": 0.22, + "grad_norm": 4.8125, + "learning_rate": 4.924256554934632e-06, + "log_odds_chosen": 0.10960634052753448, + "log_odds_ratio": -0.6801453828811646, + "logits/chosen": -2.9736227989196777, + "logits/rejected": -2.9606356620788574, + "logps/chosen": -0.7132256031036377, + "logps/rejected": -0.7729824781417847, + "loss": 0.5434, + "nll_loss": 0.38369157910346985, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.14264513552188873, + "rewards/margins": 0.01195137295871973, + "rewards/rejected": -0.1545964926481247, + "step": 310 + }, + { + "epoch": 0.23, + "grad_norm": 5.84375, + "learning_rate": 4.9218701934884865e-06, + "log_odds_chosen": 0.15664125978946686, + "log_odds_ratio": -0.6632459759712219, + "logits/chosen": -3.026071071624756, + "logits/rejected": -3.011920690536499, + "logps/chosen": -0.7300186157226562, + "logps/rejected": -0.8273404836654663, + "loss": 0.57, + "nll_loss": 0.42472705245018005, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.14600373804569244, + "rewards/margins": 0.01946437731385231, + "rewards/rejected": -0.16546811163425446, + "step": 320 + }, + { + "epoch": 0.24, + "grad_norm": 6.65625, + "learning_rate": 4.919487298072377e-06, + "log_odds_chosen": 0.16137509047985077, + "log_odds_ratio": -0.672458291053772, + "logits/chosen": -2.949476718902588, + "logits/rejected": -2.9505724906921387, + "logps/chosen": -0.6964123249053955, + "logps/rejected": -0.7866235971450806, + "loss": 0.5429, + "nll_loss": 0.4337022304534912, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.1392824798822403, + "rewards/margins": 0.018042229115962982, + "rewards/rejected": -0.15732471644878387, + "step": 330 + }, + { + "epoch": 0.24, + "grad_norm": 6.25, + "learning_rate": 4.917107860304125e-06, + "log_odds_chosen": 0.12605342268943787, + "log_odds_ratio": -0.672644317150116, + "logits/chosen": -3.1077914237976074, + "logits/rejected": -3.0998547077178955, + "logps/chosen": -0.6887668967247009, + "logps/rejected": -0.7507665753364563, + "loss": 0.5602, + "nll_loss": 0.40362483263015747, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.13775338232517242, + "rewards/margins": 0.012399917468428612, + "rewards/rejected": -0.15015330910682678, + "step": 340 + }, + { + "epoch": 0.25, + "grad_norm": 5.625, + "learning_rate": 4.914731871829905e-06, + "log_odds_chosen": 0.0800851434469223, + "log_odds_ratio": -0.6991033554077148, + "logits/chosen": -3.1771771907806396, + "logits/rejected": -3.183584451675415, + "logps/chosen": -0.7016042470932007, + "logps/rejected": -0.7426560521125793, + "loss": 0.5623, + "nll_loss": 0.38557925820350647, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.14032086730003357, + "rewards/margins": 0.008210362866520882, + "rewards/rejected": -0.1485312283039093, + "step": 350 + }, + { + "epoch": 0.26, + "grad_norm": 5.625, + "learning_rate": 4.912359324324121e-06, + "log_odds_chosen": 0.17609532177448273, + "log_odds_ratio": -0.6462699174880981, + "logits/chosen": -3.1602416038513184, + "logits/rejected": -3.1689493656158447, + "logps/chosen": -0.6804074048995972, + "logps/rejected": -0.7664049863815308, + "loss": 0.5685, + "nll_loss": 0.4449498653411865, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1360814869403839, + "rewards/margins": 0.01719951257109642, + "rewards/rejected": -0.15328100323677063, + "step": 360 + }, + { + "epoch": 0.26, + "grad_norm": 5.84375, + "learning_rate": 4.909990209489284e-06, + "log_odds_chosen": 0.09661266207695007, + "log_odds_ratio": -0.6989172697067261, + "logits/chosen": -3.1702992916107178, + "logits/rejected": -3.1816422939300537, + "logps/chosen": -0.6909779906272888, + "logps/rejected": -0.7484806776046753, + "loss": 0.5448, + "nll_loss": 0.3755747377872467, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.13819561898708344, + "rewards/margins": 0.011500529944896698, + "rewards/rejected": -0.14969615638256073, + "step": 370 + }, + { + "epoch": 0.27, + "grad_norm": 5.4375, + "learning_rate": 4.907624519055888e-06, + "log_odds_chosen": 0.053890157490968704, + "log_odds_ratio": -0.7154654860496521, + "logits/chosen": -3.1567158699035645, + "logits/rejected": -3.165017604827881, + "logps/chosen": -0.7759397625923157, + "logps/rejected": -0.8097392320632935, + "loss": 0.5697, + "nll_loss": 0.42835354804992676, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1551879346370697, + "rewards/margins": 0.0067598940804600716, + "rewards/rejected": -0.1619478464126587, + "step": 380 + }, + { + "epoch": 0.28, + "grad_norm": 5.75, + "learning_rate": 4.905262244782294e-06, + "log_odds_chosen": 0.10774964094161987, + "log_odds_ratio": -0.6780427694320679, + "logits/chosen": -3.1538772583007812, + "logits/rejected": -3.139371395111084, + "logps/chosen": -0.72501140832901, + "logps/rejected": -0.7887372970581055, + "loss": 0.5558, + "nll_loss": 0.4034719467163086, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.14500227570533752, + "rewards/margins": 0.01274518109858036, + "rewards/rejected": -0.15774747729301453, + "step": 390 + }, + { + "epoch": 0.29, + "grad_norm": 6.4375, + "learning_rate": 4.902903378454601e-06, + "log_odds_chosen": 0.013013715855777264, + "log_odds_ratio": -0.7487560510635376, + "logits/chosen": -3.1208155155181885, + "logits/rejected": -3.1211535930633545, + "logps/chosen": -0.7125126719474792, + "logps/rejected": -0.7153986692428589, + "loss": 0.5383, + "nll_loss": 0.3862176537513733, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.142502561211586, + "rewards/margins": 0.0005772082367911935, + "rewards/rejected": -0.1430797278881073, + "step": 400 + }, + { + "epoch": 0.29, + "grad_norm": 4.53125, + "learning_rate": 4.900547911886537e-06, + "log_odds_chosen": 0.14288419485092163, + "log_odds_ratio": -0.6765764951705933, + "logits/chosen": -3.208242416381836, + "logits/rejected": -3.2231342792510986, + "logps/chosen": -0.7180224657058716, + "logps/rejected": -0.7839994430541992, + "loss": 0.5571, + "nll_loss": 0.4236370921134949, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.14360451698303223, + "rewards/margins": 0.013195383362472057, + "rewards/rejected": -0.15679989755153656, + "step": 410 + }, + { + "epoch": 0.3, + "grad_norm": 4.875, + "learning_rate": 4.898195836919327e-06, + "log_odds_chosen": 0.03629889339208603, + "log_odds_ratio": -0.7236992716789246, + "logits/chosen": -3.2515082359313965, + "logits/rejected": -3.2554900646209717, + "logps/chosen": -0.7256291508674622, + "logps/rejected": -0.7619522213935852, + "loss": 0.5552, + "nll_loss": 0.41900959610939026, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.14512582123279572, + "rewards/margins": 0.0072646364569664, + "rewards/rejected": -0.1523904651403427, + "step": 420 + }, + { + "epoch": 0.31, + "grad_norm": 5.21875, + "learning_rate": 4.895847145421587e-06, + "log_odds_chosen": 0.18708305060863495, + "log_odds_ratio": -0.6497930288314819, + "logits/chosen": -3.263317108154297, + "logits/rejected": -3.263544797897339, + "logps/chosen": -0.6555394530296326, + "logps/rejected": -0.7548009753227234, + "loss": 0.5175, + "nll_loss": 0.386501282453537, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1311078816652298, + "rewards/margins": 0.01985231228172779, + "rewards/rejected": -0.15096020698547363, + "step": 430 + }, + { + "epoch": 0.31, + "grad_norm": 5.46875, + "learning_rate": 4.893501829289195e-06, + "log_odds_chosen": 0.10671345144510269, + "log_odds_ratio": -0.696262001991272, + "logits/chosen": -3.2451579570770264, + "logits/rejected": -3.2367336750030518, + "logps/chosen": -0.711258590221405, + "logps/rejected": -0.7748275995254517, + "loss": 0.5643, + "nll_loss": 0.435871422290802, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.14225171506404877, + "rewards/margins": 0.012713788077235222, + "rewards/rejected": -0.15496549010276794, + "step": 440 + }, + { + "epoch": 0.32, + "grad_norm": 5.15625, + "learning_rate": 4.891159880445185e-06, + "log_odds_chosen": -0.009761805646121502, + "log_odds_ratio": -0.7526240348815918, + "logits/chosen": -3.3140366077423096, + "logits/rejected": -3.3279337882995605, + "logps/chosen": -0.7342787981033325, + "logps/rejected": -0.7224690318107605, + "loss": 0.5655, + "nll_loss": 0.3994046151638031, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": -0.14685577154159546, + "rewards/margins": -0.002361953491345048, + "rewards/rejected": -0.14449380338191986, + "step": 450 + }, + { + "epoch": 0.33, + "grad_norm": 5.3125, + "learning_rate": 4.888821290839617e-06, + "log_odds_chosen": 0.08875492960214615, + "log_odds_ratio": -0.7100639343261719, + "logits/chosen": -3.2668724060058594, + "logits/rejected": -3.266616106033325, + "logps/chosen": -0.6991187334060669, + "logps/rejected": -0.7543323040008545, + "loss": 0.5456, + "nll_loss": 0.39074766635894775, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.13982374966144562, + "rewards/margins": 0.011042733676731586, + "rewards/rejected": -0.15086647868156433, + "step": 460 + }, + { + "epoch": 0.34, + "grad_norm": 4.84375, + "learning_rate": 4.886486052449469e-06, + "log_odds_chosen": 0.04209429770708084, + "log_odds_ratio": -0.7359583973884583, + "logits/chosen": -3.2799735069274902, + "logits/rejected": -3.2890267372131348, + "logps/chosen": -0.7149407267570496, + "logps/rejected": -0.7467728853225708, + "loss": 0.5785, + "nll_loss": 0.3849171996116638, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.1429881602525711, + "rewards/margins": 0.006366416811943054, + "rewards/rejected": -0.14935457706451416, + "step": 470 + }, + { + "epoch": 0.34, + "grad_norm": 5.0, + "learning_rate": 4.8841541572785224e-06, + "log_odds_chosen": 0.08705852925777435, + "log_odds_ratio": -0.7012760043144226, + "logits/chosen": -3.229048490524292, + "logits/rejected": -3.2325732707977295, + "logps/chosen": -0.7453920245170593, + "logps/rejected": -0.7918086051940918, + "loss": 0.6028, + "nll_loss": 0.49805039167404175, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.14907841384410858, + "rewards/margins": 0.009283310733735561, + "rewards/rejected": -0.15836171805858612, + "step": 480 + }, + { + "epoch": 0.35, + "grad_norm": 5.71875, + "learning_rate": 4.881825597357242e-06, + "log_odds_chosen": 0.12286486476659775, + "log_odds_ratio": -0.6923102736473083, + "logits/chosen": -3.2311508655548096, + "logits/rejected": -3.241130828857422, + "logps/chosen": -0.7206076979637146, + "logps/rejected": -0.7855247259140015, + "loss": 0.5515, + "nll_loss": 0.4401041865348816, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.14412155747413635, + "rewards/margins": 0.012983402237296104, + "rewards/rejected": -0.15710493922233582, + "step": 490 + }, + { + "epoch": 0.36, + "grad_norm": 6.03125, + "learning_rate": 4.8795003647426654e-06, + "log_odds_chosen": 0.08800844103097916, + "log_odds_ratio": -0.71002596616745, + "logits/chosen": -3.240417957305908, + "logits/rejected": -3.249009609222412, + "logps/chosen": -0.7202855348587036, + "logps/rejected": -0.7685345411300659, + "loss": 0.593, + "nll_loss": 0.4520091116428375, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.14405712485313416, + "rewards/margins": 0.00964980386197567, + "rewards/rejected": -0.15370693802833557, + "step": 500 + }, + { + "epoch": 0.36, + "grad_norm": 5.46875, + "learning_rate": 4.877178451518289e-06, + "log_odds_chosen": 0.017683375626802444, + "log_odds_ratio": -0.7400511503219604, + "logits/chosen": -3.2630767822265625, + "logits/rejected": -3.2636375427246094, + "logps/chosen": -0.7481754422187805, + "logps/rejected": -0.7609735727310181, + "loss": 0.5502, + "nll_loss": 0.403683602809906, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14963507652282715, + "rewards/margins": 0.0025596513878554106, + "rewards/rejected": -0.15219472348690033, + "step": 510 + }, + { + "epoch": 0.37, + "grad_norm": 6.5, + "learning_rate": 4.8748598497939494e-06, + "log_odds_chosen": 0.08491934835910797, + "log_odds_ratio": -0.6992539763450623, + "logits/chosen": -3.2163689136505127, + "logits/rejected": -3.2333626747131348, + "logps/chosen": -0.7194134593009949, + "logps/rejected": -0.7636415958404541, + "loss": 0.5908, + "nll_loss": 0.4157852232456207, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.14388270676136017, + "rewards/margins": 0.008845620788633823, + "rewards/rejected": -0.15272831916809082, + "step": 520 + }, + { + "epoch": 0.38, + "grad_norm": 4.84375, + "learning_rate": 4.872544551705718e-06, + "log_odds_chosen": 0.12621939182281494, + "log_odds_ratio": -0.6822875738143921, + "logits/chosen": -3.219284772872925, + "logits/rejected": -3.2353568077087402, + "logps/chosen": -0.7198256254196167, + "logps/rejected": -0.770844578742981, + "loss": 0.5348, + "nll_loss": 0.38905125856399536, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.14396515488624573, + "rewards/margins": 0.010203775018453598, + "rewards/rejected": -0.15416888892650604, + "step": 530 + }, + { + "epoch": 0.39, + "grad_norm": 5.8125, + "learning_rate": 4.870232549415787e-06, + "log_odds_chosen": 0.039147090166807175, + "log_odds_ratio": -0.7419449687004089, + "logits/chosen": -3.215195894241333, + "logits/rejected": -3.216196060180664, + "logps/chosen": -0.7427582144737244, + "logps/rejected": -0.7526665925979614, + "loss": 0.5612, + "nll_loss": 0.4239347577095032, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.14855164289474487, + "rewards/margins": 0.001981673762202263, + "rewards/rejected": -0.15053331851959229, + "step": 540 + }, + { + "epoch": 0.39, + "grad_norm": 5.1875, + "learning_rate": 4.867923835112355e-06, + "log_odds_chosen": 0.12316079437732697, + "log_odds_ratio": -0.6851805448532104, + "logits/chosen": -3.174053907394409, + "logits/rejected": -3.1798911094665527, + "logps/chosen": -0.6940979957580566, + "logps/rejected": -0.7675840258598328, + "loss": 0.5704, + "nll_loss": 0.46909135580062866, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1388196051120758, + "rewards/margins": 0.01469721831381321, + "rewards/rejected": -0.15351681411266327, + "step": 550 + }, + { + "epoch": 0.4, + "grad_norm": 5.4375, + "learning_rate": 4.865618401009519e-06, + "log_odds_chosen": 0.06827814131975174, + "log_odds_ratio": -0.7105705738067627, + "logits/chosen": -3.1506597995758057, + "logits/rejected": -3.1666228771209717, + "logps/chosen": -0.749689519405365, + "logps/rejected": -0.7806918621063232, + "loss": 0.5884, + "nll_loss": 0.44312652945518494, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.14993791282176971, + "rewards/margins": 0.0062004560604691505, + "rewards/rejected": -0.1561383754014969, + "step": 560 + }, + { + "epoch": 0.41, + "grad_norm": 4.96875, + "learning_rate": 4.863316239347163e-06, + "log_odds_chosen": 0.12389856576919556, + "log_odds_ratio": -0.6848828196525574, + "logits/chosen": -3.1486778259277344, + "logits/rejected": -3.143662452697754, + "logps/chosen": -0.6603912115097046, + "logps/rejected": -0.7258492112159729, + "loss": 0.5353, + "nll_loss": 0.3620496392250061, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.13207824528217316, + "rewards/margins": 0.013091601431369781, + "rewards/rejected": -0.14516983926296234, + "step": 570 + }, + { + "epoch": 0.41, + "grad_norm": 5.0, + "learning_rate": 4.861017342390847e-06, + "log_odds_chosen": 0.14408716559410095, + "log_odds_ratio": -0.6706200242042542, + "logits/chosen": -3.1252732276916504, + "logits/rejected": -3.120234966278076, + "logps/chosen": -0.700475811958313, + "logps/rejected": -0.7832350730895996, + "loss": 0.5601, + "nll_loss": 0.41865190863609314, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14009517431259155, + "rewards/margins": 0.016551848500967026, + "rewards/rejected": -0.15664701163768768, + "step": 580 + }, + { + "epoch": 0.42, + "grad_norm": 5.4375, + "learning_rate": 4.858721702431704e-06, + "log_odds_chosen": 0.19907112419605255, + "log_odds_ratio": -0.6365267634391785, + "logits/chosen": -3.1869750022888184, + "logits/rejected": -3.1916017532348633, + "logps/chosen": -0.6768631339073181, + "logps/rejected": -0.7860314249992371, + "loss": 0.5486, + "nll_loss": 0.4141230583190918, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.13537263870239258, + "rewards/margins": 0.02183363400399685, + "rewards/rejected": -0.15720628201961517, + "step": 590 + }, + { + "epoch": 0.43, + "grad_norm": 5.1875, + "learning_rate": 4.856429311786322e-06, + "log_odds_chosen": 0.20475217700004578, + "log_odds_ratio": -0.6470059752464294, + "logits/chosen": -3.0911927223205566, + "logits/rejected": -3.088820219039917, + "logps/chosen": -0.7005254030227661, + "logps/rejected": -0.8171119689941406, + "loss": 0.5372, + "nll_loss": 0.4075283110141754, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.14010508358478546, + "rewards/margins": 0.023317309096455574, + "rewards/rejected": -0.1634223908185959, + "step": 600 + }, + { + "epoch": 0.44, + "grad_norm": 5.3125, + "learning_rate": 4.8541401627966426e-06, + "log_odds_chosen": 0.16298583149909973, + "log_odds_ratio": -0.6658545732498169, + "logits/chosen": -3.041769504547119, + "logits/rejected": -3.043297290802002, + "logps/chosen": -0.689085066318512, + "logps/rejected": -0.7849712371826172, + "loss": 0.5575, + "nll_loss": 0.4643743634223938, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13781702518463135, + "rewards/margins": 0.01917722448706627, + "rewards/rejected": -0.15699425339698792, + "step": 610 + }, + { + "epoch": 0.44, + "grad_norm": 4.78125, + "learning_rate": 4.85185424782985e-06, + "log_odds_chosen": 0.12417051941156387, + "log_odds_ratio": -0.68137526512146, + "logits/chosen": -2.961411714553833, + "logits/rejected": -2.9629132747650146, + "logps/chosen": -0.6715599298477173, + "logps/rejected": -0.7414580583572388, + "loss": 0.5462, + "nll_loss": 0.3696933686733246, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.1343119889497757, + "rewards/margins": 0.013979624025523663, + "rewards/rejected": -0.14829161763191223, + "step": 620 + }, + { + "epoch": 0.45, + "grad_norm": 5.28125, + "learning_rate": 4.8495715592782715e-06, + "log_odds_chosen": 0.129594087600708, + "log_odds_ratio": -0.6814179420471191, + "logits/chosen": -3.0220203399658203, + "logits/rejected": -3.007021427154541, + "logps/chosen": -0.7019616365432739, + "logps/rejected": -0.7783767580986023, + "loss": 0.581, + "nll_loss": 0.43684515357017517, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.14039233326911926, + "rewards/margins": 0.0152830109000206, + "rewards/rejected": -0.15567535161972046, + "step": 630 + }, + { + "epoch": 0.46, + "grad_norm": 5.34375, + "learning_rate": 4.847292089559258e-06, + "log_odds_chosen": 0.14772093296051025, + "log_odds_ratio": -0.6776904463768005, + "logits/chosen": -3.015796661376953, + "logits/rejected": -2.998617649078369, + "logps/chosen": -0.6798470616340637, + "logps/rejected": -0.7485564351081848, + "loss": 0.5124, + "nll_loss": 0.3927190601825714, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.13596941530704498, + "rewards/margins": 0.01374187134206295, + "rewards/rejected": -0.14971129596233368, + "step": 640 + }, + { + "epoch": 0.46, + "grad_norm": 5.8125, + "learning_rate": 4.845015831115093e-06, + "log_odds_chosen": 0.22233247756958008, + "log_odds_ratio": -0.6407415866851807, + "logits/chosen": -3.0206289291381836, + "logits/rejected": -2.993927478790283, + "logps/chosen": -0.7096582055091858, + "logps/rejected": -0.8297877311706543, + "loss": 0.5503, + "nll_loss": 0.4199894070625305, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.1419316530227661, + "rewards/margins": 0.024025900289416313, + "rewards/rejected": -0.16595754027366638, + "step": 650 + }, + { + "epoch": 0.47, + "grad_norm": 7.5, + "learning_rate": 4.842742776412874e-06, + "log_odds_chosen": 0.19071760773658752, + "log_odds_ratio": -0.6609346270561218, + "logits/chosen": -2.93975567817688, + "logits/rejected": -2.927455425262451, + "logps/chosen": -0.6851130723953247, + "logps/rejected": -0.7924318313598633, + "loss": 0.5098, + "nll_loss": 0.39896029233932495, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.13702259957790375, + "rewards/margins": 0.02146376296877861, + "rewards/rejected": -0.15848635137081146, + "step": 660 + }, + { + "epoch": 0.48, + "grad_norm": 5.03125, + "learning_rate": 4.840472917944417e-06, + "log_odds_chosen": 0.035539038479328156, + "log_odds_ratio": -0.7433319687843323, + "logits/chosen": -2.91545033454895, + "logits/rejected": -2.904900074005127, + "logps/chosen": -0.7592023015022278, + "logps/rejected": -0.778312087059021, + "loss": 0.5748, + "nll_loss": 0.43543463945388794, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.151840478181839, + "rewards/margins": 0.0038219629786908627, + "rewards/rejected": -0.1556624323129654, + "step": 670 + }, + { + "epoch": 0.49, + "grad_norm": 5.0, + "learning_rate": 4.838206248226147e-06, + "log_odds_chosen": 0.12000226974487305, + "log_odds_ratio": -0.6970423460006714, + "logits/chosen": -2.918330430984497, + "logits/rejected": -2.909175395965576, + "logps/chosen": -0.7081884145736694, + "logps/rejected": -0.7808458209037781, + "loss": 0.5926, + "nll_loss": 0.4864211976528168, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1416376680135727, + "rewards/margins": 0.01453147642314434, + "rewards/rejected": -0.15616916120052338, + "step": 680 + }, + { + "epoch": 0.49, + "grad_norm": 5.125, + "learning_rate": 4.835942759799002e-06, + "log_odds_chosen": 0.12994246184825897, + "log_odds_ratio": -0.679962694644928, + "logits/chosen": -2.9056477546691895, + "logits/rejected": -2.907723903656006, + "logps/chosen": -0.6788502335548401, + "logps/rejected": -0.7463597059249878, + "loss": 0.5609, + "nll_loss": 0.4340883791446686, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.1357700526714325, + "rewards/margins": 0.013501903042197227, + "rewards/rejected": -0.14927193522453308, + "step": 690 + }, + { + "epoch": 0.5, + "grad_norm": 4.6875, + "learning_rate": 4.833682445228318e-06, + "log_odds_chosen": 0.1527804434299469, + "log_odds_ratio": -0.6715155243873596, + "logits/chosen": -2.8825762271881104, + "logits/rejected": -2.878715991973877, + "logps/chosen": -0.7160866856575012, + "logps/rejected": -0.7908951044082642, + "loss": 0.52, + "nll_loss": 0.3986133933067322, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14321734011173248, + "rewards/margins": 0.014961689710617065, + "rewards/rejected": -0.15817902982234955, + "step": 700 + }, + { + "epoch": 0.51, + "grad_norm": 4.84375, + "learning_rate": 4.831425297103738e-06, + "log_odds_chosen": 0.07408870756626129, + "log_odds_ratio": -0.7036418318748474, + "logits/chosen": -2.915374279022217, + "logits/rejected": -2.8941776752471924, + "logps/chosen": -0.7258971929550171, + "logps/rejected": -0.776419460773468, + "loss": 0.5574, + "nll_loss": 0.4164521098136902, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14517942070960999, + "rewards/margins": 0.010104473680257797, + "rewards/rejected": -0.1552838832139969, + "step": 710 + }, + { + "epoch": 0.51, + "grad_norm": 5.25, + "learning_rate": 4.829171308039099e-06, + "log_odds_chosen": 0.11705788224935532, + "log_odds_ratio": -0.6934496164321899, + "logits/chosen": -2.8689353466033936, + "logits/rejected": -2.855515480041504, + "logps/chosen": -0.7816129326820374, + "logps/rejected": -0.8508423566818237, + "loss": 0.5618, + "nll_loss": 0.41237854957580566, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.15632258355617523, + "rewards/margins": 0.013845880515873432, + "rewards/rejected": -0.1701684594154358, + "step": 720 + }, + { + "epoch": 0.52, + "grad_norm": 4.90625, + "learning_rate": 4.826920470672344e-06, + "log_odds_chosen": 0.14946161210536957, + "log_odds_ratio": -0.6791409254074097, + "logits/chosen": -2.920727252960205, + "logits/rejected": -2.9093258380889893, + "logps/chosen": -0.721591055393219, + "logps/rejected": -0.8128422498703003, + "loss": 0.5434, + "nll_loss": 0.4294136166572571, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.14431820809841156, + "rewards/margins": 0.018250230699777603, + "rewards/rejected": -0.16256846487522125, + "step": 730 + }, + { + "epoch": 0.53, + "grad_norm": 4.96875, + "learning_rate": 4.824672777665406e-06, + "log_odds_chosen": 0.1518145054578781, + "log_odds_ratio": -0.6733521819114685, + "logits/chosen": -2.8987679481506348, + "logits/rejected": -2.8861145973205566, + "logps/chosen": -0.7268251180648804, + "logps/rejected": -0.8102623224258423, + "loss": 0.592, + "nll_loss": 0.4659670889377594, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.14536501467227936, + "rewards/margins": 0.01668746955692768, + "rewards/rejected": -0.1620524823665619, + "step": 740 + }, + { + "epoch": 0.54, + "grad_norm": 5.46875, + "learning_rate": 4.822428221704122e-06, + "log_odds_chosen": 0.10895228385925293, + "log_odds_ratio": -0.6847666501998901, + "logits/chosen": -2.9176247119903564, + "logits/rejected": -2.9068551063537598, + "logps/chosen": -0.6662013530731201, + "logps/rejected": -0.7180477976799011, + "loss": 0.5466, + "nll_loss": 0.4058578908443451, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.13324028253555298, + "rewards/margins": 0.010369287803769112, + "rewards/rejected": -0.14360955357551575, + "step": 750 + }, + { + "epoch": 0.54, + "grad_norm": 5.5, + "learning_rate": 4.820186795498119e-06, + "log_odds_chosen": 0.07982759177684784, + "log_odds_ratio": -0.6936464905738831, + "logits/chosen": -2.900437831878662, + "logits/rejected": -2.9012367725372314, + "logps/chosen": -0.684615433216095, + "logps/rejected": -0.7419982552528381, + "loss": 0.5572, + "nll_loss": 0.4167535901069641, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.13692307472229004, + "rewards/margins": 0.011476578190922737, + "rewards/rejected": -0.14839965105056763, + "step": 760 + }, + { + "epoch": 0.55, + "grad_norm": 5.5625, + "learning_rate": 4.817948491780728e-06, + "log_odds_chosen": 0.07513515651226044, + "log_odds_ratio": -0.700663685798645, + "logits/chosen": -2.92631196975708, + "logits/rejected": -2.9068970680236816, + "logps/chosen": -0.701633095741272, + "logps/rejected": -0.7442451119422913, + "loss": 0.5573, + "nll_loss": 0.41745367646217346, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.1403266191482544, + "rewards/margins": 0.008522395975887775, + "rewards/rejected": -0.1488490104675293, + "step": 770 + }, + { + "epoch": 0.56, + "grad_norm": 5.5625, + "learning_rate": 4.815713303308872e-06, + "log_odds_chosen": 0.07833166420459747, + "log_odds_ratio": -0.7058992385864258, + "logits/chosen": -2.877963066101074, + "logits/rejected": -2.8587679862976074, + "logps/chosen": -0.7175201773643494, + "logps/rejected": -0.7572144865989685, + "loss": 0.558, + "nll_loss": 0.4125480055809021, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1435040533542633, + "rewards/margins": 0.00793885625898838, + "rewards/rejected": -0.15144291520118713, + "step": 780 + }, + { + "epoch": 0.56, + "grad_norm": 4.84375, + "learning_rate": 4.813481222862981e-06, + "log_odds_chosen": 0.11849744617938995, + "log_odds_ratio": -0.6830799579620361, + "logits/chosen": -2.9524035453796387, + "logits/rejected": -2.9341073036193848, + "logps/chosen": -0.6966606378555298, + "logps/rejected": -0.7586569786071777, + "loss": 0.5619, + "nll_loss": 0.43152493238449097, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.1393321305513382, + "rewards/margins": 0.012399254366755486, + "rewards/rejected": -0.15173138678073883, + "step": 790 + }, + { + "epoch": 0.57, + "grad_norm": 5.53125, + "learning_rate": 4.811252243246881e-06, + "log_odds_chosen": 0.135534405708313, + "log_odds_ratio": -0.6746788620948792, + "logits/chosen": -2.8978843688964844, + "logits/rejected": -2.8979437351226807, + "logps/chosen": -0.7271771430969238, + "logps/rejected": -0.7996577024459839, + "loss": 0.5428, + "nll_loss": 0.4260443150997162, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.14543543756008148, + "rewards/margins": 0.01449611783027649, + "rewards/rejected": -0.15993155539035797, + "step": 800 + }, + { + "epoch": 0.58, + "grad_norm": 4.3125, + "learning_rate": 4.809026357287709e-06, + "log_odds_chosen": 0.11346729844808578, + "log_odds_ratio": -0.6924766898155212, + "logits/chosen": -2.9434874057769775, + "logits/rejected": -2.931755781173706, + "logps/chosen": -0.6964095830917358, + "logps/rejected": -0.763741135597229, + "loss": 0.5738, + "nll_loss": 0.4434167444705963, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.13928191363811493, + "rewards/margins": 0.013466304168105125, + "rewards/rejected": -0.1527482271194458, + "step": 810 + }, + { + "epoch": 0.59, + "grad_norm": 5.65625, + "learning_rate": 4.806803557835802e-06, + "log_odds_chosen": 0.053010594099760056, + "log_odds_ratio": -0.7078055143356323, + "logits/chosen": -2.951209545135498, + "logits/rejected": -2.9389567375183105, + "logps/chosen": -0.7215791344642639, + "logps/rejected": -0.7523022890090942, + "loss": 0.5626, + "nll_loss": 0.45763856172561646, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14431582391262054, + "rewards/margins": 0.0061446288600564, + "rewards/rejected": -0.15046045184135437, + "step": 820 + }, + { + "epoch": 0.59, + "grad_norm": 5.0625, + "learning_rate": 4.804583837764616e-06, + "log_odds_chosen": 0.12964418530464172, + "log_odds_ratio": -0.6815978288650513, + "logits/chosen": -2.977019786834717, + "logits/rejected": -2.9383797645568848, + "logps/chosen": -0.7318437099456787, + "logps/rejected": -0.8120439648628235, + "loss": 0.5538, + "nll_loss": 0.42555293440818787, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.14636874198913574, + "rewards/margins": 0.01604006253182888, + "rewards/rejected": -0.16240879893302917, + "step": 830 + }, + { + "epoch": 0.6, + "grad_norm": 5.75, + "learning_rate": 4.802367189970616e-06, + "log_odds_chosen": 0.12411677837371826, + "log_odds_ratio": -0.6854800581932068, + "logits/chosen": -2.913602828979492, + "logits/rejected": -2.901832342147827, + "logps/chosen": -0.7199221849441528, + "logps/rejected": -0.792190432548523, + "loss": 0.5405, + "nll_loss": 0.42834392189979553, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.14398446679115295, + "rewards/margins": 0.014453647658228874, + "rewards/rejected": -0.15843810141086578, + "step": 840 + }, + { + "epoch": 0.61, + "grad_norm": 5.3125, + "learning_rate": 4.8001536073731936e-06, + "log_odds_chosen": 0.1903192102909088, + "log_odds_ratio": -0.6434763669967651, + "logits/chosen": -2.9723453521728516, + "logits/rejected": -2.952861785888672, + "logps/chosen": -0.6776723265647888, + "logps/rejected": -0.7700678110122681, + "loss": 0.5276, + "nll_loss": 0.4004877209663391, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13553445041179657, + "rewards/margins": 0.018479080870747566, + "rewards/rejected": -0.15401355922222137, + "step": 850 + }, + { + "epoch": 0.61, + "grad_norm": 4.84375, + "learning_rate": 4.797943082914558e-06, + "log_odds_chosen": 0.1074991226196289, + "log_odds_ratio": -0.6918960809707642, + "logits/chosen": -2.931121349334717, + "logits/rejected": -2.9277279376983643, + "logps/chosen": -0.7141292691230774, + "logps/rejected": -0.7773590087890625, + "loss": 0.5386, + "nll_loss": 0.4104904532432556, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.1428258717060089, + "rewards/margins": 0.012645942158997059, + "rewards/rejected": -0.1554718017578125, + "step": 860 + }, + { + "epoch": 0.62, + "grad_norm": 5.0625, + "learning_rate": 4.795735609559657e-06, + "log_odds_chosen": 0.1122872605919838, + "log_odds_ratio": -0.6973878145217896, + "logits/chosen": -2.918083429336548, + "logits/rejected": -2.9120912551879883, + "logps/chosen": -0.749970555305481, + "logps/rejected": -0.8086145520210266, + "loss": 0.5636, + "nll_loss": 0.4259462356567383, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.1499941051006317, + "rewards/margins": 0.011728787794709206, + "rewards/rejected": -0.16172286868095398, + "step": 870 + }, + { + "epoch": 0.63, + "grad_norm": 5.375, + "learning_rate": 4.793531180296065e-06, + "log_odds_chosen": 0.17277751863002777, + "log_odds_ratio": -0.6646739840507507, + "logits/chosen": -2.9382827281951904, + "logits/rejected": -2.936652421951294, + "logps/chosen": -0.7452108860015869, + "logps/rejected": -0.8357378840446472, + "loss": 0.5698, + "nll_loss": 0.42103928327560425, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.14904220402240753, + "rewards/margins": 0.018105393275618553, + "rewards/rejected": -0.16714756190776825, + "step": 880 + }, + { + "epoch": 0.64, + "grad_norm": 5.3125, + "learning_rate": 4.7913297881339085e-06, + "log_odds_chosen": 0.23507757484912872, + "log_odds_ratio": -0.6391969323158264, + "logits/chosen": -2.9719607830047607, + "logits/rejected": -2.9441778659820557, + "logps/chosen": -0.6970862746238708, + "logps/rejected": -0.8258684873580933, + "loss": 0.5789, + "nll_loss": 0.4702020287513733, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.13941726088523865, + "rewards/margins": 0.02575644478201866, + "rewards/rejected": -0.1651737093925476, + "step": 890 + }, + { + "epoch": 0.64, + "grad_norm": 5.40625, + "learning_rate": 4.789131426105757e-06, + "log_odds_chosen": 0.1419156789779663, + "log_odds_ratio": -0.681240975856781, + "logits/chosen": -2.9537596702575684, + "logits/rejected": -2.9550156593322754, + "logps/chosen": -0.6770733594894409, + "logps/rejected": -0.7640056610107422, + "loss": 0.5749, + "nll_loss": 0.40316563844680786, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.13541468977928162, + "rewards/margins": 0.01738644763827324, + "rewards/rejected": -0.15280112624168396, + "step": 900 + }, + { + "epoch": 0.65, + "grad_norm": 5.1875, + "learning_rate": 4.786936087266542e-06, + "log_odds_chosen": 0.16488385200500488, + "log_odds_ratio": -0.6757220029830933, + "logits/chosen": -2.9237289428710938, + "logits/rejected": -2.918750047683716, + "logps/chosen": -0.6665478944778442, + "logps/rejected": -0.7528942823410034, + "loss": 0.5518, + "nll_loss": 0.4029026925563812, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.13330957293510437, + "rewards/margins": 0.017269287258386612, + "rewards/rejected": -0.15057885646820068, + "step": 910 + }, + { + "epoch": 0.66, + "grad_norm": 4.53125, + "learning_rate": 4.784743764693455e-06, + "log_odds_chosen": 0.1091703400015831, + "log_odds_ratio": -0.6948757767677307, + "logits/chosen": -2.9501147270202637, + "logits/rejected": -2.9290926456451416, + "logps/chosen": -0.7138906717300415, + "logps/rejected": -0.7786569595336914, + "loss": 0.5813, + "nll_loss": 0.45604902505874634, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.14277812838554382, + "rewards/margins": 0.012953246012330055, + "rewards/rejected": -0.15573139488697052, + "step": 920 + }, + { + "epoch": 0.66, + "grad_norm": 4.78125, + "learning_rate": 4.7825544514858655e-06, + "log_odds_chosen": 0.12012086063623428, + "log_odds_ratio": -0.6921867728233337, + "logits/chosen": -2.9667954444885254, + "logits/rejected": -2.9569287300109863, + "logps/chosen": -0.7008910179138184, + "logps/rejected": -0.7661630511283875, + "loss": 0.5731, + "nll_loss": 0.44391852617263794, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.14017818868160248, + "rewards/margins": 0.013054423034191132, + "rewards/rejected": -0.153232604265213, + "step": 930 + }, + { + "epoch": 0.67, + "grad_norm": 5.875, + "learning_rate": 4.780368140765222e-06, + "log_odds_chosen": 0.0706237182021141, + "log_odds_ratio": -0.714358925819397, + "logits/chosen": -2.932490348815918, + "logits/rejected": -2.9120583534240723, + "logps/chosen": -0.6620159149169922, + "logps/rejected": -0.7102792859077454, + "loss": 0.5259, + "nll_loss": 0.40104350447654724, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1324031949043274, + "rewards/margins": 0.009652670472860336, + "rewards/rejected": -0.14205586910247803, + "step": 940 + }, + { + "epoch": 0.68, + "grad_norm": 4.875, + "learning_rate": 4.778184825674966e-06, + "log_odds_chosen": 0.17702895402908325, + "log_odds_ratio": -0.6546781063079834, + "logits/chosen": -2.94228196144104, + "logits/rejected": -2.9221675395965576, + "logps/chosen": -0.6707956194877625, + "logps/rejected": -0.7657662630081177, + "loss": 0.5465, + "nll_loss": 0.39552420377731323, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1341591328382492, + "rewards/margins": 0.01899414323270321, + "rewards/rejected": -0.15315327048301697, + "step": 950 + }, + { + "epoch": 0.69, + "grad_norm": 5.09375, + "learning_rate": 4.776004499380439e-06, + "log_odds_chosen": 0.019583452492952347, + "log_odds_ratio": -0.7346916794776917, + "logits/chosen": -2.9015583992004395, + "logits/rejected": -2.9088730812072754, + "logps/chosen": -0.7036567330360413, + "logps/rejected": -0.7168689966201782, + "loss": 0.5455, + "nll_loss": 0.3951405882835388, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.14073136448860168, + "rewards/margins": 0.002642437582835555, + "rewards/rejected": -0.1433737874031067, + "step": 960 + }, + { + "epoch": 0.69, + "grad_norm": 5.40625, + "learning_rate": 4.773827155068793e-06, + "log_odds_chosen": 0.06732301414012909, + "log_odds_ratio": -0.7026981115341187, + "logits/chosen": -2.9094488620758057, + "logits/rejected": -2.902775287628174, + "logps/chosen": -0.7069900631904602, + "logps/rejected": -0.7331331372261047, + "loss": 0.5518, + "nll_loss": 0.3954068422317505, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.14139801263809204, + "rewards/margins": 0.005228628404438496, + "rewards/rejected": -0.14662663638591766, + "step": 970 + }, + { + "epoch": 0.7, + "grad_norm": 4.9375, + "learning_rate": 4.771652785948902e-06, + "log_odds_chosen": 0.11821585893630981, + "log_odds_ratio": -0.6919859647750854, + "logits/chosen": -2.9587864875793457, + "logits/rejected": -2.948873519897461, + "logps/chosen": -0.7012097835540771, + "logps/rejected": -0.7723513841629028, + "loss": 0.5381, + "nll_loss": 0.38139674067497253, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14024195075035095, + "rewards/margins": 0.014228323474526405, + "rewards/rejected": -0.154470294713974, + "step": 980 + }, + { + "epoch": 0.71, + "grad_norm": 5.09375, + "learning_rate": 4.769481385251275e-06, + "log_odds_chosen": 0.10404877364635468, + "log_odds_ratio": -0.7043722867965698, + "logits/chosen": -2.9584336280822754, + "logits/rejected": -2.9519834518432617, + "logps/chosen": -0.6719237565994263, + "logps/rejected": -0.7380385994911194, + "loss": 0.5493, + "nll_loss": 0.43441280722618103, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.13438475131988525, + "rewards/margins": 0.013222972862422466, + "rewards/rejected": -0.1476077288389206, + "step": 990 + }, + { + "epoch": 0.71, + "grad_norm": 5.53125, + "learning_rate": 4.767312946227961e-06, + "log_odds_chosen": 0.12437696754932404, + "log_odds_ratio": -0.6797572374343872, + "logits/chosen": -2.9542737007141113, + "logits/rejected": -2.944174289703369, + "logps/chosen": -0.6895853877067566, + "logps/rejected": -0.7687771916389465, + "loss": 0.5247, + "nll_loss": 0.397632360458374, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.13791708648204803, + "rewards/margins": 0.01583835482597351, + "rewards/rejected": -0.15375544130802155, + "step": 1000 + }, + { + "epoch": 0.72, + "grad_norm": 5.59375, + "learning_rate": 4.765147462152471e-06, + "log_odds_chosen": 0.03156871721148491, + "log_odds_ratio": -0.7198628187179565, + "logits/chosen": -2.9656319618225098, + "logits/rejected": -2.9585325717926025, + "logps/chosen": -0.7327839732170105, + "logps/rejected": -0.7419841885566711, + "loss": 0.5595, + "nll_loss": 0.45351147651672363, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.1465567946434021, + "rewards/margins": 0.0018400519620627165, + "rewards/rejected": -0.148396834731102, + "step": 1010 + }, + { + "epoch": 0.73, + "grad_norm": 6.03125, + "learning_rate": 4.762984926319677e-06, + "log_odds_chosen": 0.1008574515581131, + "log_odds_ratio": -0.7012881636619568, + "logits/chosen": -2.900951623916626, + "logits/rejected": -2.913351058959961, + "logps/chosen": -0.7476619482040405, + "logps/rejected": -0.7988759279251099, + "loss": 0.5313, + "nll_loss": 0.3712484538555145, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.14953239262104034, + "rewards/margins": 0.01024280209094286, + "rewards/rejected": -0.15977518260478973, + "step": 1020 + }, + { + "epoch": 0.74, + "grad_norm": 4.875, + "learning_rate": 4.760825332045738e-06, + "log_odds_chosen": 0.1647627353668213, + "log_odds_ratio": -0.6773719787597656, + "logits/chosen": -2.916574239730835, + "logits/rejected": -2.9114716053009033, + "logps/chosen": -0.732185959815979, + "logps/rejected": -0.820814311504364, + "loss": 0.563, + "nll_loss": 0.40981560945510864, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.14643718302249908, + "rewards/margins": 0.01772570051252842, + "rewards/rejected": -0.16416288912296295, + "step": 1030 + }, + { + "epoch": 0.74, + "grad_norm": 4.59375, + "learning_rate": 4.758668672668006e-06, + "log_odds_chosen": -0.007136444561183453, + "log_odds_ratio": -0.7459918260574341, + "logits/chosen": -2.950622797012329, + "logits/rejected": -2.935281276702881, + "logps/chosen": -0.7756951451301575, + "logps/rejected": -0.7845336198806763, + "loss": 0.5498, + "nll_loss": 0.4428860545158386, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.1551390290260315, + "rewards/margins": 0.001767703564837575, + "rewards/rejected": -0.15690675377845764, + "step": 1040 + }, + { + "epoch": 0.75, + "grad_norm": 5.3125, + "learning_rate": 4.756514941544941e-06, + "log_odds_chosen": 0.22850975394248962, + "log_odds_ratio": -0.6434667110443115, + "logits/chosen": -2.916316509246826, + "logits/rejected": -2.900111675262451, + "logps/chosen": -0.7090237736701965, + "logps/rejected": -0.8277368545532227, + "loss": 0.5347, + "nll_loss": 0.39760535955429077, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.1418047398328781, + "rewards/margins": 0.02374262548983097, + "rewards/rejected": -0.16554740071296692, + "step": 1050 + }, + { + "epoch": 0.76, + "grad_norm": 5.0, + "learning_rate": 4.754364132056025e-06, + "log_odds_chosen": 0.23557403683662415, + "log_odds_ratio": -0.6366820931434631, + "logits/chosen": -2.9666476249694824, + "logits/rejected": -2.964963436126709, + "logps/chosen": -0.6721023917198181, + "logps/rejected": -0.7975376844406128, + "loss": 0.5546, + "nll_loss": 0.43326544761657715, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.1344204843044281, + "rewards/margins": 0.025087062269449234, + "rewards/rejected": -0.15950754284858704, + "step": 1060 + }, + { + "epoch": 0.76, + "grad_norm": 4.375, + "learning_rate": 4.752216237601676e-06, + "log_odds_chosen": 0.17871122062206268, + "log_odds_ratio": -0.6607886552810669, + "logits/chosen": -2.9594788551330566, + "logits/rejected": -2.949906349182129, + "logps/chosen": -0.6754817366600037, + "logps/rejected": -0.761304497718811, + "loss": 0.5284, + "nll_loss": 0.41694697737693787, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.13509634137153625, + "rewards/margins": 0.0171645637601614, + "rewards/rejected": -0.1522609144449234, + "step": 1070 + }, + { + "epoch": 0.77, + "grad_norm": 5.21875, + "learning_rate": 4.750071251603165e-06, + "log_odds_chosen": 0.10223817825317383, + "log_odds_ratio": -0.6892545819282532, + "logits/chosen": -2.91923189163208, + "logits/rejected": -2.9072306156158447, + "logps/chosen": -0.700354814529419, + "logps/rejected": -0.7621926069259644, + "loss": 0.5481, + "nll_loss": 0.4226892590522766, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.14007095992565155, + "rewards/margins": 0.01236753724515438, + "rewards/rejected": -0.15243850648403168, + "step": 1080 + }, + { + "epoch": 0.78, + "grad_norm": 5.5625, + "learning_rate": 4.7479291675025314e-06, + "log_odds_chosen": 0.16114802658557892, + "log_odds_ratio": -0.6601124405860901, + "logits/chosen": -2.9457671642303467, + "logits/rejected": -2.9389617443084717, + "logps/chosen": -0.6818099617958069, + "logps/rejected": -0.7652055025100708, + "loss": 0.5778, + "nll_loss": 0.4147875905036926, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.1363619863986969, + "rewards/margins": 0.01667911931872368, + "rewards/rejected": -0.15304109454154968, + "step": 1090 + }, + { + "epoch": 0.79, + "grad_norm": 4.625, + "learning_rate": 4.745789978762496e-06, + "log_odds_chosen": 0.08631005138158798, + "log_odds_ratio": -0.7082911729812622, + "logits/chosen": -2.927237033843994, + "logits/rejected": -2.929316997528076, + "logps/chosen": -0.7573009729385376, + "logps/rejected": -0.8193367719650269, + "loss": 0.5399, + "nll_loss": 0.4314785897731781, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.151460200548172, + "rewards/margins": 0.012407159432768822, + "rewards/rejected": -0.16386735439300537, + "step": 1100 + }, + { + "epoch": 0.79, + "grad_norm": 4.5625, + "learning_rate": 4.7436536788663765e-06, + "log_odds_chosen": 0.07461805641651154, + "log_odds_ratio": -0.710638701915741, + "logits/chosen": -2.954481840133667, + "logits/rejected": -2.947251796722412, + "logps/chosen": -0.7126928567886353, + "logps/rejected": -0.7473545074462891, + "loss": 0.5331, + "nll_loss": 0.39857620000839233, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.14253857731819153, + "rewards/margins": 0.006932335905730724, + "rewards/rejected": -0.14947089552879333, + "step": 1110 + }, + { + "epoch": 0.8, + "grad_norm": 5.59375, + "learning_rate": 4.74152026131801e-06, + "log_odds_chosen": 0.06558915972709656, + "log_odds_ratio": -0.7165695428848267, + "logits/chosen": -2.9481871128082275, + "logits/rejected": -2.934065341949463, + "logps/chosen": -0.7117661237716675, + "logps/rejected": -0.7454390525817871, + "loss": 0.5532, + "nll_loss": 0.41643962264060974, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.14235322177410126, + "rewards/margins": 0.006734585855156183, + "rewards/rejected": -0.1490878164768219, + "step": 1120 + }, + { + "epoch": 0.81, + "grad_norm": 5.25, + "learning_rate": 4.739389719641665e-06, + "log_odds_chosen": 0.038577549159526825, + "log_odds_ratio": -0.7274538278579712, + "logits/chosen": -2.9647698402404785, + "logits/rejected": -2.953193187713623, + "logps/chosen": -0.7552378177642822, + "logps/rejected": -0.767500102519989, + "loss": 0.5665, + "nll_loss": 0.42185306549072266, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.15104755759239197, + "rewards/margins": 0.002452465472742915, + "rewards/rejected": -0.1535000503063202, + "step": 1130 + }, + { + "epoch": 0.81, + "grad_norm": 5.78125, + "learning_rate": 4.7372620473819615e-06, + "log_odds_chosen": 0.14568562805652618, + "log_odds_ratio": -0.6789036989212036, + "logits/chosen": -2.9319028854370117, + "logits/rejected": -2.9229094982147217, + "logps/chosen": -0.6954749822616577, + "logps/rejected": -0.7834473848342896, + "loss": 0.5403, + "nll_loss": 0.4035823345184326, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.1390949785709381, + "rewards/margins": 0.01759449765086174, + "rewards/rejected": -0.15668947994709015, + "step": 1140 + }, + { + "epoch": 0.82, + "grad_norm": 5.09375, + "learning_rate": 4.735137238103785e-06, + "log_odds_chosen": -0.04453912377357483, + "log_odds_ratio": -0.7690579295158386, + "logits/chosen": -2.9604084491729736, + "logits/rejected": -2.9683547019958496, + "logps/chosen": -0.7146620750427246, + "logps/rejected": -0.6833995580673218, + "loss": 0.5543, + "nll_loss": 0.4193571209907532, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.14293241500854492, + "rewards/margins": -0.006252489052712917, + "rewards/rejected": -0.13667991757392883, + "step": 1150 + }, + { + "epoch": 0.83, + "grad_norm": 4.59375, + "learning_rate": 4.7330152853922064e-06, + "log_odds_chosen": 0.10607735067605972, + "log_odds_ratio": -0.6870900988578796, + "logits/chosen": -2.933436870574951, + "logits/rejected": -2.926907777786255, + "logps/chosen": -0.707848846912384, + "logps/rejected": -0.7633363008499146, + "loss": 0.5204, + "nll_loss": 0.40916410088539124, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.14156976342201233, + "rewards/margins": 0.011097497306764126, + "rewards/rejected": -0.15266726911067963, + "step": 1160 + }, + { + "epoch": 0.84, + "grad_norm": 5.03125, + "learning_rate": 4.730896182852409e-06, + "log_odds_chosen": 0.11698383092880249, + "log_odds_ratio": -0.6935534477233887, + "logits/chosen": -2.9313931465148926, + "logits/rejected": -2.911740779876709, + "logps/chosen": -0.7008348107337952, + "logps/rejected": -0.7741026282310486, + "loss": 0.5431, + "nll_loss": 0.4040239453315735, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.1401669681072235, + "rewards/margins": 0.014653565362095833, + "rewards/rejected": -0.1548205316066742, + "step": 1170 + }, + { + "epoch": 0.84, + "grad_norm": 4.28125, + "learning_rate": 4.72877992410959e-06, + "log_odds_chosen": 0.10059946775436401, + "log_odds_ratio": -0.7010030150413513, + "logits/chosen": -2.946838855743408, + "logits/rejected": -2.9204533100128174, + "logps/chosen": -0.6578399538993835, + "logps/rejected": -0.7220724821090698, + "loss": 0.5386, + "nll_loss": 0.3835596740245819, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.13156801462173462, + "rewards/margins": 0.012846499681472778, + "rewards/rejected": -0.1444145143032074, + "step": 1180 + }, + { + "epoch": 0.85, + "grad_norm": 4.84375, + "learning_rate": 4.7266665028088985e-06, + "log_odds_chosen": 0.08397103101015091, + "log_odds_ratio": -0.7051702737808228, + "logits/chosen": -2.93192982673645, + "logits/rejected": -2.9253973960876465, + "logps/chosen": -0.6880505681037903, + "logps/rejected": -0.7314938306808472, + "loss": 0.5324, + "nll_loss": 0.3771182596683502, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.13761012256145477, + "rewards/margins": 0.008688644506037235, + "rewards/rejected": -0.14629876613616943, + "step": 1190 + }, + { + "epoch": 0.86, + "grad_norm": 5.0, + "learning_rate": 4.72455591261534e-06, + "log_odds_chosen": 0.10587283223867416, + "log_odds_ratio": -0.6958147287368774, + "logits/chosen": -2.9291391372680664, + "logits/rejected": -2.931337833404541, + "logps/chosen": -0.6905062794685364, + "logps/rejected": -0.7565279603004456, + "loss": 0.5432, + "nll_loss": 0.36201146245002747, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.138101264834404, + "rewards/margins": 0.013204338029026985, + "rewards/rejected": -0.15130558609962463, + "step": 1200 + }, + { + "epoch": 0.86, + "grad_norm": 5.28125, + "learning_rate": 4.722448147213712e-06, + "log_odds_chosen": 0.16628727316856384, + "log_odds_ratio": -0.6715134382247925, + "logits/chosen": -2.9251606464385986, + "logits/rejected": -2.924400806427002, + "logps/chosen": -0.7159217000007629, + "logps/rejected": -0.7908354997634888, + "loss": 0.5327, + "nll_loss": 0.4329577088356018, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.1431843340396881, + "rewards/margins": 0.01498276274651289, + "rewards/rejected": -0.15816709399223328, + "step": 1210 + }, + { + "epoch": 0.87, + "grad_norm": 5.34375, + "learning_rate": 4.720343200308507e-06, + "log_odds_chosen": 0.09120135009288788, + "log_odds_ratio": -0.7072020769119263, + "logits/chosen": -2.918830394744873, + "logits/rejected": -2.9098925590515137, + "logps/chosen": -0.7446082830429077, + "logps/rejected": -0.7945619821548462, + "loss": 0.5726, + "nll_loss": 0.4186829924583435, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.1489216834306717, + "rewards/margins": 0.009990743361413479, + "rewards/rejected": -0.15891240537166595, + "step": 1220 + }, + { + "epoch": 0.88, + "grad_norm": 5.15625, + "learning_rate": 4.7182410656238484e-06, + "log_odds_chosen": 0.08639942854642868, + "log_odds_ratio": -0.7023371458053589, + "logits/chosen": -2.883063793182373, + "logits/rejected": -2.876861095428467, + "logps/chosen": -0.7035251259803772, + "logps/rejected": -0.7428823113441467, + "loss": 0.5529, + "nll_loss": 0.3646206855773926, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.14070501923561096, + "rewards/margins": 0.007871445268392563, + "rewards/rejected": -0.14857646822929382, + "step": 1230 + }, + { + "epoch": 0.89, + "grad_norm": 5.03125, + "learning_rate": 4.716141736903407e-06, + "log_odds_chosen": 0.17533931136131287, + "log_odds_ratio": -0.6885548830032349, + "logits/chosen": -2.8661389350891113, + "logits/rejected": -2.851670980453491, + "logps/chosen": -0.7199305295944214, + "logps/rejected": -0.8365727663040161, + "loss": 0.5332, + "nll_loss": 0.41772204637527466, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.14398609101772308, + "rewards/margins": 0.02332843840122223, + "rewards/rejected": -0.1673145294189453, + "step": 1240 + }, + { + "epoch": 0.89, + "grad_norm": 5.125, + "learning_rate": 4.714045207910318e-06, + "log_odds_chosen": 0.13246873021125793, + "log_odds_ratio": -0.6706294417381287, + "logits/chosen": -2.8975651264190674, + "logits/rejected": -2.872434616088867, + "logps/chosen": -0.6649607419967651, + "logps/rejected": -0.7357169389724731, + "loss": 0.5467, + "nll_loss": 0.41414278745651245, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.13299211859703064, + "rewards/margins": 0.014151250943541527, + "rewards/rejected": -0.1471433937549591, + "step": 1250 + }, + { + "epoch": 0.9, + "grad_norm": 5.0625, + "learning_rate": 4.71195147242711e-06, + "log_odds_chosen": 0.040602125227451324, + "log_odds_ratio": -0.7303158640861511, + "logits/chosen": -2.8778679370880127, + "logits/rejected": -2.8803482055664062, + "logps/chosen": -0.7029106020927429, + "logps/rejected": -0.7276321649551392, + "loss": 0.559, + "nll_loss": 0.4467160701751709, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1405821144580841, + "rewards/margins": 0.004944324027746916, + "rewards/rejected": -0.1455264538526535, + "step": 1260 + }, + { + "epoch": 0.91, + "grad_norm": 8.0, + "learning_rate": 4.709860524255622e-06, + "log_odds_chosen": 0.09002572298049927, + "log_odds_ratio": -0.7032243609428406, + "logits/chosen": -2.8805668354034424, + "logits/rejected": -2.860121965408325, + "logps/chosen": -0.718974232673645, + "logps/rejected": -0.7800055742263794, + "loss": 0.5319, + "nll_loss": 0.3927960991859436, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14379484951496124, + "rewards/margins": 0.012206263840198517, + "rewards/rejected": -0.15600110590457916, + "step": 1270 + }, + { + "epoch": 0.91, + "grad_norm": 5.125, + "learning_rate": 4.707772357216934e-06, + "log_odds_chosen": 0.2139289379119873, + "log_odds_ratio": -0.6507673859596252, + "logits/chosen": -2.9283065795898438, + "logits/rejected": -2.909491777420044, + "logps/chosen": -0.666816234588623, + "logps/rejected": -0.7714813947677612, + "loss": 0.5357, + "nll_loss": 0.4001480042934418, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.1333632469177246, + "rewards/margins": 0.020933035761117935, + "rewards/rejected": -0.15429629385471344, + "step": 1280 + }, + { + "epoch": 0.92, + "grad_norm": 5.21875, + "learning_rate": 4.705686965151282e-06, + "log_odds_chosen": 0.08337760716676712, + "log_odds_ratio": -0.6977395415306091, + "logits/chosen": -2.866161584854126, + "logits/rejected": -2.8568389415740967, + "logps/chosen": -0.6980268955230713, + "logps/rejected": -0.7350268959999084, + "loss": 0.5858, + "nll_loss": 0.44848671555519104, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.13960537314414978, + "rewards/margins": 0.007400007452815771, + "rewards/rejected": -0.14700539410114288, + "step": 1290 + }, + { + "epoch": 0.93, + "grad_norm": 5.09375, + "learning_rate": 4.703604341917987e-06, + "log_odds_chosen": 0.16698376834392548, + "log_odds_ratio": -0.6709255576133728, + "logits/chosen": -2.8881678581237793, + "logits/rejected": -2.8717174530029297, + "logps/chosen": -0.6411119699478149, + "logps/rejected": -0.7108407616615295, + "loss": 0.5404, + "nll_loss": 0.39268070459365845, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.12822240591049194, + "rewards/margins": 0.01394575648009777, + "rewards/rejected": -0.14216816425323486, + "step": 1300 + }, + { + "epoch": 0.94, + "grad_norm": 5.125, + "learning_rate": 4.701524481395374e-06, + "log_odds_chosen": 0.04918034002184868, + "log_odds_ratio": -0.7179148197174072, + "logits/chosen": -2.8727264404296875, + "logits/rejected": -2.858916759490967, + "logps/chosen": -0.7131275534629822, + "logps/rejected": -0.7290714979171753, + "loss": 0.5486, + "nll_loss": 0.37463003396987915, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14262551069259644, + "rewards/margins": 0.0031887758523225784, + "rewards/rejected": -0.14581426978111267, + "step": 1310 + }, + { + "epoch": 0.94, + "grad_norm": 4.875, + "learning_rate": 4.699447377480703e-06, + "log_odds_chosen": 0.1447034478187561, + "log_odds_ratio": -0.6646226644515991, + "logits/chosen": -2.8924479484558105, + "logits/rejected": -2.8799147605895996, + "logps/chosen": -0.7005911469459534, + "logps/rejected": -0.773948073387146, + "loss": 0.5247, + "nll_loss": 0.38711321353912354, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.14011821150779724, + "rewards/margins": 0.01467139832675457, + "rewards/rejected": -0.15478962659835815, + "step": 1320 + }, + { + "epoch": 0.95, + "grad_norm": 5.125, + "learning_rate": 4.6973730240900876e-06, + "log_odds_chosen": 0.18853728473186493, + "log_odds_ratio": -0.6440411806106567, + "logits/chosen": -2.9185266494750977, + "logits/rejected": -2.888824939727783, + "logps/chosen": -0.6820311546325684, + "logps/rejected": -0.7798157334327698, + "loss": 0.5488, + "nll_loss": 0.4185566008090973, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.13640624284744263, + "rewards/margins": 0.019556904211640358, + "rewards/rejected": -0.15596315264701843, + "step": 1330 + }, + { + "epoch": 0.96, + "grad_norm": 5.3125, + "learning_rate": 4.695301415158426e-06, + "log_odds_chosen": 0.10568451881408691, + "log_odds_ratio": -0.7053590416908264, + "logits/chosen": -2.9057796001434326, + "logits/rejected": -2.896315813064575, + "logps/chosen": -0.7402433753013611, + "logps/rejected": -0.7852991223335266, + "loss": 0.5757, + "nll_loss": 0.44732385873794556, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.14804866909980774, + "rewards/margins": 0.00901114847511053, + "rewards/rejected": -0.15705981850624084, + "step": 1340 + }, + { + "epoch": 0.96, + "grad_norm": 4.9375, + "learning_rate": 4.693232544639321e-06, + "log_odds_chosen": 0.1334969699382782, + "log_odds_ratio": -0.6749390959739685, + "logits/chosen": -2.9262282848358154, + "logits/rejected": -2.9031224250793457, + "logps/chosen": -0.7033289670944214, + "logps/rejected": -0.7720553874969482, + "loss": 0.5335, + "nll_loss": 0.38161200284957886, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.14066579937934875, + "rewards/margins": 0.013745295815169811, + "rewards/rejected": -0.15441109240055084, + "step": 1350 + }, + { + "epoch": 0.97, + "grad_norm": 5.53125, + "learning_rate": 4.691166406505011e-06, + "log_odds_chosen": 0.20113444328308105, + "log_odds_ratio": -0.6593700051307678, + "logits/chosen": -2.8720855712890625, + "logits/rejected": -2.865278959274292, + "logps/chosen": -0.6543204188346863, + "logps/rejected": -0.7732547521591187, + "loss": 0.5593, + "nll_loss": 0.4312739372253418, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.13086409866809845, + "rewards/margins": 0.023786863312125206, + "rewards/rejected": -0.1546509563922882, + "step": 1360 + }, + { + "epoch": 0.98, + "grad_norm": 5.5, + "learning_rate": 4.689102994746289e-06, + "log_odds_chosen": 0.11934226751327515, + "log_odds_ratio": -0.6696735620498657, + "logits/chosen": -2.879538059234619, + "logits/rejected": -2.8727288246154785, + "logps/chosen": -0.673319399356842, + "logps/rejected": -0.7296326756477356, + "loss": 0.5975, + "nll_loss": 0.44223251938819885, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.1346638798713684, + "rewards/margins": 0.011262651532888412, + "rewards/rejected": -0.14592652022838593, + "step": 1370 + }, + { + "epoch": 0.99, + "grad_norm": 5.5, + "learning_rate": 4.687042303372439e-06, + "log_odds_chosen": 0.11940214782953262, + "log_odds_ratio": -0.6887474060058594, + "logits/chosen": -2.9131243228912354, + "logits/rejected": -2.9096319675445557, + "logps/chosen": -0.6941181421279907, + "logps/rejected": -0.7482253313064575, + "loss": 0.5402, + "nll_loss": 0.40905994176864624, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.13882364332675934, + "rewards/margins": 0.010821421630680561, + "rewards/rejected": -0.14964506030082703, + "step": 1380 + }, + { + "epoch": 0.99, + "grad_norm": 4.25, + "learning_rate": 4.684984326411154e-06, + "log_odds_chosen": 0.09887897968292236, + "log_odds_ratio": -0.6886764764785767, + "logits/chosen": -2.8908066749572754, + "logits/rejected": -2.8787262439727783, + "logps/chosen": -0.679020881652832, + "logps/rejected": -0.7298253774642944, + "loss": 0.5263, + "nll_loss": 0.39224857091903687, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.1358041763305664, + "rewards/margins": 0.010160907171666622, + "rewards/rejected": -0.1459650695323944, + "step": 1390 + }, + { + "epoch": 1.0, + "grad_norm": 4.65625, + "learning_rate": 4.68292905790847e-06, + "log_odds_chosen": 0.2600487768650055, + "log_odds_ratio": -0.6203187704086304, + "logits/chosen": -2.9187633991241455, + "logits/rejected": -2.9031124114990234, + "logps/chosen": -0.635817289352417, + "logps/rejected": -0.757480800151825, + "loss": 0.5639, + "nll_loss": 0.42110905051231384, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.12716346979141235, + "rewards/margins": 0.02433270588517189, + "rewards/rejected": -0.15149617195129395, + "step": 1400 + }, + { + "epoch": 1.01, + "grad_norm": 6.03125, + "learning_rate": 4.6808764919286885e-06, + "log_odds_chosen": 0.8848162889480591, + "log_odds_ratio": -0.37814536690711975, + "logits/chosen": -2.9017772674560547, + "logits/rejected": -2.8844223022460938, + "logps/chosen": -0.5128706097602844, + "logps/rejected": -0.9588940739631653, + "loss": 0.4309, + "nll_loss": 0.3441932797431946, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10257412493228912, + "rewards/margins": 0.08920470625162125, + "rewards/rejected": -0.19177880883216858, + "step": 1410 + }, + { + "epoch": 1.01, + "grad_norm": 4.53125, + "learning_rate": 4.678826622554307e-06, + "log_odds_chosen": 0.9211248159408569, + "log_odds_ratio": -0.3970174491405487, + "logits/chosen": -2.8897476196289062, + "logits/rejected": -2.877735137939453, + "logps/chosen": -0.47735509276390076, + "logps/rejected": -0.9233940839767456, + "loss": 0.4206, + "nll_loss": 0.31756311655044556, + "rewards/accuracies": 0.893750011920929, + "rewards/chosen": -0.09547100961208344, + "rewards/margins": 0.08920779824256897, + "rewards/rejected": -0.1846788227558136, + "step": 1420 + }, + { + "epoch": 1.02, + "grad_norm": 4.9375, + "learning_rate": 4.676779443885949e-06, + "log_odds_chosen": 1.0763452053070068, + "log_odds_ratio": -0.3526865839958191, + "logits/chosen": -2.8960213661193848, + "logits/rejected": -2.8801820278167725, + "logps/chosen": -0.512055516242981, + "logps/rejected": -1.0564101934432983, + "loss": 0.3946, + "nll_loss": 0.3245953619480133, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.10241111367940903, + "rewards/margins": 0.1088709607720375, + "rewards/rejected": -0.21128205955028534, + "step": 1430 + }, + { + "epoch": 1.03, + "grad_norm": 5.4375, + "learning_rate": 4.674734950042287e-06, + "log_odds_chosen": 1.138904333114624, + "log_odds_ratio": -0.33328309655189514, + "logits/chosen": -2.8903861045837402, + "logits/rejected": -2.884766101837158, + "logps/chosen": -0.4885633885860443, + "logps/rejected": -1.0382764339447021, + "loss": 0.3688, + "nll_loss": 0.31521254777908325, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.0977126881480217, + "rewards/margins": 0.10994259268045425, + "rewards/rejected": -0.20765526592731476, + "step": 1440 + }, + { + "epoch": 1.04, + "grad_norm": 4.78125, + "learning_rate": 4.672693135159978e-06, + "log_odds_chosen": 1.0414109230041504, + "log_odds_ratio": -0.3578895330429077, + "logits/chosen": -2.8687214851379395, + "logits/rejected": -2.853811264038086, + "logps/chosen": -0.5137967467308044, + "logps/rejected": -1.0513535737991333, + "loss": 0.3982, + "nll_loss": 0.3380189538002014, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10275934636592865, + "rewards/margins": 0.10751136392354965, + "rewards/rejected": -0.2102707177400589, + "step": 1450 + }, + { + "epoch": 1.04, + "grad_norm": 6.65625, + "learning_rate": 4.67065399339359e-06, + "log_odds_chosen": 1.1234166622161865, + "log_odds_ratio": -0.35121458768844604, + "logits/chosen": -2.861860752105713, + "logits/rejected": -2.8768668174743652, + "logps/chosen": -0.49000921845436096, + "logps/rejected": -1.0480070114135742, + "loss": 0.3922, + "nll_loss": 0.31293317675590515, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09800183773040771, + "rewards/margins": 0.11159957945346832, + "rewards/rejected": -0.20960143208503723, + "step": 1460 + }, + { + "epoch": 1.05, + "grad_norm": 6.96875, + "learning_rate": 4.668617518915533e-06, + "log_odds_chosen": 1.04032301902771, + "log_odds_ratio": -0.35156646370887756, + "logits/chosen": -2.8752763271331787, + "logits/rejected": -2.8781304359436035, + "logps/chosen": -0.5017446279525757, + "logps/rejected": -1.0297907590866089, + "loss": 0.3846, + "nll_loss": 0.31080394983291626, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.10034892708063126, + "rewards/margins": 0.1056092381477356, + "rewards/rejected": -0.20595815777778625, + "step": 1470 + }, + { + "epoch": 1.06, + "grad_norm": 5.84375, + "learning_rate": 4.666583705915985e-06, + "log_odds_chosen": 1.1882513761520386, + "log_odds_ratio": -0.32196754217147827, + "logits/chosen": -2.867017984390259, + "logits/rejected": -2.8608901500701904, + "logps/chosen": -0.4866812825202942, + "logps/rejected": -1.108413577079773, + "loss": 0.4034, + "nll_loss": 0.30916067957878113, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.09733626246452332, + "rewards/margins": 0.12434647977352142, + "rewards/rejected": -0.22168274223804474, + "step": 1480 + }, + { + "epoch": 1.06, + "grad_norm": 5.90625, + "learning_rate": 4.664552548602825e-06, + "log_odds_chosen": 1.2755037546157837, + "log_odds_ratio": -0.3192000985145569, + "logits/chosen": -2.8904194831848145, + "logits/rejected": -2.887146472930908, + "logps/chosen": -0.4415016174316406, + "logps/rejected": -1.0573655366897583, + "loss": 0.375, + "nll_loss": 0.30936262011528015, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.08830033242702484, + "rewards/margins": 0.12317276000976562, + "rewards/rejected": -0.21147307753562927, + "step": 1490 + }, + { + "epoch": 1.07, + "grad_norm": 6.3125, + "learning_rate": 4.662524041201569e-06, + "log_odds_chosen": 1.2209784984588623, + "log_odds_ratio": -0.31655097007751465, + "logits/chosen": -2.8983540534973145, + "logits/rejected": -2.876600742340088, + "logps/chosen": -0.5172096490859985, + "logps/rejected": -1.1844242811203003, + "loss": 0.416, + "nll_loss": 0.3785460293292999, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.10344193130731583, + "rewards/margins": 0.1334429532289505, + "rewards/rejected": -0.23688487708568573, + "step": 1500 + }, + { + "epoch": 1.08, + "grad_norm": 6.1875, + "learning_rate": 4.660498177955291e-06, + "log_odds_chosen": 1.1757891178131104, + "log_odds_ratio": -0.316308856010437, + "logits/chosen": -2.8644537925720215, + "logits/rejected": -2.8410959243774414, + "logps/chosen": -0.49577221274375916, + "logps/rejected": -1.1109702587127686, + "loss": 0.4093, + "nll_loss": 0.3233625888824463, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.09915443509817123, + "rewards/margins": 0.12303964048624039, + "rewards/rejected": -0.22219407558441162, + "step": 1510 + }, + { + "epoch": 1.09, + "grad_norm": 4.8125, + "learning_rate": 4.658474953124562e-06, + "log_odds_chosen": 1.1299588680267334, + "log_odds_ratio": -0.3389926254749298, + "logits/chosen": -2.9047207832336426, + "logits/rejected": -2.9168930053710938, + "logps/chosen": -0.4910566806793213, + "logps/rejected": -1.0821782350540161, + "loss": 0.3979, + "nll_loss": 0.36464497447013855, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.09821134060621262, + "rewards/margins": 0.11822430789470673, + "rewards/rejected": -0.21643562614917755, + "step": 1520 + }, + { + "epoch": 1.09, + "grad_norm": 6.25, + "learning_rate": 4.656454360987378e-06, + "log_odds_chosen": 1.2231553792953491, + "log_odds_ratio": -0.30703750252723694, + "logits/chosen": -2.8862717151641846, + "logits/rejected": -2.880195140838623, + "logps/chosen": -0.45338621735572815, + "logps/rejected": -1.0659048557281494, + "loss": 0.3926, + "nll_loss": 0.32782015204429626, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.09067724645137787, + "rewards/margins": 0.12250375747680664, + "rewards/rejected": -0.21318098902702332, + "step": 1530 + }, + { + "epoch": 1.1, + "grad_norm": 5.90625, + "learning_rate": 4.654436395839094e-06, + "log_odds_chosen": 1.2722887992858887, + "log_odds_ratio": -0.3012635111808777, + "logits/chosen": -2.8365585803985596, + "logits/rejected": -2.8223185539245605, + "logps/chosen": -0.498770147562027, + "logps/rejected": -1.186909556388855, + "loss": 0.3876, + "nll_loss": 0.34141403436660767, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.09975402057170868, + "rewards/margins": 0.13762789964675903, + "rewards/rejected": -0.23738190531730652, + "step": 1540 + }, + { + "epoch": 1.11, + "grad_norm": 5.25, + "learning_rate": 4.652421051992354e-06, + "log_odds_chosen": 1.367235541343689, + "log_odds_ratio": -0.2841276526451111, + "logits/chosen": -2.8690426349639893, + "logits/rejected": -2.860100507736206, + "logps/chosen": -0.42585864663124084, + "logps/rejected": -1.0907236337661743, + "loss": 0.3664, + "nll_loss": 0.2977783679962158, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.08517173677682877, + "rewards/margins": 0.13297298550605774, + "rewards/rejected": -0.2181447297334671, + "step": 1550 + }, + { + "epoch": 1.11, + "grad_norm": 6.9375, + "learning_rate": 4.650408323777029e-06, + "log_odds_chosen": 1.1410921812057495, + "log_odds_ratio": -0.3362571597099304, + "logits/chosen": -2.853015184402466, + "logits/rejected": -2.8460183143615723, + "logps/chosen": -0.5159534215927124, + "logps/rejected": -1.1035113334655762, + "loss": 0.392, + "nll_loss": 0.33157429099082947, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.10319069772958755, + "rewards/margins": 0.11751158535480499, + "rewards/rejected": -0.22070229053497314, + "step": 1560 + }, + { + "epoch": 1.12, + "grad_norm": 4.875, + "learning_rate": 4.6483982055401415e-06, + "log_odds_chosen": 1.3359148502349854, + "log_odds_ratio": -0.3214777410030365, + "logits/chosen": -2.807814359664917, + "logits/rejected": -2.8036892414093018, + "logps/chosen": -0.48363837599754333, + "logps/rejected": -1.1991592645645142, + "loss": 0.3751, + "nll_loss": 0.2966926693916321, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.09672766178846359, + "rewards/margins": 0.1431041657924652, + "rewards/rejected": -0.23983187973499298, + "step": 1570 + }, + { + "epoch": 1.13, + "grad_norm": 5.65625, + "learning_rate": 4.646390691645805e-06, + "log_odds_chosen": 1.219293236732483, + "log_odds_ratio": -0.34068965911865234, + "logits/chosen": -2.826866626739502, + "logits/rejected": -2.8263888359069824, + "logps/chosen": -0.4953809678554535, + "logps/rejected": -1.1199510097503662, + "loss": 0.3645, + "nll_loss": 0.2939409017562866, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09907619655132294, + "rewards/margins": 0.1249140128493309, + "rewards/rejected": -0.22399020195007324, + "step": 1580 + }, + { + "epoch": 1.14, + "grad_norm": 5.875, + "learning_rate": 4.644385776475159e-06, + "log_odds_chosen": 1.1653202772140503, + "log_odds_ratio": -0.3451498746871948, + "logits/chosen": -2.8366851806640625, + "logits/rejected": -2.835325002670288, + "logps/chosen": -0.5062024593353271, + "logps/rejected": -1.1195242404937744, + "loss": 0.4054, + "nll_loss": 0.3258489966392517, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10124049335718155, + "rewards/margins": 0.12266434729099274, + "rewards/rejected": -0.2239048182964325, + "step": 1590 + }, + { + "epoch": 1.14, + "grad_norm": 6.75, + "learning_rate": 4.642383454426297e-06, + "log_odds_chosen": 1.174912452697754, + "log_odds_ratio": -0.3339281678199768, + "logits/chosen": -2.861581325531006, + "logits/rejected": -2.8604111671447754, + "logps/chosen": -0.5135446786880493, + "logps/rejected": -1.1017265319824219, + "loss": 0.396, + "nll_loss": 0.3384125828742981, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.10270893573760986, + "rewards/margins": 0.11763636022806168, + "rewards/rejected": -0.22034530341625214, + "step": 1600 + }, + { + "epoch": 1.15, + "grad_norm": 8.125, + "learning_rate": 4.640383719914205e-06, + "log_odds_chosen": 1.2484989166259766, + "log_odds_ratio": -0.30592483282089233, + "logits/chosen": -2.807584285736084, + "logits/rejected": -2.813006639480591, + "logps/chosen": -0.5197979807853699, + "logps/rejected": -1.181796669960022, + "loss": 0.4215, + "nll_loss": 0.3560883402824402, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10395960509777069, + "rewards/margins": 0.13239972293376923, + "rewards/rejected": -0.23635932803153992, + "step": 1610 + }, + { + "epoch": 1.16, + "grad_norm": 6.25, + "learning_rate": 4.638386567370694e-06, + "log_odds_chosen": 1.2706489562988281, + "log_odds_ratio": -0.3202516436576843, + "logits/chosen": -2.8386385440826416, + "logits/rejected": -2.835495710372925, + "logps/chosen": -0.5026634931564331, + "logps/rejected": -1.1739028692245483, + "loss": 0.3759, + "nll_loss": 0.31424498558044434, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10053269565105438, + "rewards/margins": 0.13424786925315857, + "rewards/rejected": -0.23478057980537415, + "step": 1620 + }, + { + "epoch": 1.16, + "grad_norm": 5.3125, + "learning_rate": 4.636391991244338e-06, + "log_odds_chosen": 1.2696077823638916, + "log_odds_ratio": -0.3153325915336609, + "logits/chosen": -2.858908176422119, + "logits/rejected": -2.8612444400787354, + "logps/chosen": -0.48242902755737305, + "logps/rejected": -1.126427173614502, + "loss": 0.366, + "nll_loss": 0.295919805765152, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.09648582339286804, + "rewards/margins": 0.12879963219165802, + "rewards/rejected": -0.22528544068336487, + "step": 1630 + }, + { + "epoch": 1.17, + "grad_norm": 6.4375, + "learning_rate": 4.634399986000405e-06, + "log_odds_chosen": 1.3514255285263062, + "log_odds_ratio": -0.2864634096622467, + "logits/chosen": -2.892820358276367, + "logits/rejected": -2.8774783611297607, + "logps/chosen": -0.4613906741142273, + "logps/rejected": -1.1687535047531128, + "loss": 0.3837, + "nll_loss": 0.35217028856277466, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.0922781378030777, + "rewards/margins": 0.14147259294986725, + "rewards/rejected": -0.23375073075294495, + "step": 1640 + }, + { + "epoch": 1.18, + "grad_norm": 5.90625, + "learning_rate": 4.632410546120794e-06, + "log_odds_chosen": 1.1376657485961914, + "log_odds_ratio": -0.3578290343284607, + "logits/chosen": -2.820157527923584, + "logits/rejected": -2.8282933235168457, + "logps/chosen": -0.5289143323898315, + "logps/rejected": -1.1361706256866455, + "loss": 0.4242, + "nll_loss": 0.36498746275901794, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10578285157680511, + "rewards/margins": 0.12145128101110458, + "rewards/rejected": -0.2272341251373291, + "step": 1650 + }, + { + "epoch": 1.19, + "grad_norm": 6.5, + "learning_rate": 4.6304236661039765e-06, + "log_odds_chosen": 1.2930752038955688, + "log_odds_ratio": -0.3022990822792053, + "logits/chosen": -2.8277323246002197, + "logits/rejected": -2.814790725708008, + "logps/chosen": -0.471322625875473, + "logps/rejected": -1.1453874111175537, + "loss": 0.3746, + "nll_loss": 0.298636794090271, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.09426452219486237, + "rewards/margins": 0.13481295108795166, + "rewards/rejected": -0.2290775030851364, + "step": 1660 + }, + { + "epoch": 1.19, + "grad_norm": 6.875, + "learning_rate": 4.628439340464919e-06, + "log_odds_chosen": 1.1731585264205933, + "log_odds_ratio": -0.33878791332244873, + "logits/chosen": -2.8189854621887207, + "logits/rejected": -2.822537660598755, + "logps/chosen": -0.5024202466011047, + "logps/rejected": -1.1166616678237915, + "loss": 0.3923, + "nll_loss": 0.34138697385787964, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.10048405081033707, + "rewards/margins": 0.12284828722476959, + "rewards/rejected": -0.22333233058452606, + "step": 1670 + }, + { + "epoch": 1.2, + "grad_norm": 6.21875, + "learning_rate": 4.626457563735034e-06, + "log_odds_chosen": 1.2489228248596191, + "log_odds_ratio": -0.30081456899642944, + "logits/chosen": -2.8132882118225098, + "logits/rejected": -2.8195571899414062, + "logps/chosen": -0.501319408416748, + "logps/rejected": -1.1651036739349365, + "loss": 0.3777, + "nll_loss": 0.3397257328033447, + "rewards/accuracies": 0.96875, + "rewards/chosen": -0.10026389360427856, + "rewards/margins": 0.13275685906410217, + "rewards/rejected": -0.23302075266838074, + "step": 1680 + }, + { + "epoch": 1.21, + "grad_norm": 6.5, + "learning_rate": 4.624478330462108e-06, + "log_odds_chosen": 1.2629085779190063, + "log_odds_ratio": -0.3190768361091614, + "logits/chosen": -2.808572769165039, + "logits/rejected": -2.8040456771850586, + "logps/chosen": -0.52824467420578, + "logps/rejected": -1.2037794589996338, + "loss": 0.4016, + "nll_loss": 0.3477191627025604, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.105648934841156, + "rewards/margins": 0.13510698080062866, + "rewards/rejected": -0.24075591564178467, + "step": 1690 + }, + { + "epoch": 1.21, + "grad_norm": 5.3125, + "learning_rate": 4.622501635210244e-06, + "log_odds_chosen": 1.199979305267334, + "log_odds_ratio": -0.32586055994033813, + "logits/chosen": -2.811058759689331, + "logits/rejected": -2.797663927078247, + "logps/chosen": -0.5032976269721985, + "logps/rejected": -1.1406500339508057, + "loss": 0.412, + "nll_loss": 0.35429102182388306, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10065951198339462, + "rewards/margins": 0.1274704784154892, + "rewards/rejected": -0.2281300127506256, + "step": 1700 + }, + { + "epoch": 1.22, + "grad_norm": 6.625, + "learning_rate": 4.62052747255979e-06, + "log_odds_chosen": 1.2002490758895874, + "log_odds_ratio": -0.3310433030128479, + "logits/chosen": -2.8232879638671875, + "logits/rejected": -2.8157012462615967, + "logps/chosen": -0.5096195936203003, + "logps/rejected": -1.1472256183624268, + "loss": 0.4145, + "nll_loss": 0.33412426710128784, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.10192392021417618, + "rewards/margins": 0.12752120196819305, + "rewards/rejected": -0.22944512963294983, + "step": 1710 + }, + { + "epoch": 1.23, + "grad_norm": 6.25, + "learning_rate": 4.61855583710729e-06, + "log_odds_chosen": 1.200430154800415, + "log_odds_ratio": -0.3242012560367584, + "logits/chosen": -2.807443141937256, + "logits/rejected": -2.812464475631714, + "logps/chosen": -0.4960111975669861, + "logps/rejected": -1.1150873899459839, + "loss": 0.3998, + "nll_loss": 0.3255448639392853, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.0992022305727005, + "rewards/margins": 0.12381526082754135, + "rewards/rejected": -0.22301749885082245, + "step": 1720 + }, + { + "epoch": 1.24, + "grad_norm": 6.15625, + "learning_rate": 4.616586723465408e-06, + "log_odds_chosen": 1.2508338689804077, + "log_odds_ratio": -0.31589239835739136, + "logits/chosen": -2.7934987545013428, + "logits/rejected": -2.798473358154297, + "logps/chosen": -0.46048063039779663, + "logps/rejected": -1.0900535583496094, + "loss": 0.3718, + "nll_loss": 0.2973485589027405, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.09209613502025604, + "rewards/margins": 0.12591460347175598, + "rewards/rejected": -0.21801073849201202, + "step": 1730 + }, + { + "epoch": 1.24, + "grad_norm": 5.5, + "learning_rate": 4.614620126262875e-06, + "log_odds_chosen": 1.2658488750457764, + "log_odds_ratio": -0.31101447343826294, + "logits/chosen": -2.7943694591522217, + "logits/rejected": -2.810157299041748, + "logps/chosen": -0.5024830102920532, + "logps/rejected": -1.1770861148834229, + "loss": 0.3848, + "nll_loss": 0.3470260202884674, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.10049660503864288, + "rewards/margins": 0.1349206268787384, + "rewards/rejected": -0.2354172170162201, + "step": 1740 + }, + { + "epoch": 1.25, + "grad_norm": 5.71875, + "learning_rate": 4.6126560401444256e-06, + "log_odds_chosen": 1.2377068996429443, + "log_odds_ratio": -0.32577404379844666, + "logits/chosen": -2.8102307319641113, + "logits/rejected": -2.816279411315918, + "logps/chosen": -0.4699520170688629, + "logps/rejected": -1.1359916925430298, + "loss": 0.3875, + "nll_loss": 0.3352469503879547, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.09399040788412094, + "rewards/margins": 0.13320791721343994, + "rewards/rejected": -0.22719831764698029, + "step": 1750 + }, + { + "epoch": 1.26, + "grad_norm": 5.3125, + "learning_rate": 4.610694459770736e-06, + "log_odds_chosen": 1.142558217048645, + "log_odds_ratio": -0.3530025780200958, + "logits/chosen": -2.8239128589630127, + "logits/rejected": -2.8176794052124023, + "logps/chosen": -0.5302969217300415, + "logps/rejected": -1.1179604530334473, + "loss": 0.424, + "nll_loss": 0.36241117119789124, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.10605937242507935, + "rewards/margins": 0.11753270775079727, + "rewards/rejected": -0.22359208762645721, + "step": 1760 + }, + { + "epoch": 1.26, + "grad_norm": 6.71875, + "learning_rate": 4.6087353798183585e-06, + "log_odds_chosen": 1.2016445398330688, + "log_odds_ratio": -0.338972806930542, + "logits/chosen": -2.8231258392333984, + "logits/rejected": -2.8218886852264404, + "logps/chosen": -0.49100199341773987, + "logps/rejected": -1.0961363315582275, + "loss": 0.3851, + "nll_loss": 0.34230172634124756, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.09820040315389633, + "rewards/margins": 0.1210268959403038, + "rewards/rejected": -0.21922728419303894, + "step": 1770 + }, + { + "epoch": 1.27, + "grad_norm": 8.0, + "learning_rate": 4.606778794979673e-06, + "log_odds_chosen": 1.2381914854049683, + "log_odds_ratio": -0.32014578580856323, + "logits/chosen": -2.7523140907287598, + "logits/rejected": -2.769972801208496, + "logps/chosen": -0.48253411054611206, + "logps/rejected": -1.137094259262085, + "loss": 0.414, + "nll_loss": 0.3594844937324524, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.09650683403015137, + "rewards/margins": 0.13091202080249786, + "rewards/rejected": -0.22741885483264923, + "step": 1780 + }, + { + "epoch": 1.28, + "grad_norm": 5.375, + "learning_rate": 4.604824699962814e-06, + "log_odds_chosen": 1.2986476421356201, + "log_odds_ratio": -0.31768113374710083, + "logits/chosen": -2.8044631481170654, + "logits/rejected": -2.8003344535827637, + "logps/chosen": -0.48466992378234863, + "logps/rejected": -1.1684885025024414, + "loss": 0.4, + "nll_loss": 0.32119065523147583, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09693397581577301, + "rewards/margins": 0.13676372170448303, + "rewards/rejected": -0.23369769752025604, + "step": 1790 + }, + { + "epoch": 1.29, + "grad_norm": 5.75, + "learning_rate": 4.602873089491618e-06, + "log_odds_chosen": 1.3184231519699097, + "log_odds_ratio": -0.3061246871948242, + "logits/chosen": -2.7834739685058594, + "logits/rejected": -2.7912349700927734, + "logps/chosen": -0.4672764837741852, + "logps/rejected": -1.1388607025146484, + "loss": 0.3609, + "nll_loss": 0.312313973903656, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.09345529973506927, + "rewards/margins": 0.1343168318271637, + "rewards/rejected": -0.22777214646339417, + "step": 1800 + }, + { + "epoch": 1.29, + "grad_norm": 5.40625, + "learning_rate": 4.600923958305558e-06, + "log_odds_chosen": 1.221590280532837, + "log_odds_ratio": -0.325833261013031, + "logits/chosen": -2.7938928604125977, + "logits/rejected": -2.787433624267578, + "logps/chosen": -0.49871402978897095, + "logps/rejected": -1.1466445922851562, + "loss": 0.391, + "nll_loss": 0.3510339856147766, + "rewards/accuracies": 0.90625, + "rewards/chosen": -0.09974280744791031, + "rewards/margins": 0.1295861303806305, + "rewards/rejected": -0.2293289452791214, + "step": 1810 + }, + { + "epoch": 1.3, + "grad_norm": 6.0, + "learning_rate": 4.59897730115969e-06, + "log_odds_chosen": 1.3827050924301147, + "log_odds_ratio": -0.30398255586624146, + "logits/chosen": -2.811122417449951, + "logits/rejected": -2.817436933517456, + "logps/chosen": -0.4758991599082947, + "logps/rejected": -1.212922215461731, + "loss": 0.3903, + "nll_loss": 0.32494717836380005, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.09517984092235565, + "rewards/margins": 0.14740462601184845, + "rewards/rejected": -0.2425844967365265, + "step": 1820 + }, + { + "epoch": 1.31, + "grad_norm": 5.5625, + "learning_rate": 4.597033112824591e-06, + "log_odds_chosen": 1.2106988430023193, + "log_odds_ratio": -0.3271104395389557, + "logits/chosen": -2.798574924468994, + "logits/rejected": -2.78853702545166, + "logps/chosen": -0.5198577642440796, + "logps/rejected": -1.1642177104949951, + "loss": 0.3965, + "nll_loss": 0.3389652669429779, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10397156327962875, + "rewards/margins": 0.12887199223041534, + "rewards/rejected": -0.2328435480594635, + "step": 1830 + }, + { + "epoch": 1.31, + "grad_norm": 6.21875, + "learning_rate": 4.595091388086298e-06, + "log_odds_chosen": 1.1952093839645386, + "log_odds_ratio": -0.3166283071041107, + "logits/chosen": -2.8241782188415527, + "logits/rejected": -2.8152718544006348, + "logps/chosen": -0.5200030207633972, + "logps/rejected": -1.1538721323013306, + "loss": 0.3864, + "nll_loss": 0.30587464570999146, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.10400060564279556, + "rewards/margins": 0.12677384912967682, + "rewards/rejected": -0.23077444732189178, + "step": 1840 + }, + { + "epoch": 1.32, + "grad_norm": 6.96875, + "learning_rate": 4.593152121746254e-06, + "log_odds_chosen": 1.168222427368164, + "log_odds_ratio": -0.3400697410106659, + "logits/chosen": -2.816959857940674, + "logits/rejected": -2.813157081604004, + "logps/chosen": -0.4970974326133728, + "logps/rejected": -1.108543038368225, + "loss": 0.411, + "nll_loss": 0.31471529603004456, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.0994194895029068, + "rewards/margins": 0.12228912115097046, + "rewards/rejected": -0.22170861065387726, + "step": 1850 + }, + { + "epoch": 1.33, + "grad_norm": 5.59375, + "learning_rate": 4.591215308621242e-06, + "log_odds_chosen": 1.2203196287155151, + "log_odds_ratio": -0.3189007639884949, + "logits/chosen": -2.8120200634002686, + "logits/rejected": -2.791141986846924, + "logps/chosen": -0.4980306029319763, + "logps/rejected": -1.1655288934707642, + "loss": 0.3934, + "nll_loss": 0.31481805443763733, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.09960611909627914, + "rewards/margins": 0.1334996372461319, + "rewards/rejected": -0.23310574889183044, + "step": 1860 + }, + { + "epoch": 1.34, + "grad_norm": 6.25, + "learning_rate": 4.5892809435433355e-06, + "log_odds_chosen": 1.245577096939087, + "log_odds_ratio": -0.32619625329971313, + "logits/chosen": -2.8177731037139893, + "logits/rejected": -2.8102052211761475, + "logps/chosen": -0.4943556785583496, + "logps/rejected": -1.1462085247039795, + "loss": 0.3925, + "nll_loss": 0.3532278537750244, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.0988711342215538, + "rewards/margins": 0.13037055730819702, + "rewards/rejected": -0.2292417287826538, + "step": 1870 + }, + { + "epoch": 1.34, + "grad_norm": 6.09375, + "learning_rate": 4.587349021359836e-06, + "log_odds_chosen": 1.1849457025527954, + "log_odds_ratio": -0.3331480026245117, + "logits/chosen": -2.8479719161987305, + "logits/rejected": -2.8441169261932373, + "logps/chosen": -0.5096966624259949, + "logps/rejected": -1.1546456813812256, + "loss": 0.4052, + "nll_loss": 0.32981541752815247, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.10193934291601181, + "rewards/margins": 0.1289898157119751, + "rewards/rejected": -0.23092913627624512, + "step": 1880 + }, + { + "epoch": 1.35, + "grad_norm": 5.75, + "learning_rate": 4.585419536933215e-06, + "log_odds_chosen": 1.235563039779663, + "log_odds_ratio": -0.3173638880252838, + "logits/chosen": -2.8725390434265137, + "logits/rejected": -2.859806776046753, + "logps/chosen": -0.4742043614387512, + "logps/rejected": -1.1231261491775513, + "loss": 0.4115, + "nll_loss": 0.365268349647522, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.0948408767580986, + "rewards/margins": 0.12978434562683105, + "rewards/rejected": -0.22462522983551025, + "step": 1890 + }, + { + "epoch": 1.36, + "grad_norm": 5.4375, + "learning_rate": 4.583492485141056e-06, + "log_odds_chosen": 1.3153822422027588, + "log_odds_ratio": -0.3131297528743744, + "logits/chosen": -2.783536434173584, + "logits/rejected": -2.803969144821167, + "logps/chosen": -0.4793526530265808, + "logps/rejected": -1.1401029825210571, + "loss": 0.3573, + "nll_loss": 0.2991887629032135, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.09587053954601288, + "rewards/margins": 0.1321500688791275, + "rewards/rejected": -0.2280205935239792, + "step": 1900 + }, + { + "epoch": 1.36, + "grad_norm": 6.25, + "learning_rate": 4.581567860876004e-06, + "log_odds_chosen": 1.2473056316375732, + "log_odds_ratio": -0.3080199658870697, + "logits/chosen": -2.8462700843811035, + "logits/rejected": -2.8370962142944336, + "logps/chosen": -0.48333874344825745, + "logps/rejected": -1.148425817489624, + "loss": 0.4128, + "nll_loss": 0.3549380302429199, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.09666775166988373, + "rewards/margins": 0.13301745057106018, + "rewards/rejected": -0.2296852171421051, + "step": 1910 + }, + { + "epoch": 1.37, + "grad_norm": 6.75, + "learning_rate": 4.579645659045699e-06, + "log_odds_chosen": 1.227070689201355, + "log_odds_ratio": -0.3165205121040344, + "logits/chosen": -2.8399529457092285, + "logits/rejected": -2.8465752601623535, + "logps/chosen": -0.46384549140930176, + "logps/rejected": -1.0851547718048096, + "loss": 0.401, + "nll_loss": 0.3482987880706787, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.09276910126209259, + "rewards/margins": 0.12426185607910156, + "rewards/rejected": -0.21703095734119415, + "step": 1920 + }, + { + "epoch": 1.38, + "grad_norm": 4.46875, + "learning_rate": 4.577725874572724e-06, + "log_odds_chosen": 1.1611140966415405, + "log_odds_ratio": -0.34230470657348633, + "logits/chosen": -2.807835340499878, + "logits/rejected": -2.817835807800293, + "logps/chosen": -0.49586695432662964, + "logps/rejected": -1.093207836151123, + "loss": 0.3975, + "nll_loss": 0.31580257415771484, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.0991733968257904, + "rewards/margins": 0.11946818977594376, + "rewards/rejected": -0.21864160895347595, + "step": 1930 + }, + { + "epoch": 1.39, + "grad_norm": 6.21875, + "learning_rate": 4.575808502394551e-06, + "log_odds_chosen": 1.2263875007629395, + "log_odds_ratio": -0.3336968421936035, + "logits/chosen": -2.8074028491973877, + "logits/rejected": -2.808117628097534, + "logps/chosen": -0.5058302879333496, + "logps/rejected": -1.1664202213287354, + "loss": 0.3892, + "nll_loss": 0.32398825883865356, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.10116605460643768, + "rewards/margins": 0.13211797177791595, + "rewards/rejected": -0.23328404128551483, + "step": 1940 + }, + { + "epoch": 1.39, + "grad_norm": 5.65625, + "learning_rate": 4.573893537463482e-06, + "log_odds_chosen": 1.1256129741668701, + "log_odds_ratio": -0.3365449905395508, + "logits/chosen": -2.799619197845459, + "logits/rejected": -2.8067538738250732, + "logps/chosen": -0.501841127872467, + "logps/rejected": -1.0864284038543701, + "loss": 0.3919, + "nll_loss": 0.3244914412498474, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.10036821663379669, + "rewards/margins": 0.11691747605800629, + "rewards/rejected": -0.21728567779064178, + "step": 1950 + }, + { + "epoch": 1.4, + "grad_norm": 7.71875, + "learning_rate": 4.5719809747465946e-06, + "log_odds_chosen": 1.2488094568252563, + "log_odds_ratio": -0.3095954358577728, + "logits/chosen": -2.80102801322937, + "logits/rejected": -2.786306381225586, + "logps/chosen": -0.48401910066604614, + "logps/rejected": -1.1524279117584229, + "loss": 0.391, + "nll_loss": 0.32259485125541687, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.09680382162332535, + "rewards/margins": 0.1336817443370819, + "rewards/rejected": -0.23048558831214905, + "step": 1960 + }, + { + "epoch": 1.41, + "grad_norm": 6.65625, + "learning_rate": 4.570070809225682e-06, + "log_odds_chosen": 1.185505986213684, + "log_odds_ratio": -0.3236411213874817, + "logits/chosen": -2.8083627223968506, + "logits/rejected": -2.8073079586029053, + "logps/chosen": -0.497401624917984, + "logps/rejected": -1.1157503128051758, + "loss": 0.3984, + "nll_loss": 0.32068413496017456, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.09948031604290009, + "rewards/margins": 0.12366974353790283, + "rewards/rejected": -0.2231500893831253, + "step": 1970 + }, + { + "epoch": 1.41, + "grad_norm": 7.0625, + "learning_rate": 4.568163035897205e-06, + "log_odds_chosen": 1.2525742053985596, + "log_odds_ratio": -0.31062158942222595, + "logits/chosen": -2.852297782897949, + "logits/rejected": -2.858063220977783, + "logps/chosen": -0.5127692222595215, + "logps/rejected": -1.180938482284546, + "loss": 0.4053, + "nll_loss": 0.36488935351371765, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.10255385935306549, + "rewards/margins": 0.1336338222026825, + "rewards/rejected": -0.23618769645690918, + "step": 1980 + }, + { + "epoch": 1.42, + "grad_norm": 6.5625, + "learning_rate": 4.566257649772231e-06, + "log_odds_chosen": 1.2319920063018799, + "log_odds_ratio": -0.32411015033721924, + "logits/chosen": -2.8400864601135254, + "logits/rejected": -2.835186004638672, + "logps/chosen": -0.5001600384712219, + "logps/rejected": -1.1353777647018433, + "loss": 0.4052, + "nll_loss": 0.346365749835968, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10003199428319931, + "rewards/margins": 0.12704357504844666, + "rewards/rejected": -0.22707557678222656, + "step": 1990 + }, + { + "epoch": 1.43, + "grad_norm": 5.78125, + "learning_rate": 4.564354645876385e-06, + "log_odds_chosen": 1.2825465202331543, + "log_odds_ratio": -0.31023693084716797, + "logits/chosen": -2.7589869499206543, + "logits/rejected": -2.7817113399505615, + "logps/chosen": -0.48962241411209106, + "logps/rejected": -1.1758294105529785, + "loss": 0.3808, + "nll_loss": 0.3050033152103424, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.09792448580265045, + "rewards/margins": 0.137241393327713, + "rewards/rejected": -0.23516587913036346, + "step": 2000 + }, + { + "epoch": 1.44, + "grad_norm": 6.5625, + "learning_rate": 4.562454019249786e-06, + "log_odds_chosen": 1.1062853336334229, + "log_odds_ratio": -0.34708550572395325, + "logits/chosen": -2.8106868267059326, + "logits/rejected": -2.802447557449341, + "logps/chosen": -0.5518588423728943, + "logps/rejected": -1.1463332176208496, + "loss": 0.4399, + "nll_loss": 0.3327658772468567, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.11037178337574005, + "rewards/margins": 0.11889486014842987, + "rewards/rejected": -0.22926661372184753, + "step": 2010 + }, + { + "epoch": 1.44, + "grad_norm": 6.65625, + "learning_rate": 4.560555764947004e-06, + "log_odds_chosen": 1.2340061664581299, + "log_odds_ratio": -0.32612934708595276, + "logits/chosen": -2.822478771209717, + "logits/rejected": -2.828272819519043, + "logps/chosen": -0.5015519857406616, + "logps/rejected": -1.164368987083435, + "loss": 0.3998, + "nll_loss": 0.37030792236328125, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.10031040757894516, + "rewards/margins": 0.13256338238716125, + "rewards/rejected": -0.2328738272190094, + "step": 2020 + }, + { + "epoch": 1.45, + "grad_norm": 5.5625, + "learning_rate": 4.5586598780369954e-06, + "log_odds_chosen": 1.1390058994293213, + "log_odds_ratio": -0.35169175267219543, + "logits/chosen": -2.8428843021392822, + "logits/rejected": -2.846796989440918, + "logps/chosen": -0.5042189359664917, + "logps/rejected": -1.092398762702942, + "loss": 0.3971, + "nll_loss": 0.3557208478450775, + "rewards/accuracies": 0.893750011920929, + "rewards/chosen": -0.10084378719329834, + "rewards/margins": 0.11763594299554825, + "rewards/rejected": -0.21847975254058838, + "step": 2030 + }, + { + "epoch": 1.46, + "grad_norm": 6.5625, + "learning_rate": 4.556766353603058e-06, + "log_odds_chosen": 1.2142447233200073, + "log_odds_ratio": -0.3221642076969147, + "logits/chosen": -2.7607929706573486, + "logits/rejected": -2.769547939300537, + "logps/chosen": -0.5120490789413452, + "logps/rejected": -1.1542648077011108, + "loss": 0.4132, + "nll_loss": 0.31557202339172363, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10240981727838516, + "rewards/margins": 0.1284431517124176, + "rewards/rejected": -0.23085296154022217, + "step": 2040 + }, + { + "epoch": 1.46, + "grad_norm": 6.46875, + "learning_rate": 4.55487518674277e-06, + "log_odds_chosen": 1.2244555950164795, + "log_odds_ratio": -0.3199111521244049, + "logits/chosen": -2.848184108734131, + "logits/rejected": -2.853646755218506, + "logps/chosen": -0.4814947545528412, + "logps/rejected": -1.1112531423568726, + "loss": 0.4191, + "nll_loss": 0.33359044790267944, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.0962989553809166, + "rewards/margins": 0.12595167756080627, + "rewards/rejected": -0.22225065529346466, + "step": 2050 + }, + { + "epoch": 1.47, + "grad_norm": 5.34375, + "learning_rate": 4.552986372567943e-06, + "log_odds_chosen": 1.2204258441925049, + "log_odds_ratio": -0.3285110592842102, + "logits/chosen": -2.826977252960205, + "logits/rejected": -2.8170952796936035, + "logps/chosen": -0.518255352973938, + "logps/rejected": -1.1798815727233887, + "loss": 0.3758, + "nll_loss": 0.335254967212677, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10365106910467148, + "rewards/margins": 0.13232523202896118, + "rewards/rejected": -0.23597629368305206, + "step": 2060 + }, + { + "epoch": 1.48, + "grad_norm": 5.65625, + "learning_rate": 4.5510999062045625e-06, + "log_odds_chosen": 1.268137812614441, + "log_odds_ratio": -0.3236086964607239, + "logits/chosen": -2.7644591331481934, + "logits/rejected": -2.761575222015381, + "logps/chosen": -0.5131622552871704, + "logps/rejected": -1.2004690170288086, + "loss": 0.3893, + "nll_loss": 0.33567947149276733, + "rewards/accuracies": 0.90625, + "rewards/chosen": -0.10263246297836304, + "rewards/margins": 0.1374613493680954, + "rewards/rejected": -0.24009379744529724, + "step": 2070 + }, + { + "epoch": 1.49, + "grad_norm": 6.09375, + "learning_rate": 4.5492157827927435e-06, + "log_odds_chosen": 1.2079408168792725, + "log_odds_ratio": -0.33384907245635986, + "logits/chosen": -2.86165714263916, + "logits/rejected": -2.8367888927459717, + "logps/chosen": -0.4977470338344574, + "logps/rejected": -1.1325632333755493, + "loss": 0.3736, + "nll_loss": 0.32109367847442627, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.09954941272735596, + "rewards/margins": 0.12696322798728943, + "rewards/rejected": -0.22651264071464539, + "step": 2080 + }, + { + "epoch": 1.49, + "grad_norm": 7.21875, + "learning_rate": 4.54733399748667e-06, + "log_odds_chosen": 1.2205212116241455, + "log_odds_ratio": -0.3305164873600006, + "logits/chosen": -2.7856006622314453, + "logits/rejected": -2.7885613441467285, + "logps/chosen": -0.5106269717216492, + "logps/rejected": -1.1481513977050781, + "loss": 0.3855, + "nll_loss": 0.30324143171310425, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10212540626525879, + "rewards/margins": 0.12750491499900818, + "rewards/rejected": -0.22963032126426697, + "step": 2090 + }, + { + "epoch": 1.5, + "grad_norm": 6.75, + "learning_rate": 4.5454545454545455e-06, + "log_odds_chosen": 1.2237420082092285, + "log_odds_ratio": -0.32826218008995056, + "logits/chosen": -2.820686101913452, + "logits/rejected": -2.8283371925354004, + "logps/chosen": -0.5093476176261902, + "logps/rejected": -1.168192744255066, + "loss": 0.3887, + "nll_loss": 0.3230911195278168, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10186950862407684, + "rewards/margins": 0.13176901638507843, + "rewards/rejected": -0.23363855481147766, + "step": 2100 + }, + { + "epoch": 1.51, + "grad_norm": 6.3125, + "learning_rate": 4.543577421878542e-06, + "log_odds_chosen": 1.2703464031219482, + "log_odds_ratio": -0.31365537643432617, + "logits/chosen": -2.851209878921509, + "logits/rejected": -2.8478894233703613, + "logps/chosen": -0.4876154363155365, + "logps/rejected": -1.1669528484344482, + "loss": 0.4003, + "nll_loss": 0.32528436183929443, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.09752309322357178, + "rewards/margins": 0.13586744666099548, + "rewards/rejected": -0.23339056968688965, + "step": 2110 + }, + { + "epoch": 1.51, + "grad_norm": 6.46875, + "learning_rate": 4.541702621954749e-06, + "log_odds_chosen": 1.1352488994598389, + "log_odds_ratio": -0.35285863280296326, + "logits/chosen": -2.8669912815093994, + "logits/rejected": -2.872535228729248, + "logps/chosen": -0.5130313634872437, + "logps/rejected": -1.1068975925445557, + "loss": 0.4053, + "nll_loss": 0.33999350666999817, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -0.10260625928640366, + "rewards/margins": 0.11877324432134628, + "rewards/rejected": -0.22137951850891113, + "step": 2120 + }, + { + "epoch": 1.52, + "grad_norm": 5.71875, + "learning_rate": 4.539830140893113e-06, + "log_odds_chosen": 1.4157606363296509, + "log_odds_ratio": -0.30813926458358765, + "logits/chosen": -2.8120830059051514, + "logits/rejected": -2.8225760459899902, + "logps/chosen": -0.4803242087364197, + "logps/rejected": -1.2175981998443604, + "loss": 0.3963, + "nll_loss": 0.3383215367794037, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.09606485068798065, + "rewards/margins": 0.14745476841926575, + "rewards/rejected": -0.2435196191072464, + "step": 2130 + }, + { + "epoch": 1.53, + "grad_norm": 7.625, + "learning_rate": 4.537959973917404e-06, + "log_odds_chosen": 1.2107295989990234, + "log_odds_ratio": -0.3288795053958893, + "logits/chosen": -2.809863567352295, + "logits/rejected": -2.8031539916992188, + "logps/chosen": -0.49784055352211, + "logps/rejected": -1.1504080295562744, + "loss": 0.3939, + "nll_loss": 0.3284316062927246, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.09956810623407364, + "rewards/margins": 0.1305135190486908, + "rewards/rejected": -0.23008163273334503, + "step": 2140 + }, + { + "epoch": 1.54, + "grad_norm": 7.46875, + "learning_rate": 4.536092116265145e-06, + "log_odds_chosen": 1.1781766414642334, + "log_odds_ratio": -0.33598729968070984, + "logits/chosen": -2.8239569664001465, + "logits/rejected": -2.8153862953186035, + "logps/chosen": -0.5003775954246521, + "logps/rejected": -1.1214288473129272, + "loss": 0.3922, + "nll_loss": 0.3246839940547943, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.10007552057504654, + "rewards/margins": 0.12421026080846786, + "rewards/rejected": -0.2242857962846756, + "step": 2150 + }, + { + "epoch": 1.54, + "grad_norm": 6.46875, + "learning_rate": 4.534226563187573e-06, + "log_odds_chosen": 1.1511073112487793, + "log_odds_ratio": -0.3392142653465271, + "logits/chosen": -2.8518738746643066, + "logits/rejected": -2.854708671569824, + "logps/chosen": -0.5298787355422974, + "logps/rejected": -1.1405723094940186, + "loss": 0.3987, + "nll_loss": 0.37272533774375916, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10597574710845947, + "rewards/margins": 0.12213869392871857, + "rewards/rejected": -0.22811445593833923, + "step": 2160 + }, + { + "epoch": 1.55, + "grad_norm": 6.3125, + "learning_rate": 4.532363309949585e-06, + "log_odds_chosen": 1.0746400356292725, + "log_odds_ratio": -0.3582301139831543, + "logits/chosen": -2.7901675701141357, + "logits/rejected": -2.7982590198516846, + "logps/chosen": -0.5267711877822876, + "logps/rejected": -1.0863219499588013, + "loss": 0.4085, + "nll_loss": 0.3383466601371765, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10535424947738647, + "rewards/margins": 0.1119101420044899, + "rewards/rejected": -0.21726438403129578, + "step": 2170 + }, + { + "epoch": 1.56, + "grad_norm": 7.375, + "learning_rate": 4.530502351829687e-06, + "log_odds_chosen": 1.2173881530761719, + "log_odds_ratio": -0.33332520723342896, + "logits/chosen": -2.816040515899658, + "logits/rejected": -2.8241238594055176, + "logps/chosen": -0.49707546830177307, + "logps/rejected": -1.1558698415756226, + "loss": 0.3933, + "nll_loss": 0.29985347390174866, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.09941510856151581, + "rewards/margins": 0.13175883889198303, + "rewards/rejected": -0.23117394745349884, + "step": 2180 + }, + { + "epoch": 1.56, + "grad_norm": 6.0, + "learning_rate": 4.528643684119943e-06, + "log_odds_chosen": 1.2386395931243896, + "log_odds_ratio": -0.3029495179653168, + "logits/chosen": -2.780738115310669, + "logits/rejected": -2.78902530670166, + "logps/chosen": -0.49545398354530334, + "logps/rejected": -1.158929705619812, + "loss": 0.3934, + "nll_loss": 0.3328419327735901, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.09909079968929291, + "rewards/margins": 0.13269512355327606, + "rewards/rejected": -0.23178592324256897, + "step": 2190 + }, + { + "epoch": 1.57, + "grad_norm": 7.34375, + "learning_rate": 4.526787302125927e-06, + "log_odds_chosen": 1.2402592897415161, + "log_odds_ratio": -0.3278099596500397, + "logits/chosen": -2.8122589588165283, + "logits/rejected": -2.816236972808838, + "logps/chosen": -0.5384883284568787, + "logps/rejected": -1.2339115142822266, + "loss": 0.4106, + "nll_loss": 0.3580947518348694, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.10769768059253693, + "rewards/margins": 0.1390846073627472, + "rewards/rejected": -0.2467823028564453, + "step": 2200 + }, + { + "epoch": 1.58, + "grad_norm": 6.125, + "learning_rate": 4.524933201166673e-06, + "log_odds_chosen": 1.1136146783828735, + "log_odds_ratio": -0.3713721036911011, + "logits/chosen": -2.8021061420440674, + "logits/rejected": -2.813220739364624, + "logps/chosen": -0.5083444714546204, + "logps/rejected": -1.091552495956421, + "loss": 0.4274, + "nll_loss": 0.36474576592445374, + "rewards/accuracies": 0.893750011920929, + "rewards/chosen": -0.10166887938976288, + "rewards/margins": 0.11664160341024399, + "rewards/rejected": -0.21831050515174866, + "step": 2210 + }, + { + "epoch": 1.59, + "grad_norm": 8.125, + "learning_rate": 4.523081376574626e-06, + "log_odds_chosen": 1.2216575145721436, + "log_odds_ratio": -0.3291280269622803, + "logits/chosen": -2.777296304702759, + "logits/rejected": -2.7747302055358887, + "logps/chosen": -0.5027607679367065, + "logps/rejected": -1.1558706760406494, + "loss": 0.4256, + "nll_loss": 0.3766383230686188, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.10055215656757355, + "rewards/margins": 0.1306219846010208, + "rewards/rejected": -0.23117414116859436, + "step": 2220 + }, + { + "epoch": 1.59, + "grad_norm": 5.875, + "learning_rate": 4.521231823695586e-06, + "log_odds_chosen": 1.131630539894104, + "log_odds_ratio": -0.34842541813850403, + "logits/chosen": -2.8320116996765137, + "logits/rejected": -2.8329334259033203, + "logps/chosen": -0.5016427636146545, + "logps/rejected": -1.0832736492156982, + "loss": 0.3947, + "nll_loss": 0.3312470316886902, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10032854229211807, + "rewards/margins": 0.11632619798183441, + "rewards/rejected": -0.21665474772453308, + "step": 2230 + }, + { + "epoch": 1.6, + "grad_norm": 6.4375, + "learning_rate": 4.519384537888671e-06, + "log_odds_chosen": 1.2074496746063232, + "log_odds_ratio": -0.341810017824173, + "logits/chosen": -2.805079698562622, + "logits/rejected": -2.7929458618164062, + "logps/chosen": -0.4954722821712494, + "logps/rejected": -1.1329338550567627, + "loss": 0.4051, + "nll_loss": 0.3333224356174469, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.099094457924366, + "rewards/margins": 0.12749230861663818, + "rewards/rejected": -0.2265867441892624, + "step": 2240 + }, + { + "epoch": 1.61, + "grad_norm": 6.59375, + "learning_rate": 4.517539514526257e-06, + "log_odds_chosen": 1.3590493202209473, + "log_odds_ratio": -0.316969096660614, + "logits/chosen": -2.803619861602783, + "logits/rejected": -2.77778959274292, + "logps/chosen": -0.5187050700187683, + "logps/rejected": -1.2949590682983398, + "loss": 0.4183, + "nll_loss": 0.34726181626319885, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10374102741479874, + "rewards/margins": 0.15525080263614655, + "rewards/rejected": -0.2589918076992035, + "step": 2250 + }, + { + "epoch": 1.61, + "grad_norm": 6.375, + "learning_rate": 4.515696748993935e-06, + "log_odds_chosen": 1.159447193145752, + "log_odds_ratio": -0.3347181975841522, + "logits/chosen": -2.809387683868408, + "logits/rejected": -2.8114075660705566, + "logps/chosen": -0.5045318007469177, + "logps/rejected": -1.105101466178894, + "loss": 0.4172, + "nll_loss": 0.3646569848060608, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.1009063571691513, + "rewards/margins": 0.12011395394802094, + "rewards/rejected": -0.22102029621601105, + "step": 2260 + }, + { + "epoch": 1.62, + "grad_norm": 6.53125, + "learning_rate": 4.513856236690462e-06, + "log_odds_chosen": 1.1499160528182983, + "log_odds_ratio": -0.34458065032958984, + "logits/chosen": -2.81234073638916, + "logits/rejected": -2.8134610652923584, + "logps/chosen": -0.5100609660148621, + "logps/rejected": -1.1153619289398193, + "loss": 0.4097, + "nll_loss": 0.3220939040184021, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.10201219469308853, + "rewards/margins": 0.12106022983789444, + "rewards/rejected": -0.22307243943214417, + "step": 2270 + }, + { + "epoch": 1.63, + "grad_norm": 6.75, + "learning_rate": 4.51201797302771e-06, + "log_odds_chosen": 1.307038426399231, + "log_odds_ratio": -0.32670050859451294, + "logits/chosen": -2.855238676071167, + "logits/rejected": -2.830141544342041, + "logps/chosen": -0.5477741360664368, + "logps/rejected": -1.310255765914917, + "loss": 0.4111, + "nll_loss": 0.3261423110961914, + "rewards/accuracies": 0.893750011920929, + "rewards/chosen": -0.10955484211444855, + "rewards/margins": 0.152496337890625, + "rewards/rejected": -0.26205116510391235, + "step": 2280 + }, + { + "epoch": 1.64, + "grad_norm": 7.5, + "learning_rate": 4.510181953430622e-06, + "log_odds_chosen": 1.170881986618042, + "log_odds_ratio": -0.3537355363368988, + "logits/chosen": -2.7898900508880615, + "logits/rejected": -2.8005497455596924, + "logps/chosen": -0.5161569714546204, + "logps/rejected": -1.1166584491729736, + "loss": 0.4116, + "nll_loss": 0.33776649832725525, + "rewards/accuracies": 0.90625, + "rewards/chosen": -0.10323138535022736, + "rewards/margins": 0.12010029703378677, + "rewards/rejected": -0.22333166003227234, + "step": 2290 + }, + { + "epoch": 1.64, + "grad_norm": 6.59375, + "learning_rate": 4.508348173337162e-06, + "log_odds_chosen": 1.1670069694519043, + "log_odds_ratio": -0.3370336890220642, + "logits/chosen": -2.8468751907348633, + "logits/rejected": -2.8393032550811768, + "logps/chosen": -0.5258538722991943, + "logps/rejected": -1.1647305488586426, + "loss": 0.4118, + "nll_loss": 0.3415473699569702, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10517077147960663, + "rewards/margins": 0.12777534127235413, + "rewards/rejected": -0.23294611275196075, + "step": 2300 + }, + { + "epoch": 1.65, + "grad_norm": 7.21875, + "learning_rate": 4.5065166281982656e-06, + "log_odds_chosen": 1.2587502002716064, + "log_odds_ratio": -0.31747183203697205, + "logits/chosen": -2.833258867263794, + "logits/rejected": -2.828791379928589, + "logps/chosen": -0.5313466191291809, + "logps/rejected": -1.2055209875106812, + "loss": 0.3804, + "nll_loss": 0.3351854085922241, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.10626931488513947, + "rewards/margins": 0.1348349153995514, + "rewards/rejected": -0.24110420048236847, + "step": 2310 + }, + { + "epoch": 1.66, + "grad_norm": 6.375, + "learning_rate": 4.5046873134777955e-06, + "log_odds_chosen": 1.1143945455551147, + "log_odds_ratio": -0.3569260239601135, + "logits/chosen": -2.8429629802703857, + "logits/rejected": -2.8426883220672607, + "logps/chosen": -0.5169599652290344, + "logps/rejected": -1.0884437561035156, + "loss": 0.4024, + "nll_loss": 0.3241182565689087, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10339200496673584, + "rewards/margins": 0.11429674923419952, + "rewards/rejected": -0.21768875420093536, + "step": 2320 + }, + { + "epoch": 1.66, + "grad_norm": 6.125, + "learning_rate": 4.5028602246524934e-06, + "log_odds_chosen": 1.1783859729766846, + "log_odds_ratio": -0.3340161442756653, + "logits/chosen": -2.806804895401001, + "logits/rejected": -2.7988064289093018, + "logps/chosen": -0.5388362407684326, + "logps/rejected": -1.1798697710037231, + "loss": 0.3924, + "nll_loss": 0.34538668394088745, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1077672466635704, + "rewards/margins": 0.12820670008659363, + "rewards/rejected": -0.23597395420074463, + "step": 2330 + }, + { + "epoch": 1.67, + "grad_norm": 7.09375, + "learning_rate": 4.5010353572119344e-06, + "log_odds_chosen": 1.2434608936309814, + "log_odds_ratio": -0.3279341459274292, + "logits/chosen": -2.8133628368377686, + "logits/rejected": -2.803882360458374, + "logps/chosen": -0.5466577410697937, + "logps/rejected": -1.2451903820037842, + "loss": 0.4016, + "nll_loss": 0.32733532786369324, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.10933154821395874, + "rewards/margins": 0.13970652222633362, + "rewards/rejected": -0.24903810024261475, + "step": 2340 + }, + { + "epoch": 1.68, + "grad_norm": 6.65625, + "learning_rate": 4.499212706658476e-06, + "log_odds_chosen": 1.2468292713165283, + "log_odds_ratio": -0.33085036277770996, + "logits/chosen": -2.7604432106018066, + "logits/rejected": -2.767918586730957, + "logps/chosen": -0.5312229990959167, + "logps/rejected": -1.232725977897644, + "loss": 0.3905, + "nll_loss": 0.28453925251960754, + "rewards/accuracies": 0.90625, + "rewards/chosen": -0.10624460875988007, + "rewards/margins": 0.14030058681964874, + "rewards/rejected": -0.24654516577720642, + "step": 2350 + }, + { + "epoch": 1.69, + "grad_norm": 6.15625, + "learning_rate": 4.497392268507216e-06, + "log_odds_chosen": 1.3811371326446533, + "log_odds_ratio": -0.3187825083732605, + "logits/chosen": -2.7945587635040283, + "logits/rejected": -2.793541431427002, + "logps/chosen": -0.5095025897026062, + "logps/rejected": -1.2895538806915283, + "loss": 0.3888, + "nll_loss": 0.33172607421875, + "rewards/accuracies": 0.90625, + "rewards/chosen": -0.10190053284168243, + "rewards/margins": 0.15601028501987457, + "rewards/rejected": -0.2579107880592346, + "step": 2360 + }, + { + "epoch": 1.69, + "grad_norm": 8.1875, + "learning_rate": 4.495574038285945e-06, + "log_odds_chosen": 1.2981445789337158, + "log_odds_ratio": -0.3155536949634552, + "logits/chosen": -2.8101601600646973, + "logits/rejected": -2.8108277320861816, + "logps/chosen": -0.5573509335517883, + "logps/rejected": -1.3329538106918335, + "loss": 0.3898, + "nll_loss": 0.3312874436378479, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.11147018522024155, + "rewards/margins": 0.1551206111907959, + "rewards/rejected": -0.26659080386161804, + "step": 2370 + }, + { + "epoch": 1.7, + "grad_norm": 8.5625, + "learning_rate": 4.493758011535097e-06, + "log_odds_chosen": 1.2831408977508545, + "log_odds_ratio": -0.3103974163532257, + "logits/chosen": -2.823028564453125, + "logits/rejected": -2.813875436782837, + "logps/chosen": -0.5119468569755554, + "logps/rejected": -1.2149946689605713, + "loss": 0.4083, + "nll_loss": 0.32668930292129517, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.1023893728852272, + "rewards/margins": 0.1406095325946808, + "rewards/rejected": -0.2429989129304886, + "step": 2380 + }, + { + "epoch": 1.71, + "grad_norm": 9.375, + "learning_rate": 4.491944183807709e-06, + "log_odds_chosen": 1.1459182500839233, + "log_odds_ratio": -0.3687919080257416, + "logits/chosen": -2.850087881088257, + "logits/rejected": -2.8486053943634033, + "logps/chosen": -0.5623646974563599, + "logps/rejected": -1.2140840291976929, + "loss": 0.3556, + "nll_loss": 0.285260409116745, + "rewards/accuracies": 0.893750011920929, + "rewards/chosen": -0.11247295141220093, + "rewards/margins": 0.13034388422966003, + "rewards/rejected": -0.24281683564186096, + "step": 2390 + }, + { + "epoch": 1.71, + "grad_norm": 9.375, + "learning_rate": 4.490132550669373e-06, + "log_odds_chosen": 1.240378975868225, + "log_odds_ratio": -0.3130456507205963, + "logits/chosen": -2.9485726356506348, + "logits/rejected": -2.9329330921173096, + "logps/chosen": -0.5569415092468262, + "logps/rejected": -1.2546156644821167, + "loss": 0.4249, + "nll_loss": 0.3691002428531647, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.11138832569122314, + "rewards/margins": 0.1395348310470581, + "rewards/rejected": -0.25092315673828125, + "step": 2400 + }, + { + "epoch": 1.72, + "grad_norm": 7.0625, + "learning_rate": 4.488323107698186e-06, + "log_odds_chosen": 1.3868653774261475, + "log_odds_ratio": -0.2894250750541687, + "logits/chosen": -2.9048142433166504, + "logits/rejected": -2.893167018890381, + "logps/chosen": -0.4791548252105713, + "logps/rejected": -1.2176661491394043, + "loss": 0.4143, + "nll_loss": 0.3569006025791168, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.09583096206188202, + "rewards/margins": 0.14770226180553436, + "rewards/rejected": -0.24353322386741638, + "step": 2410 + }, + { + "epoch": 1.73, + "grad_norm": 7.21875, + "learning_rate": 4.486515850484713e-06, + "log_odds_chosen": 1.1212716102600098, + "log_odds_ratio": -0.3596242070198059, + "logits/chosen": -2.9160571098327637, + "logits/rejected": -2.9128189086914062, + "logps/chosen": -0.5376850366592407, + "logps/rejected": -1.1596351861953735, + "loss": 0.4265, + "nll_loss": 0.33928748965263367, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10753700882196426, + "rewards/margins": 0.12439004331827164, + "rewards/rejected": -0.2319270670413971, + "step": 2420 + }, + { + "epoch": 1.74, + "grad_norm": 8.5, + "learning_rate": 4.484710774631934e-06, + "log_odds_chosen": 1.241854190826416, + "log_odds_ratio": -0.3230142295360565, + "logits/chosen": -2.8752593994140625, + "logits/rejected": -2.8711416721343994, + "logps/chosen": -0.5400632619857788, + "logps/rejected": -1.2231850624084473, + "loss": 0.397, + "nll_loss": 0.3497290313243866, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.10801263898611069, + "rewards/margins": 0.13662435114383698, + "rewards/rejected": -0.24463701248168945, + "step": 2430 + }, + { + "epoch": 1.74, + "grad_norm": 7.28125, + "learning_rate": 4.482907875755205e-06, + "log_odds_chosen": 1.3686769008636475, + "log_odds_ratio": -0.2961091697216034, + "logits/chosen": -2.8909826278686523, + "logits/rejected": -2.897916078567505, + "logps/chosen": -0.5009250640869141, + "logps/rejected": -1.2302018404006958, + "loss": 0.4144, + "nll_loss": 0.36144185066223145, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.10018502175807953, + "rewards/margins": 0.14585533738136292, + "rewards/rejected": -0.24604037404060364, + "step": 2440 + }, + { + "epoch": 1.75, + "grad_norm": 7.40625, + "learning_rate": 4.481107149482208e-06, + "log_odds_chosen": 1.3370695114135742, + "log_odds_ratio": -0.30031710863113403, + "logits/chosen": -2.8956050872802734, + "logits/rejected": -2.8868050575256348, + "logps/chosen": -0.5050898194313049, + "logps/rejected": -1.255013108253479, + "loss": 0.3929, + "nll_loss": 0.3568061888217926, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10101795196533203, + "rewards/margins": 0.14998464286327362, + "rewards/rejected": -0.25100260972976685, + "step": 2450 + }, + { + "epoch": 1.76, + "grad_norm": 8.6875, + "learning_rate": 4.4793085914529136e-06, + "log_odds_chosen": 1.1334112882614136, + "log_odds_ratio": -0.3533148169517517, + "logits/chosen": -2.8731157779693604, + "logits/rejected": -2.8705520629882812, + "logps/chosen": -0.5255860090255737, + "logps/rejected": -1.1402482986450195, + "loss": 0.3965, + "nll_loss": 0.34123849868774414, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10511720180511475, + "rewards/margins": 0.12293247878551483, + "rewards/rejected": -0.22804968059062958, + "step": 2460 + }, + { + "epoch": 1.76, + "grad_norm": 10.6875, + "learning_rate": 4.477512197319528e-06, + "log_odds_chosen": 1.3563247919082642, + "log_odds_ratio": -0.32014063000679016, + "logits/chosen": -2.8848843574523926, + "logits/rejected": -2.879897356033325, + "logps/chosen": -0.5252963900566101, + "logps/rejected": -1.3075590133666992, + "loss": 0.3723, + "nll_loss": 0.30930250883102417, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10505928099155426, + "rewards/margins": 0.15645255148410797, + "rewards/rejected": -0.26151180267333984, + "step": 2470 + }, + { + "epoch": 1.77, + "grad_norm": 19.25, + "learning_rate": 4.475717962746456e-06, + "log_odds_chosen": 1.2710864543914795, + "log_odds_ratio": -0.3212870657444, + "logits/chosen": -2.875734806060791, + "logits/rejected": -2.869267702102661, + "logps/chosen": -0.5439091920852661, + "logps/rejected": -1.2747299671173096, + "loss": 0.3726, + "nll_loss": 0.30996406078338623, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1087818369269371, + "rewards/margins": 0.1461641490459442, + "rewards/rejected": -0.2549459636211395, + "step": 2480 + }, + { + "epoch": 1.78, + "grad_norm": 8.1875, + "learning_rate": 4.4739258834102515e-06, + "log_odds_chosen": 1.2608749866485596, + "log_odds_ratio": -0.346863329410553, + "logits/chosen": -2.85408878326416, + "logits/rejected": -2.849069595336914, + "logps/chosen": -0.5514413118362427, + "logps/rejected": -1.3013412952423096, + "loss": 0.3812, + "nll_loss": 0.33392244577407837, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11028827726840973, + "rewards/margins": 0.14997997879981995, + "rewards/rejected": -0.26026827096939087, + "step": 2490 + }, + { + "epoch": 1.79, + "grad_norm": 7.125, + "learning_rate": 4.47213595499958e-06, + "log_odds_chosen": 1.5626336336135864, + "log_odds_ratio": -0.27938312292099, + "logits/chosen": -2.928284168243408, + "logits/rejected": -2.90639328956604, + "logps/chosen": -0.5721508860588074, + "logps/rejected": -1.5081751346588135, + "loss": 0.3923, + "nll_loss": 0.3527207374572754, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.11443017423152924, + "rewards/margins": 0.18720485270023346, + "rewards/rejected": -0.3016350269317627, + "step": 2500 + }, + { + "epoch": 1.79, + "grad_norm": 6.65625, + "learning_rate": 4.470348173215168e-06, + "log_odds_chosen": 1.3578099012374878, + "log_odds_ratio": -0.3152514100074768, + "logits/chosen": -2.8871371746063232, + "logits/rejected": -2.874263286590576, + "logps/chosen": -0.5261938571929932, + "logps/rejected": -1.3208293914794922, + "loss": 0.3849, + "nll_loss": 0.33318930864334106, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.10523878037929535, + "rewards/margins": 0.15892712771892548, + "rewards/rejected": -0.26416587829589844, + "step": 2510 + }, + { + "epoch": 1.8, + "grad_norm": 12.5625, + "learning_rate": 4.468562533769766e-06, + "log_odds_chosen": 1.367193579673767, + "log_odds_ratio": -0.3165552616119385, + "logits/chosen": -2.8899295330047607, + "logits/rejected": -2.890939950942993, + "logps/chosen": -0.5429189205169678, + "logps/rejected": -1.3376638889312744, + "loss": 0.4111, + "nll_loss": 0.33587661385536194, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10858378559350967, + "rewards/margins": 0.15894900262355804, + "rewards/rejected": -0.2675327956676483, + "step": 2520 + }, + { + "epoch": 1.81, + "grad_norm": 12.8125, + "learning_rate": 4.4667790323881e-06, + "log_odds_chosen": 1.2805125713348389, + "log_odds_ratio": -0.33233442902565, + "logits/chosen": -2.871812105178833, + "logits/rejected": -2.852710723876953, + "logps/chosen": -0.6220626831054688, + "logps/rejected": -1.3669788837432861, + "loss": 0.4336, + "nll_loss": 0.4037063717842102, + "rewards/accuracies": 0.9312499761581421, + "rewards/chosen": -0.12441255897283554, + "rewards/margins": 0.14898324012756348, + "rewards/rejected": -0.2733957767486572, + "step": 2530 + }, + { + "epoch": 1.81, + "grad_norm": 7.0625, + "learning_rate": 4.464997664806832e-06, + "log_odds_chosen": 1.1691948175430298, + "log_odds_ratio": -0.35392698645591736, + "logits/chosen": -2.857517719268799, + "logits/rejected": -2.862380266189575, + "logps/chosen": -0.5593948364257812, + "logps/rejected": -1.201662302017212, + "loss": 0.3871, + "nll_loss": 0.31616219878196716, + "rewards/accuracies": 0.893750011920929, + "rewards/chosen": -0.11187896877527237, + "rewards/margins": 0.12845350801944733, + "rewards/rejected": -0.2403324842453003, + "step": 2540 + }, + { + "epoch": 1.82, + "grad_norm": 6.4375, + "learning_rate": 4.463218426774518e-06, + "log_odds_chosen": 1.4263107776641846, + "log_odds_ratio": -0.3046194911003113, + "logits/chosen": -2.891172170639038, + "logits/rejected": -2.8799169063568115, + "logps/chosen": -0.5329869389533997, + "logps/rejected": -1.3692753314971924, + "loss": 0.3873, + "nll_loss": 0.3430352509021759, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.10659738630056381, + "rewards/margins": 0.16725768148899078, + "rewards/rejected": -0.273855060338974, + "step": 2550 + }, + { + "epoch": 1.83, + "grad_norm": 6.40625, + "learning_rate": 4.461441314051561e-06, + "log_odds_chosen": 1.4294536113739014, + "log_odds_ratio": -0.2968199849128723, + "logits/chosen": -2.9138002395629883, + "logits/rejected": -2.8921501636505127, + "logps/chosen": -0.4996632933616638, + "logps/rejected": -1.3187425136566162, + "loss": 0.3745, + "nll_loss": 0.3486596941947937, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.09993267059326172, + "rewards/margins": 0.16381582617759705, + "rewards/rejected": -0.2637484669685364, + "step": 2560 + }, + { + "epoch": 1.84, + "grad_norm": 7.59375, + "learning_rate": 4.459666322410172e-06, + "log_odds_chosen": 1.3704941272735596, + "log_odds_ratio": -0.3186499774456024, + "logits/chosen": -2.883667469024658, + "logits/rejected": -2.8775885105133057, + "logps/chosen": -0.5285792350769043, + "logps/rejected": -1.3211987018585205, + "loss": 0.4048, + "nll_loss": 0.3316243290901184, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10571584850549698, + "rewards/margins": 0.15852385759353638, + "rewards/rejected": -0.26423972845077515, + "step": 2570 + }, + { + "epoch": 1.84, + "grad_norm": 9.1875, + "learning_rate": 4.457893447634326e-06, + "log_odds_chosen": 1.2892141342163086, + "log_odds_ratio": -0.33191436529159546, + "logits/chosen": -2.9250831604003906, + "logits/rejected": -2.902353286743164, + "logps/chosen": -0.580119788646698, + "logps/rejected": -1.3289637565612793, + "loss": 0.4149, + "nll_loss": 0.35767120122909546, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1160239577293396, + "rewards/margins": 0.14976876974105835, + "rewards/rejected": -0.26579275727272034, + "step": 2580 + }, + { + "epoch": 1.85, + "grad_norm": 10.5625, + "learning_rate": 4.456122685519721e-06, + "log_odds_chosen": 1.394887089729309, + "log_odds_ratio": -0.3056487441062927, + "logits/chosen": -2.8950932025909424, + "logits/rejected": -2.873574733734131, + "logps/chosen": -0.5190961956977844, + "logps/rejected": -1.3081409931182861, + "loss": 0.4005, + "nll_loss": 0.31329116225242615, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.10381922870874405, + "rewards/margins": 0.15780898928642273, + "rewards/rejected": -0.2616282105445862, + "step": 2590 + }, + { + "epoch": 1.86, + "grad_norm": 7.09375, + "learning_rate": 4.45435403187374e-06, + "log_odds_chosen": 1.507157325744629, + "log_odds_ratio": -0.2677215039730072, + "logits/chosen": -2.8969600200653076, + "logits/rejected": -2.88423752784729, + "logps/chosen": -0.4688204824924469, + "logps/rejected": -1.299645185470581, + "loss": 0.3953, + "nll_loss": 0.32218068838119507, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.09376410394906998, + "rewards/margins": 0.16616491973400116, + "rewards/rejected": -0.25992903113365173, + "step": 2600 + }, + { + "epoch": 1.86, + "grad_norm": 9.4375, + "learning_rate": 4.452587482515399e-06, + "log_odds_chosen": 1.3385193347930908, + "log_odds_ratio": -0.31111469864845276, + "logits/chosen": -2.907179355621338, + "logits/rejected": -2.8795833587646484, + "logps/chosen": -0.5588836669921875, + "logps/rejected": -1.3447484970092773, + "loss": 0.398, + "nll_loss": 0.3411378264427185, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.11177675426006317, + "rewards/margins": 0.15717296302318573, + "rewards/rejected": -0.2689497172832489, + "step": 2610 + }, + { + "epoch": 1.87, + "grad_norm": 9.5625, + "learning_rate": 4.450823033275315e-06, + "log_odds_chosen": 1.338440179824829, + "log_odds_ratio": -0.3163678050041199, + "logits/chosen": -2.9387764930725098, + "logits/rejected": -2.9056990146636963, + "logps/chosen": -0.5297420024871826, + "logps/rejected": -1.322826862335205, + "loss": 0.422, + "nll_loss": 0.3693048357963562, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.10594840347766876, + "rewards/margins": 0.15861697494983673, + "rewards/rejected": -0.2645653784275055, + "step": 2620 + }, + { + "epoch": 1.88, + "grad_norm": 7.65625, + "learning_rate": 4.4490606799956615e-06, + "log_odds_chosen": 1.4915597438812256, + "log_odds_ratio": -0.28770941495895386, + "logits/chosen": -2.8788347244262695, + "logits/rejected": -2.8595736026763916, + "logps/chosen": -0.5416139364242554, + "logps/rejected": -1.4380385875701904, + "loss": 0.4005, + "nll_loss": 0.34102195501327515, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10832278430461884, + "rewards/margins": 0.1792849451303482, + "rewards/rejected": -0.28760772943496704, + "step": 2630 + }, + { + "epoch": 1.89, + "grad_norm": 9.5625, + "learning_rate": 4.447300418530126e-06, + "log_odds_chosen": 1.3638993501663208, + "log_odds_ratio": -0.32332590222358704, + "logits/chosen": -2.916489601135254, + "logits/rejected": -2.8951425552368164, + "logps/chosen": -0.5407701730728149, + "logps/rejected": -1.3687996864318848, + "loss": 0.4186, + "nll_loss": 0.350581556558609, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.10815402120351791, + "rewards/margins": 0.16560593247413635, + "rewards/rejected": -0.27375996112823486, + "step": 2640 + }, + { + "epoch": 1.89, + "grad_norm": 8.4375, + "learning_rate": 4.4455422447438715e-06, + "log_odds_chosen": 1.3353352546691895, + "log_odds_ratio": -0.3252139091491699, + "logits/chosen": -2.880558729171753, + "logits/rejected": -2.8813929557800293, + "logps/chosen": -0.5225256681442261, + "logps/rejected": -1.2812443971633911, + "loss": 0.4082, + "nll_loss": 0.32984524965286255, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.10450513660907745, + "rewards/margins": 0.15174375474452972, + "rewards/rejected": -0.25624892115592957, + "step": 2650 + }, + { + "epoch": 1.9, + "grad_norm": 9.0625, + "learning_rate": 4.443786154513493e-06, + "log_odds_chosen": 1.541635513305664, + "log_odds_ratio": -0.2674759328365326, + "logits/chosen": -2.883507490158081, + "logits/rejected": -2.8745486736297607, + "logps/chosen": -0.5493952035903931, + "logps/rejected": -1.4771674871444702, + "loss": 0.3756, + "nll_loss": 0.2988761067390442, + "rewards/accuracies": 0.956250011920929, + "rewards/chosen": -0.1098790392279625, + "rewards/margins": 0.18555445969104767, + "rewards/rejected": -0.2954334616661072, + "step": 2660 + }, + { + "epoch": 1.91, + "grad_norm": 12.9375, + "learning_rate": 4.442032143726981e-06, + "log_odds_chosen": 1.3634321689605713, + "log_odds_ratio": -0.30965596437454224, + "logits/chosen": -2.8866159915924072, + "logits/rejected": -2.8669233322143555, + "logps/chosen": -0.602931022644043, + "logps/rejected": -1.4159696102142334, + "loss": 0.4085, + "nll_loss": 0.362027645111084, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.12058620154857635, + "rewards/margins": 0.16260775923728943, + "rewards/rejected": -0.283193975687027, + "step": 2670 + }, + { + "epoch": 1.91, + "grad_norm": 13.9375, + "learning_rate": 4.440280208283675e-06, + "log_odds_chosen": 1.3336797952651978, + "log_odds_ratio": -0.32034236192703247, + "logits/chosen": -2.9205827713012695, + "logits/rejected": -2.8959076404571533, + "logps/chosen": -0.5290694236755371, + "logps/rejected": -1.265318512916565, + "loss": 0.3972, + "nll_loss": 0.3247922360897064, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.1058138832449913, + "rewards/margins": 0.14724981784820557, + "rewards/rejected": -0.25306370854377747, + "step": 2680 + }, + { + "epoch": 1.92, + "grad_norm": 12.0625, + "learning_rate": 4.43853034409423e-06, + "log_odds_chosen": 1.4389830827713013, + "log_odds_ratio": -0.308421790599823, + "logits/chosen": -2.900420904159546, + "logits/rejected": -2.879181385040283, + "logps/chosen": -0.5513266324996948, + "logps/rejected": -1.4156787395477295, + "loss": 0.3989, + "nll_loss": 0.3391249179840088, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.11026531457901001, + "rewards/margins": 0.1728704273700714, + "rewards/rejected": -0.2831357419490814, + "step": 2690 + }, + { + "epoch": 1.93, + "grad_norm": 7.46875, + "learning_rate": 4.43678254708057e-06, + "log_odds_chosen": 1.2590786218643188, + "log_odds_ratio": -0.3339710831642151, + "logits/chosen": -2.8714280128479004, + "logits/rejected": -2.8531947135925293, + "logps/chosen": -0.5529037117958069, + "logps/rejected": -1.291151762008667, + "loss": 0.4083, + "nll_loss": 0.34685009717941284, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.11058074235916138, + "rewards/margins": 0.1476496458053589, + "rewards/rejected": -0.25823038816452026, + "step": 2700 + }, + { + "epoch": 1.94, + "grad_norm": 10.8125, + "learning_rate": 4.435036813175853e-06, + "log_odds_chosen": 1.296800971031189, + "log_odds_ratio": -0.3468873202800751, + "logits/chosen": -2.9166648387908936, + "logits/rejected": -2.905029773712158, + "logps/chosen": -0.5468270778656006, + "logps/rejected": -1.318878412246704, + "loss": 0.4244, + "nll_loss": 0.3611888289451599, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10936541855335236, + "rewards/margins": 0.15441028773784637, + "rewards/rejected": -0.26377567648887634, + "step": 2710 + }, + { + "epoch": 1.94, + "grad_norm": 7.59375, + "learning_rate": 4.4332931383244296e-06, + "log_odds_chosen": 1.5938103199005127, + "log_odds_ratio": -0.2786775231361389, + "logits/chosen": -2.84563946723938, + "logits/rejected": -2.81329345703125, + "logps/chosen": -0.5737396478652954, + "logps/rejected": -1.5809630155563354, + "loss": 0.4009, + "nll_loss": 0.3476629853248596, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.11474792659282684, + "rewards/margins": 0.20144470036029816, + "rewards/rejected": -0.316192626953125, + "step": 2720 + }, + { + "epoch": 1.95, + "grad_norm": 9.125, + "learning_rate": 4.431551518481802e-06, + "log_odds_chosen": 1.4222679138183594, + "log_odds_ratio": -0.30235162377357483, + "logits/chosen": -2.859123468399048, + "logits/rejected": -2.8459434509277344, + "logps/chosen": -0.5479883551597595, + "logps/rejected": -1.3775622844696045, + "loss": 0.3895, + "nll_loss": 0.31317585706710815, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.10959766805171967, + "rewards/margins": 0.16591483354568481, + "rewards/rejected": -0.2755124866962433, + "step": 2730 + }, + { + "epoch": 1.96, + "grad_norm": 8.1875, + "learning_rate": 4.429811949614588e-06, + "log_odds_chosen": 1.3053306341171265, + "log_odds_ratio": -0.3146135210990906, + "logits/chosen": -2.9177818298339844, + "logits/rejected": -2.8973758220672607, + "logps/chosen": -0.5131221413612366, + "logps/rejected": -1.2459287643432617, + "loss": 0.4246, + "nll_loss": 0.37119024991989136, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10262443125247955, + "rewards/margins": 0.14656129479408264, + "rewards/rejected": -0.2491857260465622, + "step": 2740 + }, + { + "epoch": 1.96, + "grad_norm": 6.4375, + "learning_rate": 4.428074427700477e-06, + "log_odds_chosen": 1.3217928409576416, + "log_odds_ratio": -0.30279672145843506, + "logits/chosen": -2.877779245376587, + "logits/rejected": -2.860002279281616, + "logps/chosen": -0.5184148550033569, + "logps/rejected": -1.2421989440917969, + "loss": 0.3943, + "nll_loss": 0.32501065731048584, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.1036829724907875, + "rewards/margins": 0.14475682377815247, + "rewards/rejected": -0.24843978881835938, + "step": 2750 + }, + { + "epoch": 1.97, + "grad_norm": 16.125, + "learning_rate": 4.426338948728195e-06, + "log_odds_chosen": 1.3941218852996826, + "log_odds_ratio": -0.3162110447883606, + "logits/chosen": -2.8602135181427, + "logits/rejected": -2.820308208465576, + "logps/chosen": -0.5790046453475952, + "logps/rejected": -1.4153722524642944, + "loss": 0.4456, + "nll_loss": 0.3927566409111023, + "rewards/accuracies": 0.9437500238418579, + "rewards/chosen": -0.11580093950033188, + "rewards/margins": 0.16727350652217865, + "rewards/rejected": -0.28307443857192993, + "step": 2760 + }, + { + "epoch": 1.98, + "grad_norm": 7.15625, + "learning_rate": 4.424605508697463e-06, + "log_odds_chosen": 1.4302750825881958, + "log_odds_ratio": -0.3106474280357361, + "logits/chosen": -2.8623061180114746, + "logits/rejected": -2.825331449508667, + "logps/chosen": -0.5542712211608887, + "logps/rejected": -1.4113256931304932, + "loss": 0.3867, + "nll_loss": 0.34051352739334106, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.11085423082113266, + "rewards/margins": 0.17141090333461761, + "rewards/rejected": -0.2822651267051697, + "step": 2770 + }, + { + "epoch": 1.99, + "grad_norm": 11.4375, + "learning_rate": 4.42287410361896e-06, + "log_odds_chosen": 1.4414396286010742, + "log_odds_ratio": -0.32269230484962463, + "logits/chosen": -2.8639965057373047, + "logits/rejected": -2.8246994018554688, + "logps/chosen": -0.5694876313209534, + "logps/rejected": -1.4324452877044678, + "loss": 0.3603, + "nll_loss": 0.307388037443161, + "rewards/accuracies": 0.8812500238418579, + "rewards/chosen": -0.11389752477407455, + "rewards/margins": 0.17259155213832855, + "rewards/rejected": -0.2864890694618225, + "step": 2780 + }, + { + "epoch": 1.99, + "grad_norm": 12.0625, + "learning_rate": 4.421144729514289e-06, + "log_odds_chosen": 1.4677602052688599, + "log_odds_ratio": -0.3058595061302185, + "logits/chosen": -2.8635823726654053, + "logits/rejected": -2.80998158454895, + "logps/chosen": -0.5519368052482605, + "logps/rejected": -1.4589126110076904, + "loss": 0.4043, + "nll_loss": 0.3577142357826233, + "rewards/accuracies": 0.918749988079071, + "rewards/chosen": -0.11038736253976822, + "rewards/margins": 0.18139515817165375, + "rewards/rejected": -0.2917824983596802, + "step": 2790 + }, + { + "epoch": 2.0, + "grad_norm": 12.75, + "learning_rate": 4.419417382415923e-06, + "log_odds_chosen": 1.4039043188095093, + "log_odds_ratio": -0.313946932554245, + "logits/chosen": -2.8399224281311035, + "logits/rejected": -2.791377544403076, + "logps/chosen": -0.5001746416091919, + "logps/rejected": -1.3304522037506104, + "loss": 0.3924, + "nll_loss": 0.3202509880065918, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1000349372625351, + "rewards/margins": 0.16605551540851593, + "rewards/rejected": -0.26609042286872864, + "step": 2800 + }, + { + "epoch": 2.01, + "grad_norm": 5.3125, + "learning_rate": 4.417692058367186e-06, + "log_odds_chosen": 2.910543441772461, + "log_odds_ratio": -0.10447581857442856, + "logits/chosen": -2.8468985557556152, + "logits/rejected": -2.7128536701202393, + "logps/chosen": -0.390835702419281, + "logps/rejected": -2.187709331512451, + "loss": 0.2834, + "nll_loss": 0.2709491550922394, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0781671404838562, + "rewards/margins": 0.35937467217445374, + "rewards/rejected": -0.43754181265830994, + "step": 2810 + }, + { + "epoch": 2.01, + "grad_norm": 9.5625, + "learning_rate": 4.415968753422204e-06, + "log_odds_chosen": 3.4466099739074707, + "log_odds_ratio": -0.07032948732376099, + "logits/chosen": -2.8091824054718018, + "logits/rejected": -2.6169466972351074, + "logps/chosen": -0.31530073285102844, + "logps/rejected": -2.4634060859680176, + "loss": 0.2586, + "nll_loss": 0.2396393120288849, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06306014955043793, + "rewards/margins": 0.4296211302280426, + "rewards/rejected": -0.4926813244819641, + "step": 2820 + }, + { + "epoch": 2.02, + "grad_norm": 4.65625, + "learning_rate": 4.414247463645868e-06, + "log_odds_chosen": 3.7165610790252686, + "log_odds_ratio": -0.06657154858112335, + "logits/chosen": -2.7692294120788574, + "logits/rejected": -2.4765193462371826, + "logps/chosen": -0.37618759274482727, + "logps/rejected": -2.8358898162841797, + "loss": 0.2718, + "nll_loss": 0.2537682354450226, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07523752003908157, + "rewards/margins": 0.491940438747406, + "rewards/rejected": -0.5671780109405518, + "step": 2830 + }, + { + "epoch": 2.03, + "grad_norm": 6.1875, + "learning_rate": 4.4125281851137995e-06, + "log_odds_chosen": 5.014394283294678, + "log_odds_ratio": -0.07537925243377686, + "logits/chosen": -2.721604108810425, + "logits/rejected": -2.253276824951172, + "logps/chosen": -0.3364471197128296, + "logps/rejected": -4.104693412780762, + "loss": 0.2853, + "nll_loss": 0.24688100814819336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06728943437337875, + "rewards/margins": 0.7536492943763733, + "rewards/rejected": -0.820938766002655, + "step": 2840 + }, + { + "epoch": 2.03, + "grad_norm": 7.8125, + "learning_rate": 4.41081091391231e-06, + "log_odds_chosen": 3.7566096782684326, + "log_odds_ratio": -0.06575653702020645, + "logits/chosen": -2.7676377296447754, + "logits/rejected": -2.4655532836914062, + "logps/chosen": -0.3585343360900879, + "logps/rejected": -2.8366215229034424, + "loss": 0.2814, + "nll_loss": 0.27807971835136414, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0717068687081337, + "rewards/margins": 0.49561747908592224, + "rewards/rejected": -0.5673243403434753, + "step": 2850 + }, + { + "epoch": 2.04, + "grad_norm": 13.875, + "learning_rate": 4.409095646138363e-06, + "log_odds_chosen": 3.4027340412139893, + "log_odds_ratio": -0.08646047860383987, + "logits/chosen": -2.78098726272583, + "logits/rejected": -2.575989246368408, + "logps/chosen": -0.3481005132198334, + "logps/rejected": -2.515467405319214, + "loss": 0.2687, + "nll_loss": 0.2460239678621292, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.06962011754512787, + "rewards/margins": 0.4334734380245209, + "rewards/rejected": -0.5030934810638428, + "step": 2860 + }, + { + "epoch": 2.05, + "grad_norm": 5.5625, + "learning_rate": 4.4073823778995425e-06, + "log_odds_chosen": 3.673715114593506, + "log_odds_ratio": -0.06583552062511444, + "logits/chosen": -2.7850005626678467, + "logits/rejected": -2.458684206008911, + "logps/chosen": -0.3202974200248718, + "logps/rejected": -2.65244197845459, + "loss": 0.2405, + "nll_loss": 0.2267676144838333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06405948847532272, + "rewards/margins": 0.4664289355278015, + "rewards/rejected": -0.5304883718490601, + "step": 2870 + }, + { + "epoch": 2.06, + "grad_norm": 12.8125, + "learning_rate": 4.405671105314009e-06, + "log_odds_chosen": 4.673083782196045, + "log_odds_ratio": -0.05653030425310135, + "logits/chosen": -2.6835262775421143, + "logits/rejected": -2.0964083671569824, + "logps/chosen": -0.336531400680542, + "logps/rejected": -3.6344618797302246, + "loss": 0.2548, + "nll_loss": 0.23812773823738098, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0673062726855278, + "rewards/margins": 0.6595860719680786, + "rewards/rejected": -0.726892352104187, + "step": 2880 + }, + { + "epoch": 2.06, + "grad_norm": 5.71875, + "learning_rate": 4.4039618245104645e-06, + "log_odds_chosen": 5.218452453613281, + "log_odds_ratio": -0.06279017776250839, + "logits/chosen": -2.765897274017334, + "logits/rejected": -2.0521953105926514, + "logps/chosen": -0.3615598678588867, + "logps/rejected": -4.352395534515381, + "loss": 0.2837, + "nll_loss": 0.2878434956073761, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07231196761131287, + "rewards/margins": 0.7981672286987305, + "rewards/rejected": -0.8704792261123657, + "step": 2890 + }, + { + "epoch": 2.07, + "grad_norm": 9.125, + "learning_rate": 4.4022545316281195e-06, + "log_odds_chosen": 5.200108051300049, + "log_odds_ratio": -0.05653291940689087, + "logits/chosen": -2.7568302154541016, + "logits/rejected": -2.0886034965515137, + "logps/chosen": -0.34464651346206665, + "logps/rejected": -4.230252742767334, + "loss": 0.2593, + "nll_loss": 0.26374679803848267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06892929971218109, + "rewards/margins": 0.7771213054656982, + "rewards/rejected": -0.8460506200790405, + "step": 2900 + }, + { + "epoch": 2.08, + "grad_norm": 9.375, + "learning_rate": 4.40054922281665e-06, + "log_odds_chosen": 6.3368120193481445, + "log_odds_ratio": -0.04714951664209366, + "logits/chosen": -2.7386841773986816, + "logits/rejected": -2.0858142375946045, + "logps/chosen": -0.342684805393219, + "logps/rejected": -5.299139499664307, + "loss": 0.2966, + "nll_loss": 0.28953665494918823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06853695958852768, + "rewards/margins": 0.9912910461425781, + "rewards/rejected": -1.0598279237747192, + "step": 2910 + }, + { + "epoch": 2.08, + "grad_norm": 10.125, + "learning_rate": 4.398845894236168e-06, + "log_odds_chosen": 6.120891094207764, + "log_odds_ratio": -0.045149557292461395, + "logits/chosen": -2.792410135269165, + "logits/rejected": -2.072402000427246, + "logps/chosen": -0.3363955020904541, + "logps/rejected": -5.071963787078857, + "loss": 0.2825, + "nll_loss": 0.25938451290130615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06727909296751022, + "rewards/margins": 0.9471136927604675, + "rewards/rejected": -1.014392614364624, + "step": 2920 + }, + { + "epoch": 2.09, + "grad_norm": 8.875, + "learning_rate": 4.397144542057179e-06, + "log_odds_chosen": 4.736415863037109, + "log_odds_ratio": -0.06085144728422165, + "logits/chosen": -2.801853656768799, + "logits/rejected": -2.182584047317505, + "logps/chosen": -0.36493486166000366, + "logps/rejected": -3.7831199169158936, + "loss": 0.2756, + "nll_loss": 0.2436126172542572, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07298697531223297, + "rewards/margins": 0.6836371421813965, + "rewards/rejected": -0.7566241025924683, + "step": 2930 + }, + { + "epoch": 2.1, + "grad_norm": 7.75, + "learning_rate": 4.395445162460548e-06, + "log_odds_chosen": 6.897581577301025, + "log_odds_ratio": -0.03966455161571503, + "logits/chosen": -2.73842191696167, + "logits/rejected": -1.9335638284683228, + "logps/chosen": -0.3176586329936981, + "logps/rejected": -5.78334903717041, + "loss": 0.251, + "nll_loss": 0.23550419509410858, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06353173404932022, + "rewards/margins": 1.0931380987167358, + "rewards/rejected": -1.1566698551177979, + "step": 2940 + }, + { + "epoch": 2.11, + "grad_norm": 5.59375, + "learning_rate": 4.393747751637469e-06, + "log_odds_chosen": 10.120447158813477, + "log_odds_ratio": -0.03257293254137039, + "logits/chosen": -2.7335376739501953, + "logits/rejected": -1.6728357076644897, + "logps/chosen": -0.32205477356910706, + "logps/rejected": -8.999835968017578, + "loss": 0.2672, + "nll_loss": 0.2610648274421692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06441095471382141, + "rewards/margins": 1.7355563640594482, + "rewards/rejected": -1.7999674081802368, + "step": 2950 + }, + { + "epoch": 2.11, + "grad_norm": 14.3125, + "learning_rate": 4.392052305789416e-06, + "log_odds_chosen": 9.041637420654297, + "log_odds_ratio": -0.039078570902347565, + "logits/chosen": -2.7224085330963135, + "logits/rejected": -1.6541340351104736, + "logps/chosen": -0.3390257954597473, + "logps/rejected": -7.973293304443359, + "loss": 0.2984, + "nll_loss": 0.2727344036102295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06780517101287842, + "rewards/margins": 1.5268534421920776, + "rewards/rejected": -1.594658613204956, + "step": 2960 + }, + { + "epoch": 2.12, + "grad_norm": 10.8125, + "learning_rate": 4.390358821128123e-06, + "log_odds_chosen": 9.127324104309082, + "log_odds_ratio": -0.04081900417804718, + "logits/chosen": -2.6818902492523193, + "logits/rejected": -1.3260700702667236, + "logps/chosen": -0.3540460765361786, + "logps/rejected": -8.071810722351074, + "loss": 0.2843, + "nll_loss": 0.28363126516342163, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07080921530723572, + "rewards/margins": 1.5435529947280884, + "rewards/rejected": -1.6143620014190674, + "step": 2970 + }, + { + "epoch": 2.13, + "grad_norm": 18.0, + "learning_rate": 4.388667293875536e-06, + "log_odds_chosen": 5.46827507019043, + "log_odds_ratio": -0.13300354778766632, + "logits/chosen": -2.7808876037597656, + "logits/rejected": -2.0889151096343994, + "logps/chosen": -0.4834941327571869, + "logps/rejected": -4.695062160491943, + "loss": 0.275, + "nll_loss": 0.26671329140663147, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0966988280415535, + "rewards/margins": 0.8423136472702026, + "rewards/rejected": -0.9390126466751099, + "step": 2980 + }, + { + "epoch": 2.13, + "grad_norm": 169.0, + "learning_rate": 4.386977720263786e-06, + "log_odds_chosen": 6.041357040405273, + "log_odds_ratio": -0.05618007108569145, + "logits/chosen": -2.7575278282165527, + "logits/rejected": -1.7493776082992554, + "logps/chosen": -0.3787495195865631, + "logps/rejected": -5.060299873352051, + "loss": 0.2629, + "nll_loss": 0.26606082916259766, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.07574990391731262, + "rewards/margins": 0.9363101124763489, + "rewards/rejected": -1.0120599269866943, + "step": 2990 + }, + { + "epoch": 2.14, + "grad_norm": 5.9375, + "learning_rate": 4.385290096535147e-06, + "log_odds_chosen": 4.69273567199707, + "log_odds_ratio": -0.05490034073591232, + "logits/chosen": -2.8089425563812256, + "logits/rejected": -2.2398369312286377, + "logps/chosen": -0.30732420086860657, + "logps/rejected": -3.626305103302002, + "loss": 0.2486, + "nll_loss": 0.2282293289899826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.061464838683605194, + "rewards/margins": 0.663796067237854, + "rewards/rejected": -0.7252610325813293, + "step": 3000 + }, + { + "epoch": 2.15, + "grad_norm": 12.6875, + "learning_rate": 4.383604418942005e-06, + "log_odds_chosen": 6.116213321685791, + "log_odds_ratio": -0.03878642991185188, + "logits/chosen": -2.6938247680664062, + "logits/rejected": -1.8152358531951904, + "logps/chosen": -0.35414764285087585, + "logps/rejected": -5.104779243469238, + "loss": 0.2722, + "nll_loss": 0.252914160490036, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07082952558994293, + "rewards/margins": 0.9501264691352844, + "rewards/rejected": -1.0209559202194214, + "step": 3010 + }, + { + "epoch": 2.16, + "grad_norm": 6.125, + "learning_rate": 4.381920683746824e-06, + "log_odds_chosen": 5.905968189239502, + "log_odds_ratio": -0.0426829531788826, + "logits/chosen": -2.756247043609619, + "logits/rejected": -2.1121325492858887, + "logps/chosen": -0.3639851212501526, + "logps/rejected": -4.953127384185791, + "loss": 0.2756, + "nll_loss": 0.27040037512779236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0727970153093338, + "rewards/margins": 0.9178284406661987, + "rewards/rejected": -0.9906253814697266, + "step": 3020 + }, + { + "epoch": 2.16, + "grad_norm": 5.8125, + "learning_rate": 4.380238887222108e-06, + "log_odds_chosen": 6.093326568603516, + "log_odds_ratio": -0.04192028567194939, + "logits/chosen": -2.7782235145568848, + "logits/rejected": -1.9548759460449219, + "logps/chosen": -0.31508156657218933, + "logps/rejected": -4.972496032714844, + "loss": 0.2734, + "nll_loss": 0.2683493494987488, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.06301631033420563, + "rewards/margins": 0.9314829707145691, + "rewards/rejected": -0.9944992065429688, + "step": 3030 + }, + { + "epoch": 2.17, + "grad_norm": 6.03125, + "learning_rate": 4.378559025650368e-06, + "log_odds_chosen": 7.272418022155762, + "log_odds_ratio": -0.056223928928375244, + "logits/chosen": -2.7609264850616455, + "logits/rejected": -1.9744220972061157, + "logps/chosen": -0.3785443902015686, + "logps/rejected": -6.300936698913574, + "loss": 0.2611, + "nll_loss": 0.26268187165260315, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.07570887356996536, + "rewards/margins": 1.184478521347046, + "rewards/rejected": -1.2601875066757202, + "step": 3040 + }, + { + "epoch": 2.18, + "grad_norm": 5.21875, + "learning_rate": 4.376881095324086e-06, + "log_odds_chosen": 8.435240745544434, + "log_odds_ratio": -0.032564710825681686, + "logits/chosen": -2.803131580352783, + "logits/rejected": -1.676296591758728, + "logps/chosen": -0.3182581067085266, + "logps/rejected": -7.271533012390137, + "loss": 0.2522, + "nll_loss": 0.23994970321655273, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.06365162134170532, + "rewards/margins": 1.3906551599502563, + "rewards/rejected": -1.454306721687317, + "step": 3050 + }, + { + "epoch": 2.18, + "grad_norm": 7.5625, + "learning_rate": 4.375205092545683e-06, + "log_odds_chosen": 8.230413436889648, + "log_odds_ratio": -0.05366669222712517, + "logits/chosen": -2.786719560623169, + "logits/rejected": -1.8391005992889404, + "logps/chosen": -0.3586600422859192, + "logps/rejected": -7.209918022155762, + "loss": 0.2779, + "nll_loss": 0.2507212460041046, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07173201441764832, + "rewards/margins": 1.3702516555786133, + "rewards/rejected": -1.4419835805892944, + "step": 3060 + }, + { + "epoch": 2.19, + "grad_norm": 5.125, + "learning_rate": 4.373531013627483e-06, + "log_odds_chosen": 8.445428848266602, + "log_odds_ratio": -0.03792678564786911, + "logits/chosen": -2.7394258975982666, + "logits/rejected": -1.4675519466400146, + "logps/chosen": -0.3647417426109314, + "logps/rejected": -7.469712734222412, + "loss": 0.2809, + "nll_loss": 0.26669415831565857, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07294835150241852, + "rewards/margins": 1.4209941625595093, + "rewards/rejected": -1.4939426183700562, + "step": 3070 + }, + { + "epoch": 2.2, + "grad_norm": 170.0, + "learning_rate": 4.371858854891681e-06, + "log_odds_chosen": 6.993101596832275, + "log_odds_ratio": -0.06284736096858978, + "logits/chosen": -2.7770023345947266, + "logits/rejected": -1.9066228866577148, + "logps/chosen": -0.36158767342567444, + "logps/rejected": -6.016913414001465, + "loss": 0.2731, + "nll_loss": 0.25095489621162415, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07231752574443817, + "rewards/margins": 1.1310651302337646, + "rewards/rejected": -1.2033826112747192, + "step": 3080 + }, + { + "epoch": 2.21, + "grad_norm": 20.375, + "learning_rate": 4.370188612670307e-06, + "log_odds_chosen": 6.399539947509766, + "log_odds_ratio": -0.05658464506268501, + "logits/chosen": -2.7655324935913086, + "logits/rejected": -1.907060980796814, + "logps/chosen": -0.3928866684436798, + "logps/rejected": -5.48469877243042, + "loss": 0.2814, + "nll_loss": 0.2612989544868469, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07857732474803925, + "rewards/margins": 1.0183625221252441, + "rewards/rejected": -1.0969398021697998, + "step": 3090 + }, + { + "epoch": 2.21, + "grad_norm": 10.8125, + "learning_rate": 4.36852028330519e-06, + "log_odds_chosen": 6.503146171569824, + "log_odds_ratio": -0.04645160958170891, + "logits/chosen": -2.803072929382324, + "logits/rejected": -1.9909429550170898, + "logps/chosen": -0.38660162687301636, + "logps/rejected": -5.571995735168457, + "loss": 0.2914, + "nll_loss": 0.2930335998535156, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07732032984495163, + "rewards/margins": 1.0370789766311646, + "rewards/rejected": -1.1143993139266968, + "step": 3100 + }, + { + "epoch": 2.22, + "grad_norm": 7.46875, + "learning_rate": 4.3668538631479314e-06, + "log_odds_chosen": 6.382928848266602, + "log_odds_ratio": -0.04722817987203598, + "logits/chosen": -2.793405055999756, + "logits/rejected": -1.897592544555664, + "logps/chosen": -0.36716800928115845, + "logps/rejected": -5.360346794128418, + "loss": 0.2719, + "nll_loss": 0.2766880989074707, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07343360781669617, + "rewards/margins": 0.9986356496810913, + "rewards/rejected": -1.0720694065093994, + "step": 3110 + }, + { + "epoch": 2.23, + "grad_norm": 7.53125, + "learning_rate": 4.365189348559864e-06, + "log_odds_chosen": 7.294475555419922, + "log_odds_ratio": -0.04280845448374748, + "logits/chosen": -2.794233560562134, + "logits/rejected": -1.9600608348846436, + "logps/chosen": -0.3509625494480133, + "logps/rejected": -6.329296112060547, + "loss": 0.26, + "nll_loss": 0.24876642227172852, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07019250839948654, + "rewards/margins": 1.1956669092178345, + "rewards/rejected": -1.2658593654632568, + "step": 3120 + }, + { + "epoch": 2.23, + "grad_norm": 105.0, + "learning_rate": 4.363526735912025e-06, + "log_odds_chosen": 8.40985107421875, + "log_odds_ratio": -0.03858170285820961, + "logits/chosen": -2.7838852405548096, + "logits/rejected": -1.66619873046875, + "logps/chosen": -0.39186400175094604, + "logps/rejected": -7.510270118713379, + "loss": 0.2711, + "nll_loss": 0.27240419387817383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07837279886007309, + "rewards/margins": 1.423681378364563, + "rewards/rejected": -1.5020540952682495, + "step": 3130 + }, + { + "epoch": 2.24, + "grad_norm": 48.25, + "learning_rate": 4.361866021585114e-06, + "log_odds_chosen": 8.032949447631836, + "log_odds_ratio": -0.03759379684925079, + "logits/chosen": -2.8125336170196533, + "logits/rejected": -1.9667034149169922, + "logps/chosen": -0.3394584059715271, + "logps/rejected": -6.949484348297119, + "loss": 0.277, + "nll_loss": 0.2678987979888916, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0678916722536087, + "rewards/margins": 1.322005271911621, + "rewards/rejected": -1.3898969888687134, + "step": 3140 + }, + { + "epoch": 2.25, + "grad_norm": 9.625, + "learning_rate": 4.360207201969474e-06, + "log_odds_chosen": 6.880602836608887, + "log_odds_ratio": -0.040961816906929016, + "logits/chosen": -2.7715115547180176, + "logits/rejected": -2.049297571182251, + "logps/chosen": -0.34252291917800903, + "logps/rejected": -5.822251319885254, + "loss": 0.2519, + "nll_loss": 0.23448018729686737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06850457936525345, + "rewards/margins": 1.0959457159042358, + "rewards/rejected": -1.1644504070281982, + "step": 3150 + }, + { + "epoch": 2.26, + "grad_norm": 120.0, + "learning_rate": 4.358550273465042e-06, + "log_odds_chosen": 7.489804267883301, + "log_odds_ratio": -0.030518781393766403, + "logits/chosen": -2.8056952953338623, + "logits/rejected": -1.9100799560546875, + "logps/chosen": -0.3443801999092102, + "logps/rejected": -6.379244804382324, + "loss": 0.267, + "nll_loss": 0.25786659121513367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06887603551149368, + "rewards/margins": 1.2069729566574097, + "rewards/rejected": -1.2758489847183228, + "step": 3160 + }, + { + "epoch": 2.26, + "grad_norm": 6.875, + "learning_rate": 4.356895232481328e-06, + "log_odds_chosen": 8.904826164245605, + "log_odds_ratio": -0.02953409031033516, + "logits/chosen": -2.8109521865844727, + "logits/rejected": -1.7148170471191406, + "logps/chosen": -0.32581400871276855, + "logps/rejected": -7.794666290283203, + "loss": 0.2778, + "nll_loss": 0.2546003758907318, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.065162792801857, + "rewards/margins": 1.4937704801559448, + "rewards/rejected": -1.5589332580566406, + "step": 3170 + }, + { + "epoch": 2.27, + "grad_norm": 64.5, + "learning_rate": 4.3552420754373795e-06, + "log_odds_chosen": 9.826112747192383, + "log_odds_ratio": -0.03542652726173401, + "logits/chosen": -2.755399465560913, + "logits/rejected": -1.6918747425079346, + "logps/chosen": -0.37103694677352905, + "logps/rejected": -8.79539680480957, + "loss": 0.2858, + "nll_loss": 0.27970394492149353, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07420738786458969, + "rewards/margins": 1.6848719120025635, + "rewards/rejected": -1.7590793371200562, + "step": 3180 + }, + { + "epoch": 2.28, + "grad_norm": 71.0, + "learning_rate": 4.353590798761745e-06, + "log_odds_chosen": 7.645397186279297, + "log_odds_ratio": -0.041773177683353424, + "logits/chosen": -2.7663168907165527, + "logits/rejected": -1.89877188205719, + "logps/chosen": -0.3466086685657501, + "logps/rejected": -6.663360595703125, + "loss": 0.2852, + "nll_loss": 0.2754463255405426, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06932173669338226, + "rewards/margins": 1.263350248336792, + "rewards/rejected": -1.3326722383499146, + "step": 3190 + }, + { + "epoch": 2.28, + "grad_norm": 8.0, + "learning_rate": 4.351941398892446e-06, + "log_odds_chosen": 10.17717170715332, + "log_odds_ratio": -0.02503364160656929, + "logits/chosen": -2.7787466049194336, + "logits/rejected": -1.7833493947982788, + "logps/chosen": -0.38776397705078125, + "logps/rejected": -9.250219345092773, + "loss": 0.291, + "nll_loss": 0.3181122839450836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07755279541015625, + "rewards/margins": 1.7724910974502563, + "rewards/rejected": -1.8500440120697021, + "step": 3200 + }, + { + "epoch": 2.29, + "grad_norm": 4.6875, + "learning_rate": 4.350293872276944e-06, + "log_odds_chosen": 8.712306022644043, + "log_odds_ratio": -0.028896396979689598, + "logits/chosen": -2.7661192417144775, + "logits/rejected": -1.6439266204833984, + "logps/chosen": -0.3586076498031616, + "logps/rejected": -7.733296871185303, + "loss": 0.2939, + "nll_loss": 0.28883105516433716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07172153890132904, + "rewards/margins": 1.474937915802002, + "rewards/rejected": -1.5466594696044922, + "step": 3210 + }, + { + "epoch": 2.3, + "grad_norm": 14.3125, + "learning_rate": 4.348648215372106e-06, + "log_odds_chosen": 9.273715019226074, + "log_odds_ratio": -0.025510499253869057, + "logits/chosen": -2.7492473125457764, + "logits/rejected": -1.7804412841796875, + "logps/chosen": -0.3289060592651367, + "logps/rejected": -8.170428276062012, + "loss": 0.287, + "nll_loss": 0.2702774405479431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06578120589256287, + "rewards/margins": 1.568304419517517, + "rewards/rejected": -1.6340856552124023, + "step": 3220 + }, + { + "epoch": 2.31, + "grad_norm": 5.53125, + "learning_rate": 4.347004424644176e-06, + "log_odds_chosen": 9.255958557128906, + "log_odds_ratio": -0.04089302942156792, + "logits/chosen": -2.817354202270508, + "logits/rejected": -1.8950843811035156, + "logps/chosen": -0.32880938053131104, + "logps/rejected": -8.185022354125977, + "loss": 0.2918, + "nll_loss": 0.2752782106399536, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06576187163591385, + "rewards/margins": 1.5712426900863647, + "rewards/rejected": -1.6370046138763428, + "step": 3230 + }, + { + "epoch": 2.31, + "grad_norm": 6.40625, + "learning_rate": 4.34536249656874e-06, + "log_odds_chosen": 8.917261123657227, + "log_odds_ratio": -0.04923417791724205, + "logits/chosen": -2.776374101638794, + "logits/rejected": -1.8802967071533203, + "logps/chosen": -0.42338424921035767, + "logps/rejected": -7.919134616851807, + "loss": 0.2844, + "nll_loss": 0.29589223861694336, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.0846768468618393, + "rewards/margins": 1.499150037765503, + "rewards/rejected": -1.583827018737793, + "step": 3240 + }, + { + "epoch": 2.32, + "grad_norm": 6.5, + "learning_rate": 4.3437224276306945e-06, + "log_odds_chosen": 7.567285060882568, + "log_odds_ratio": -0.028490770608186722, + "logits/chosen": -2.786663770675659, + "logits/rejected": -1.9782333374023438, + "logps/chosen": -0.33940792083740234, + "logps/rejected": -6.447556495666504, + "loss": 0.3091, + "nll_loss": 0.3036688566207886, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06788158416748047, + "rewards/margins": 1.2216296195983887, + "rewards/rejected": -1.2895113229751587, + "step": 3250 + }, + { + "epoch": 2.33, + "grad_norm": 17.875, + "learning_rate": 4.342084214324218e-06, + "log_odds_chosen": 7.1914873123168945, + "log_odds_ratio": -0.0445329025387764, + "logits/chosen": -2.756364345550537, + "logits/rejected": -2.037761926651001, + "logps/chosen": -0.3597845435142517, + "logps/rejected": -6.1982855796813965, + "loss": 0.3067, + "nll_loss": 0.2898246645927429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07195691019296646, + "rewards/margins": 1.1677000522613525, + "rewards/rejected": -1.2396571636199951, + "step": 3260 + }, + { + "epoch": 2.33, + "grad_norm": 13.0625, + "learning_rate": 4.340447853152738e-06, + "log_odds_chosen": 8.178131103515625, + "log_odds_ratio": -0.05221225693821907, + "logits/chosen": -2.783968925476074, + "logits/rejected": -1.8930259943008423, + "logps/chosen": -0.36348551511764526, + "logps/rejected": -7.173810005187988, + "loss": 0.2735, + "nll_loss": 0.258183091878891, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07269710302352905, + "rewards/margins": 1.3620648384094238, + "rewards/rejected": -1.434761881828308, + "step": 3270 + }, + { + "epoch": 2.34, + "grad_norm": 5.71875, + "learning_rate": 4.338813340628896e-06, + "log_odds_chosen": 9.553061485290527, + "log_odds_ratio": -0.037361737340688705, + "logits/chosen": -2.797703266143799, + "logits/rejected": -1.5537981986999512, + "logps/chosen": -0.33459705114364624, + "logps/rejected": -8.510927200317383, + "loss": 0.2714, + "nll_loss": 0.27574431896209717, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06691941618919373, + "rewards/margins": 1.6352659463882446, + "rewards/rejected": -1.7021853923797607, + "step": 3280 + }, + { + "epoch": 2.35, + "grad_norm": 12.9375, + "learning_rate": 4.337180673274523e-06, + "log_odds_chosen": 8.518575668334961, + "log_odds_ratio": -0.04755989462137222, + "logits/chosen": -2.8275094032287598, + "logits/rejected": -1.775490403175354, + "logps/chosen": -0.33448418974876404, + "logps/rejected": -7.48303747177124, + "loss": 0.2834, + "nll_loss": 0.27971869707107544, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06689684092998505, + "rewards/margins": 1.4297107458114624, + "rewards/rejected": -1.4966075420379639, + "step": 3290 + }, + { + "epoch": 2.36, + "grad_norm": 18.625, + "learning_rate": 4.3355498476206e-06, + "log_odds_chosen": 9.549077033996582, + "log_odds_ratio": -0.0418349914252758, + "logits/chosen": -2.8091912269592285, + "logits/rejected": -1.7388540506362915, + "logps/chosen": -0.3228936493396759, + "logps/rejected": -8.422323226928711, + "loss": 0.2922, + "nll_loss": 0.28200000524520874, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.06457872688770294, + "rewards/margins": 1.6198861598968506, + "rewards/rejected": -1.6844650506973267, + "step": 3300 + }, + { + "epoch": 2.36, + "grad_norm": 5.375, + "learning_rate": 4.333920860207238e-06, + "log_odds_chosen": 9.765109062194824, + "log_odds_ratio": -0.03472736105322838, + "logits/chosen": -2.820134162902832, + "logits/rejected": -1.793215036392212, + "logps/chosen": -0.4387420117855072, + "logps/rejected": -8.810163497924805, + "loss": 0.2826, + "nll_loss": 0.2727591395378113, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.08774841576814651, + "rewards/margins": 1.6742843389511108, + "rewards/rejected": -1.7620328664779663, + "step": 3310 + }, + { + "epoch": 2.37, + "grad_norm": 9.0625, + "learning_rate": 4.332293707583636e-06, + "log_odds_chosen": 9.640665054321289, + "log_odds_ratio": -0.03734064847230911, + "logits/chosen": -2.774531841278076, + "logits/rejected": -1.7828537225723267, + "logps/chosen": -0.41496315598487854, + "logps/rejected": -8.623726844787598, + "loss": 0.3401, + "nll_loss": 0.3533519506454468, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.08299262821674347, + "rewards/margins": 1.6417526006698608, + "rewards/rejected": -1.7247453927993774, + "step": 3320 + }, + { + "epoch": 2.38, + "grad_norm": 48.75, + "learning_rate": 4.330668386308059e-06, + "log_odds_chosen": 8.390340805053711, + "log_odds_ratio": -0.03434724360704422, + "logits/chosen": -2.824404716491699, + "logits/rejected": -1.6728851795196533, + "logps/chosen": -0.36389368772506714, + "logps/rejected": -7.420858860015869, + "loss": 0.2978, + "nll_loss": 0.28047677874565125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07277874648571014, + "rewards/margins": 1.4113929271697998, + "rewards/rejected": -1.4841716289520264, + "step": 3330 + }, + { + "epoch": 2.38, + "grad_norm": 27.25, + "learning_rate": 4.329044892947799e-06, + "log_odds_chosen": 7.926604270935059, + "log_odds_ratio": -0.04124735668301582, + "logits/chosen": -2.8356728553771973, + "logits/rejected": -1.8978474140167236, + "logps/chosen": -0.3497302830219269, + "logps/rejected": -6.923464775085449, + "loss": 0.258, + "nll_loss": 0.24766549468040466, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06994606554508209, + "rewards/margins": 1.3147468566894531, + "rewards/rejected": -1.384692907333374, + "step": 3340 + }, + { + "epoch": 2.39, + "grad_norm": 56.0, + "learning_rate": 4.327423224079155e-06, + "log_odds_chosen": 6.804099082946777, + "log_odds_ratio": -0.05955817550420761, + "logits/chosen": -2.8271944522857666, + "logits/rejected": -2.0780460834503174, + "logps/chosen": -0.3145996928215027, + "logps/rejected": -5.751158237457275, + "loss": 0.2661, + "nll_loss": 0.25528645515441895, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06291993707418442, + "rewards/margins": 1.087311863899231, + "rewards/rejected": -1.1502315998077393, + "step": 3350 + }, + { + "epoch": 2.4, + "grad_norm": 21.5, + "learning_rate": 4.325803376287392e-06, + "log_odds_chosen": 8.253396987915039, + "log_odds_ratio": -0.0472552627325058, + "logits/chosen": -2.797362804412842, + "logits/rejected": -1.7111046314239502, + "logps/chosen": -0.35209912061691284, + "logps/rejected": -7.188366889953613, + "loss": 0.2921, + "nll_loss": 0.3038533329963684, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07041982561349869, + "rewards/margins": 1.3672535419464111, + "rewards/rejected": -1.4376734495162964, + "step": 3360 + }, + { + "epoch": 2.41, + "grad_norm": 8.6875, + "learning_rate": 4.32418534616672e-06, + "log_odds_chosen": 10.038507461547852, + "log_odds_ratio": -0.025366192683577538, + "logits/chosen": -2.804220676422119, + "logits/rejected": -1.6713545322418213, + "logps/chosen": -0.35934942960739136, + "logps/rejected": -9.00309944152832, + "loss": 0.2658, + "nll_loss": 0.26834237575531006, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0718698799610138, + "rewards/margins": 1.728750228881836, + "rewards/rejected": -1.8006200790405273, + "step": 3370 + }, + { + "epoch": 2.41, + "grad_norm": 6.0, + "learning_rate": 4.322569130320256e-06, + "log_odds_chosen": 9.466753005981445, + "log_odds_ratio": -0.03348467871546745, + "logits/chosen": -2.8029136657714844, + "logits/rejected": -1.865025520324707, + "logps/chosen": -0.38565486669540405, + "logps/rejected": -8.445741653442383, + "loss": 0.2768, + "nll_loss": 0.2976570129394531, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07713097333908081, + "rewards/margins": 1.6120173931121826, + "rewards/rejected": -1.6891483068466187, + "step": 3380 + }, + { + "epoch": 2.42, + "grad_norm": 13.5, + "learning_rate": 4.320954725359999e-06, + "log_odds_chosen": 9.79299259185791, + "log_odds_ratio": -0.027992457151412964, + "logits/chosen": -2.841531753540039, + "logits/rejected": -1.9643710851669312, + "logps/chosen": -0.35286644101142883, + "logps/rejected": -8.675617218017578, + "loss": 0.2939, + "nll_loss": 0.3023623824119568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07057328522205353, + "rewards/margins": 1.6645504236221313, + "rewards/rejected": -1.735123634338379, + "step": 3390 + }, + { + "epoch": 2.43, + "grad_norm": 41.25, + "learning_rate": 4.319342127906801e-06, + "log_odds_chosen": 9.294361114501953, + "log_odds_ratio": -0.045621536672115326, + "logits/chosen": -2.8277316093444824, + "logits/rejected": -1.8078911304473877, + "logps/chosen": -0.36391180753707886, + "logps/rejected": -8.31618595123291, + "loss": 0.2618, + "nll_loss": 0.25773972272872925, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07278236001729965, + "rewards/margins": 1.5904548168182373, + "rewards/rejected": -1.66323721408844, + "step": 3400 + }, + { + "epoch": 2.43, + "grad_norm": 5.65625, + "learning_rate": 4.317731334590332e-06, + "log_odds_chosen": 9.953015327453613, + "log_odds_ratio": -0.04586916044354439, + "logits/chosen": -2.7547903060913086, + "logits/rejected": -1.697317361831665, + "logps/chosen": -0.3400295674800873, + "logps/rejected": -8.843751907348633, + "loss": 0.2692, + "nll_loss": 0.24716739356517792, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.06800590455532074, + "rewards/margins": 1.700744390487671, + "rewards/rejected": -1.7687504291534424, + "step": 3410 + }, + { + "epoch": 2.44, + "grad_norm": 6.78125, + "learning_rate": 4.316122342049056e-06, + "log_odds_chosen": 11.804905891418457, + "log_odds_ratio": -0.023801427334547043, + "logits/chosen": -2.755643844604492, + "logits/rejected": -1.4954235553741455, + "logps/chosen": -0.4441162943840027, + "logps/rejected": -10.793535232543945, + "loss": 0.2901, + "nll_loss": 0.3134029507637024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08882326632738113, + "rewards/margins": 2.0698838233947754, + "rewards/rejected": -2.1587071418762207, + "step": 3420 + }, + { + "epoch": 2.45, + "grad_norm": 7.03125, + "learning_rate": 4.314515146930197e-06, + "log_odds_chosen": 9.51766586303711, + "log_odds_ratio": -0.029458215460181236, + "logits/chosen": -2.814960241317749, + "logits/rejected": -1.9240309000015259, + "logps/chosen": -0.41664624214172363, + "logps/rejected": -8.569226264953613, + "loss": 0.2687, + "nll_loss": 0.25920039415359497, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.08332924544811249, + "rewards/margins": 1.6305160522460938, + "rewards/rejected": -1.7138452529907227, + "step": 3430 + }, + { + "epoch": 2.46, + "grad_norm": 5.875, + "learning_rate": 4.312909745889715e-06, + "log_odds_chosen": 7.934914588928223, + "log_odds_ratio": -0.033872656524181366, + "logits/chosen": -2.7913076877593994, + "logits/rejected": -1.9354534149169922, + "logps/chosen": -0.411059707403183, + "logps/rejected": -7.0791335105896, + "loss": 0.2602, + "nll_loss": 0.24955859780311584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.082211934030056, + "rewards/margins": 1.3336145877838135, + "rewards/rejected": -1.4158265590667725, + "step": 3440 + }, + { + "epoch": 2.46, + "grad_norm": 37.75, + "learning_rate": 4.311306135592269e-06, + "log_odds_chosen": 8.438997268676758, + "log_odds_ratio": -0.04086356610059738, + "logits/chosen": -2.8126413822174072, + "logits/rejected": -1.8445075750350952, + "logps/chosen": -0.3259912133216858, + "logps/rejected": -7.326315879821777, + "loss": 0.2547, + "nll_loss": 0.24355188012123108, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06519824266433716, + "rewards/margins": 1.4000650644302368, + "rewards/rejected": -1.4652631282806396, + "step": 3450 + }, + { + "epoch": 2.47, + "grad_norm": 11.5625, + "learning_rate": 4.309704312711197e-06, + "log_odds_chosen": 7.0475311279296875, + "log_odds_ratio": -0.04972859099507332, + "logits/chosen": -2.7898261547088623, + "logits/rejected": -2.0793161392211914, + "logps/chosen": -0.3466077148914337, + "logps/rejected": -5.998042106628418, + "loss": 0.2998, + "nll_loss": 0.27084097266197205, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.06932154297828674, + "rewards/margins": 1.1302868127822876, + "rewards/rejected": -1.199608564376831, + "step": 3460 + }, + { + "epoch": 2.48, + "grad_norm": 25.0, + "learning_rate": 4.3081042739284794e-06, + "log_odds_chosen": 9.243393898010254, + "log_odds_ratio": -0.03272037208080292, + "logits/chosen": -2.832883834838867, + "logits/rejected": -1.8257993459701538, + "logps/chosen": -0.3495597839355469, + "logps/rejected": -8.209525108337402, + "loss": 0.264, + "nll_loss": 0.24593329429626465, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06991195678710938, + "rewards/margins": 1.5719932317733765, + "rewards/rejected": -1.6419051885604858, + "step": 3470 + }, + { + "epoch": 2.48, + "grad_norm": 11.125, + "learning_rate": 4.306506015934716e-06, + "log_odds_chosen": 8.312199592590332, + "log_odds_ratio": -0.024369016289711, + "logits/chosen": -2.8706321716308594, + "logits/rejected": -1.9261465072631836, + "logps/chosen": -0.3649846911430359, + "logps/rejected": -7.237170219421387, + "loss": 0.2681, + "nll_loss": 0.25165751576423645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07299693673849106, + "rewards/margins": 1.3744370937347412, + "rewards/rejected": -1.4474341869354248, + "step": 3480 + }, + { + "epoch": 2.49, + "grad_norm": 58.5, + "learning_rate": 4.304909535429091e-06, + "log_odds_chosen": 10.501726150512695, + "log_odds_ratio": -0.029145091772079468, + "logits/chosen": -2.8260130882263184, + "logits/rejected": -1.6204798221588135, + "logps/chosen": -0.3936752378940582, + "logps/rejected": -9.544812202453613, + "loss": 0.2738, + "nll_loss": 0.2682205140590668, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07873504608869553, + "rewards/margins": 1.8302274942398071, + "rewards/rejected": -1.9089622497558594, + "step": 3490 + }, + { + "epoch": 2.5, + "grad_norm": 15.9375, + "learning_rate": 4.303314829119352e-06, + "log_odds_chosen": 10.030715942382812, + "log_odds_ratio": -0.04658619314432144, + "logits/chosen": -2.796156406402588, + "logits/rejected": -1.8100440502166748, + "logps/chosen": -0.3424500823020935, + "logps/rejected": -8.992310523986816, + "loss": 0.2764, + "nll_loss": 0.24179503321647644, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.06849001348018646, + "rewards/margins": 1.7299721240997314, + "rewards/rejected": -1.798462152481079, + "step": 3500 + }, + { + "epoch": 2.51, + "grad_norm": 122.5, + "learning_rate": 4.301721893721773e-06, + "log_odds_chosen": 10.208802223205566, + "log_odds_ratio": -0.04213592782616615, + "logits/chosen": -2.797102212905884, + "logits/rejected": -1.4663350582122803, + "logps/chosen": -0.3950093686580658, + "logps/rejected": -9.20128345489502, + "loss": 0.301, + "nll_loss": 0.3496857285499573, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07900188118219376, + "rewards/margins": 1.7612546682357788, + "rewards/rejected": -1.840256690979004, + "step": 3510 + }, + { + "epoch": 2.51, + "grad_norm": 10.25, + "learning_rate": 4.300130725961134e-06, + "log_odds_chosen": 6.270288467407227, + "log_odds_ratio": -0.05212852358818054, + "logits/chosen": -2.8619461059570312, + "logits/rejected": -2.255450487136841, + "logps/chosen": -0.36604180932044983, + "logps/rejected": -5.2709221839904785, + "loss": 0.265, + "nll_loss": 0.26578956842422485, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07320836186408997, + "rewards/margins": 0.9809761047363281, + "rewards/rejected": -1.0541845560073853, + "step": 3520 + }, + { + "epoch": 2.52, + "grad_norm": 10.125, + "learning_rate": 4.298541322570686e-06, + "log_odds_chosen": 6.3298749923706055, + "log_odds_ratio": -0.03688116371631622, + "logits/chosen": -2.834110736846924, + "logits/rejected": -2.0629196166992188, + "logps/chosen": -0.3773978352546692, + "logps/rejected": -5.346269130706787, + "loss": 0.247, + "nll_loss": 0.24589797854423523, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07547955214977264, + "rewards/margins": 0.9937742948532104, + "rewards/rejected": -1.0692538022994995, + "step": 3530 + }, + { + "epoch": 2.53, + "grad_norm": 136.0, + "learning_rate": 4.296953680292129e-06, + "log_odds_chosen": 7.077627658843994, + "log_odds_ratio": -0.06007321551442146, + "logits/chosen": -2.754781484603882, + "logits/rejected": -1.8707349300384521, + "logps/chosen": -0.41770386695861816, + "logps/rejected": -6.089178085327148, + "loss": 0.3063, + "nll_loss": 0.3370429575443268, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.08354077488183975, + "rewards/margins": 1.134294867515564, + "rewards/rejected": -1.2178356647491455, + "step": 3540 + }, + { + "epoch": 2.53, + "grad_norm": 7.1875, + "learning_rate": 4.295367795875578e-06, + "log_odds_chosen": 4.167150974273682, + "log_odds_ratio": -0.06860674917697906, + "logits/chosen": -2.8772263526916504, + "logits/rejected": -2.3703644275665283, + "logps/chosen": -0.3609137237071991, + "logps/rejected": -3.2382397651672363, + "loss": 0.2878, + "nll_loss": 0.3157772421836853, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07218273729085922, + "rewards/margins": 0.575465202331543, + "rewards/rejected": -0.6476479172706604, + "step": 3550 + }, + { + "epoch": 2.54, + "grad_norm": 7.9375, + "learning_rate": 4.293783666079539e-06, + "log_odds_chosen": 6.502009391784668, + "log_odds_ratio": -0.03050677478313446, + "logits/chosen": -2.6957783699035645, + "logits/rejected": -1.993639349937439, + "logps/chosen": -0.31069567799568176, + "logps/rejected": -5.301680088043213, + "loss": 0.2675, + "nll_loss": 0.2550848424434662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06213913485407829, + "rewards/margins": 0.9981969594955444, + "rewards/rejected": -1.0603359937667847, + "step": 3560 + }, + { + "epoch": 2.55, + "grad_norm": 8.875, + "learning_rate": 4.292201287670881e-06, + "log_odds_chosen": 7.6693291664123535, + "log_odds_ratio": -0.04327741265296936, + "logits/chosen": -2.78936505317688, + "logits/rejected": -1.8133246898651123, + "logps/chosen": -0.42709359526634216, + "logps/rejected": -6.817948818206787, + "loss": 0.3028, + "nll_loss": 0.2974828779697418, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0854187160730362, + "rewards/margins": 1.2781710624694824, + "rewards/rejected": -1.363589882850647, + "step": 3570 + }, + { + "epoch": 2.56, + "grad_norm": 7.90625, + "learning_rate": 4.2906206574248056e-06, + "log_odds_chosen": 7.7335991859436035, + "log_odds_ratio": -0.03188318759202957, + "logits/chosen": -2.823167085647583, + "logits/rejected": -1.8172123432159424, + "logps/chosen": -0.39690691232681274, + "logps/rejected": -6.871090888977051, + "loss": 0.2829, + "nll_loss": 0.26852673292160034, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07938139140605927, + "rewards/margins": 1.2948367595672607, + "rewards/rejected": -1.3742179870605469, + "step": 3580 + }, + { + "epoch": 2.56, + "grad_norm": 8.375, + "learning_rate": 4.289041772124823e-06, + "log_odds_chosen": 7.018429756164551, + "log_odds_ratio": -0.04488766938447952, + "logits/chosen": -2.8090367317199707, + "logits/rejected": -2.016038417816162, + "logps/chosen": -0.4279165267944336, + "logps/rejected": -6.177850246429443, + "loss": 0.2934, + "nll_loss": 0.27853289246559143, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.08558329194784164, + "rewards/margins": 1.1499868631362915, + "rewards/rejected": -1.235569953918457, + "step": 3590 + }, + { + "epoch": 2.57, + "grad_norm": 7.84375, + "learning_rate": 4.2874646285627205e-06, + "log_odds_chosen": 7.609469413757324, + "log_odds_ratio": -0.029052983969449997, + "logits/chosen": -2.792881727218628, + "logits/rejected": -1.8695917129516602, + "logps/chosen": -0.35575172305107117, + "logps/rejected": -6.599902153015137, + "loss": 0.2679, + "nll_loss": 0.26382261514663696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07115034759044647, + "rewards/margins": 1.2488303184509277, + "rewards/rejected": -1.3199807405471802, + "step": 3600 + }, + { + "epoch": 2.58, + "grad_norm": 11.0625, + "learning_rate": 4.2858892235385405e-06, + "log_odds_chosen": 9.122172355651855, + "log_odds_ratio": -0.02934536337852478, + "logits/chosen": -2.7800889015197754, + "logits/rejected": -1.775040626525879, + "logps/chosen": -0.40309590101242065, + "logps/rejected": -8.151556968688965, + "loss": 0.2977, + "nll_loss": 0.28484228253364563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08061918616294861, + "rewards/margins": 1.5496922731399536, + "rewards/rejected": -1.630311369895935, + "step": 3610 + }, + { + "epoch": 2.58, + "grad_norm": 5.03125, + "learning_rate": 4.2843155538605454e-06, + "log_odds_chosen": 9.74396800994873, + "log_odds_ratio": -0.03267233446240425, + "logits/chosen": -2.8342878818511963, + "logits/rejected": -1.560414433479309, + "logps/chosen": -0.358869731426239, + "logps/rejected": -8.761539459228516, + "loss": 0.286, + "nll_loss": 0.2713310122489929, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.071773961186409, + "rewards/margins": 1.680533766746521, + "rewards/rejected": -1.7523078918457031, + "step": 3620 + }, + { + "epoch": 2.59, + "grad_norm": 8.3125, + "learning_rate": 4.2827436163452e-06, + "log_odds_chosen": 8.96192741394043, + "log_odds_ratio": -0.025754928588867188, + "logits/chosen": -2.807020664215088, + "logits/rejected": -1.7215229272842407, + "logps/chosen": -0.37024766206741333, + "logps/rejected": -7.972418785095215, + "loss": 0.2918, + "nll_loss": 0.2985715866088867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07404953241348267, + "rewards/margins": 1.5204341411590576, + "rewards/rejected": -1.5944838523864746, + "step": 3630 + }, + { + "epoch": 2.6, + "grad_norm": 27.375, + "learning_rate": 4.2811734078171365e-06, + "log_odds_chosen": 9.322843551635742, + "log_odds_ratio": -0.02989858388900757, + "logits/chosen": -2.800328016281128, + "logits/rejected": -1.6582973003387451, + "logps/chosen": -0.33669382333755493, + "logps/rejected": -8.228538513183594, + "loss": 0.2924, + "nll_loss": 0.294689804315567, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.06733877211809158, + "rewards/margins": 1.578368902206421, + "rewards/rejected": -1.6457077264785767, + "step": 3640 + }, + { + "epoch": 2.61, + "grad_norm": 83.5, + "learning_rate": 4.27960492510913e-06, + "log_odds_chosen": 10.60124397277832, + "log_odds_ratio": -0.03527144342660904, + "logits/chosen": -2.7842440605163574, + "logits/rejected": -1.780975103378296, + "logps/chosen": -0.35910865664482117, + "logps/rejected": -9.61752986907959, + "loss": 0.2886, + "nll_loss": 0.26610469818115234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07182172685861588, + "rewards/margins": 1.851684331893921, + "rewards/rejected": -1.9235061407089233, + "step": 3650 + }, + { + "epoch": 2.61, + "grad_norm": 11.6875, + "learning_rate": 4.278038165062074e-06, + "log_odds_chosen": 9.46485424041748, + "log_odds_ratio": -0.030444592237472534, + "logits/chosen": -2.7923178672790527, + "logits/rejected": -1.7434918880462646, + "logps/chosen": -0.36185431480407715, + "logps/rejected": -8.415678977966309, + "loss": 0.2739, + "nll_loss": 0.2679918110370636, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07237086445093155, + "rewards/margins": 1.6107648611068726, + "rewards/rejected": -1.6831356287002563, + "step": 3660 + }, + { + "epoch": 2.62, + "grad_norm": 11.875, + "learning_rate": 4.276473124524951e-06, + "log_odds_chosen": 10.846599578857422, + "log_odds_ratio": -0.0410008430480957, + "logits/chosen": -2.814073324203491, + "logits/rejected": -1.7574056386947632, + "logps/chosen": -0.3752827048301697, + "logps/rejected": -9.809711456298828, + "loss": 0.3038, + "nll_loss": 0.2877393960952759, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0750565379858017, + "rewards/margins": 1.886885643005371, + "rewards/rejected": -1.9619423151016235, + "step": 3670 + }, + { + "epoch": 2.63, + "grad_norm": 80.5, + "learning_rate": 4.274909800354809e-06, + "log_odds_chosen": 8.6845703125, + "log_odds_ratio": -0.04010792821645737, + "logits/chosen": -2.8353562355041504, + "logits/rejected": -1.9117534160614014, + "logps/chosen": -0.36866775155067444, + "logps/rejected": -7.723712921142578, + "loss": 0.2716, + "nll_loss": 0.28985413908958435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07373355329036713, + "rewards/margins": 1.4710088968276978, + "rewards/rejected": -1.5447423458099365, + "step": 3680 + }, + { + "epoch": 2.63, + "grad_norm": 25.5, + "learning_rate": 4.27334818941673e-06, + "log_odds_chosen": 11.139304161071777, + "log_odds_ratio": -0.02806129679083824, + "logits/chosen": -2.829277515411377, + "logits/rejected": -1.7117398977279663, + "logps/chosen": -0.43500715494155884, + "logps/rejected": -10.296022415161133, + "loss": 0.298, + "nll_loss": 0.30640918016433716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08700142055749893, + "rewards/margins": 1.972203254699707, + "rewards/rejected": -2.0592048168182373, + "step": 3690 + }, + { + "epoch": 2.64, + "grad_norm": 14.0625, + "learning_rate": 4.271788288583805e-06, + "log_odds_chosen": 9.920567512512207, + "log_odds_ratio": -0.0430777445435524, + "logits/chosen": -2.800262928009033, + "logits/rejected": -1.6749203205108643, + "logps/chosen": -0.3537217676639557, + "logps/rejected": -8.953767776489258, + "loss": 0.292, + "nll_loss": 0.28976768255233765, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.0707443580031395, + "rewards/margins": 1.720009207725525, + "rewards/rejected": -1.7907536029815674, + "step": 3700 + }, + { + "epoch": 2.65, + "grad_norm": 71.0, + "learning_rate": 4.270230094737115e-06, + "log_odds_chosen": 10.269146919250488, + "log_odds_ratio": -0.03263562172651291, + "logits/chosen": -2.8407130241394043, + "logits/rejected": -1.8693710565567017, + "logps/chosen": -0.3560786247253418, + "logps/rejected": -9.248159408569336, + "loss": 0.2528, + "nll_loss": 0.24508798122406006, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.07121572643518448, + "rewards/margins": 1.7784162759780884, + "rewards/rejected": -1.8496320247650146, + "step": 3710 + }, + { + "epoch": 2.66, + "grad_norm": 163.0, + "learning_rate": 4.268673604765692e-06, + "log_odds_chosen": 8.313056945800781, + "log_odds_ratio": -0.04137984663248062, + "logits/chosen": -2.8410933017730713, + "logits/rejected": -1.973070502281189, + "logps/chosen": -0.3569386601448059, + "logps/rejected": -7.325540065765381, + "loss": 0.2936, + "nll_loss": 0.3143910765647888, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07138773798942566, + "rewards/margins": 1.3937203884124756, + "rewards/rejected": -1.4651081562042236, + "step": 3720 + }, + { + "epoch": 2.66, + "grad_norm": 69.0, + "learning_rate": 4.267118815566505e-06, + "log_odds_chosen": 11.062416076660156, + "log_odds_ratio": -0.02929757535457611, + "logits/chosen": -2.8256027698516846, + "logits/rejected": -1.8267700672149658, + "logps/chosen": -0.34090739488601685, + "logps/rejected": -10.022697448730469, + "loss": 0.2669, + "nll_loss": 0.2610389292240143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06818147003650665, + "rewards/margins": 1.936357855796814, + "rewards/rejected": -2.0045394897460938, + "step": 3730 + }, + { + "epoch": 2.67, + "grad_norm": 7.21875, + "learning_rate": 4.265565724044426e-06, + "log_odds_chosen": 9.198521614074707, + "log_odds_ratio": -0.03330180048942566, + "logits/chosen": -2.8640224933624268, + "logits/rejected": -1.98549485206604, + "logps/chosen": -0.41554588079452515, + "logps/rejected": -8.330385208129883, + "loss": 0.2867, + "nll_loss": 0.29421791434288025, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.08310917764902115, + "rewards/margins": 1.58296799659729, + "rewards/rejected": -1.6660770177841187, + "step": 3740 + }, + { + "epoch": 2.68, + "grad_norm": 26.125, + "learning_rate": 4.264014327112208e-06, + "log_odds_chosen": 8.376938819885254, + "log_odds_ratio": -0.03763898089528084, + "logits/chosen": -2.8337912559509277, + "logits/rejected": -1.9237620830535889, + "logps/chosen": -0.33510148525238037, + "logps/rejected": -7.281014919281006, + "loss": 0.2649, + "nll_loss": 0.23882155120372772, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06702030450105667, + "rewards/margins": 1.389182686805725, + "rewards/rejected": -1.4562032222747803, + "step": 3750 + }, + { + "epoch": 2.68, + "grad_norm": 6.375, + "learning_rate": 4.26246462169046e-06, + "log_odds_chosen": 7.407693386077881, + "log_odds_ratio": -0.0290813185274601, + "logits/chosen": -2.852565050125122, + "logits/rejected": -2.0294179916381836, + "logps/chosen": -0.3306184411048889, + "logps/rejected": -6.291057586669922, + "loss": 0.2842, + "nll_loss": 0.28192299604415894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06612369418144226, + "rewards/margins": 1.1920878887176514, + "rewards/rejected": -1.2582114934921265, + "step": 3760 + }, + { + "epoch": 2.69, + "grad_norm": 27.5, + "learning_rate": 4.260916604707614e-06, + "log_odds_chosen": 8.492695808410645, + "log_odds_ratio": -0.040566060692071915, + "logits/chosen": -2.8278088569641113, + "logits/rejected": -1.880413293838501, + "logps/chosen": -0.40683627128601074, + "logps/rejected": -7.554902076721191, + "loss": 0.269, + "nll_loss": 0.2897428870201111, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.08136724680662155, + "rewards/margins": 1.4296132326126099, + "rewards/rejected": -1.5109803676605225, + "step": 3770 + }, + { + "epoch": 2.7, + "grad_norm": 5.75, + "learning_rate": 4.25937027309991e-06, + "log_odds_chosen": 7.113955497741699, + "log_odds_ratio": -0.034956224262714386, + "logits/chosen": -2.863619089126587, + "logits/rejected": -2.0861170291900635, + "logps/chosen": -0.3817751407623291, + "logps/rejected": -6.190094947814941, + "loss": 0.3086, + "nll_loss": 0.32031726837158203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07635502517223358, + "rewards/margins": 1.1616640090942383, + "rewards/rejected": -1.2380189895629883, + "step": 3780 + }, + { + "epoch": 2.71, + "grad_norm": 11.5625, + "learning_rate": 4.257825623811364e-06, + "log_odds_chosen": 6.6032867431640625, + "log_odds_ratio": -0.04721903055906296, + "logits/chosen": -2.870551824569702, + "logits/rejected": -2.1494574546813965, + "logps/chosen": -0.3325832486152649, + "logps/rejected": -5.56205940246582, + "loss": 0.2733, + "nll_loss": 0.2754889130592346, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06651665270328522, + "rewards/margins": 1.0458953380584717, + "rewards/rejected": -1.1124117374420166, + "step": 3790 + }, + { + "epoch": 2.71, + "grad_norm": 13.625, + "learning_rate": 4.256282653793743e-06, + "log_odds_chosen": 9.226226806640625, + "log_odds_ratio": -0.04071826487779617, + "logits/chosen": -2.8239336013793945, + "logits/rejected": -1.799629807472229, + "logps/chosen": -0.37041693925857544, + "logps/rejected": -8.24487018585205, + "loss": 0.2801, + "nll_loss": 0.2625892162322998, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07408339530229568, + "rewards/margins": 1.5748907327651978, + "rewards/rejected": -1.648974061012268, + "step": 3800 + }, + { + "epoch": 2.72, + "grad_norm": 43.0, + "learning_rate": 4.254741360006543e-06, + "log_odds_chosen": 7.27142858505249, + "log_odds_ratio": -0.04866673797369003, + "logits/chosen": -2.8658313751220703, + "logits/rejected": -2.249227523803711, + "logps/chosen": -0.3399750590324402, + "logps/rejected": -6.300660133361816, + "loss": 0.2534, + "nll_loss": 0.22867396473884583, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.06799499690532684, + "rewards/margins": 1.1921371221542358, + "rewards/rejected": -1.2601318359375, + "step": 3810 + }, + { + "epoch": 2.73, + "grad_norm": 7.5625, + "learning_rate": 4.25320173941696e-06, + "log_odds_chosen": 8.403349876403809, + "log_odds_ratio": -0.03414331004023552, + "logits/chosen": -2.849536657333374, + "logits/rejected": -2.041189193725586, + "logps/chosen": -0.3983810842037201, + "logps/rejected": -7.541253566741943, + "loss": 0.277, + "nll_loss": 0.2703934609889984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07967622578144073, + "rewards/margins": 1.428574562072754, + "rewards/rejected": -1.5082508325576782, + "step": 3820 + }, + { + "epoch": 2.73, + "grad_norm": 11.6875, + "learning_rate": 4.251663788999866e-06, + "log_odds_chosen": 8.267216682434082, + "log_odds_ratio": -0.03860500827431679, + "logits/chosen": -2.8517775535583496, + "logits/rejected": -2.037395477294922, + "logps/chosen": -0.35646042227745056, + "logps/rejected": -7.292947292327881, + "loss": 0.2727, + "nll_loss": 0.262928307056427, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07129208743572235, + "rewards/margins": 1.3872973918914795, + "rewards/rejected": -1.4585894346237183, + "step": 3830 + }, + { + "epoch": 2.74, + "grad_norm": 24.625, + "learning_rate": 4.250127505737787e-06, + "log_odds_chosen": 9.158185958862305, + "log_odds_ratio": -0.02719098888337612, + "logits/chosen": -2.81072735786438, + "logits/rejected": -1.8471095561981201, + "logps/chosen": -0.3558461368083954, + "logps/rejected": -8.152399063110352, + "loss": 0.2867, + "nll_loss": 0.2748354375362396, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07116921991109848, + "rewards/margins": 1.5593106746673584, + "rewards/rejected": -1.6304798126220703, + "step": 3840 + }, + { + "epoch": 2.75, + "grad_norm": 6.21875, + "learning_rate": 4.2485928866208736e-06, + "log_odds_chosen": 5.923229217529297, + "log_odds_ratio": -0.0449279360473156, + "logits/chosen": -2.8480160236358643, + "logits/rejected": -2.3773159980773926, + "logps/chosen": -0.3430297374725342, + "logps/rejected": -4.906435012817383, + "loss": 0.2786, + "nll_loss": 0.26425907015800476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0686059445142746, + "rewards/margins": 0.9126811027526855, + "rewards/rejected": -0.981286883354187, + "step": 3850 + }, + { + "epoch": 2.76, + "grad_norm": 11.9375, + "learning_rate": 4.247059928646881e-06, + "log_odds_chosen": 9.403549194335938, + "log_odds_ratio": -0.04165271669626236, + "logits/chosen": -2.783756971359253, + "logits/rejected": -1.7871586084365845, + "logps/chosen": -0.44730639457702637, + "logps/rejected": -8.516668319702148, + "loss": 0.3078, + "nll_loss": 0.2746645212173462, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.08946128189563751, + "rewards/margins": 1.6138725280761719, + "rewards/rejected": -1.7033336162567139, + "step": 3860 + }, + { + "epoch": 2.76, + "grad_norm": 39.25, + "learning_rate": 4.245528628821135e-06, + "log_odds_chosen": 8.89954662322998, + "log_odds_ratio": -0.02781747281551361, + "logits/chosen": -2.8360252380371094, + "logits/rejected": -1.8986726999282837, + "logps/chosen": -0.3417971730232239, + "logps/rejected": -7.838662147521973, + "loss": 0.2597, + "nll_loss": 0.28470245003700256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06835943460464478, + "rewards/margins": 1.4993728399276733, + "rewards/rejected": -1.5677324533462524, + "step": 3870 + }, + { + "epoch": 2.77, + "grad_norm": 203.0, + "learning_rate": 4.243998984156526e-06, + "log_odds_chosen": 9.094325065612793, + "log_odds_ratio": -0.033297426998615265, + "logits/chosen": -2.8292903900146484, + "logits/rejected": -2.0152010917663574, + "logps/chosen": -0.35362544655799866, + "logps/rejected": -8.119251251220703, + "loss": 0.2861, + "nll_loss": 0.26851797103881836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07072508335113525, + "rewards/margins": 1.5531253814697266, + "rewards/rejected": -1.6238505840301514, + "step": 3880 + }, + { + "epoch": 2.78, + "grad_norm": 7.0, + "learning_rate": 4.242470991673459e-06, + "log_odds_chosen": 8.757810592651367, + "log_odds_ratio": -0.02989915944635868, + "logits/chosen": -2.8213298320770264, + "logits/rejected": -2.0070536136627197, + "logps/chosen": -0.4033467173576355, + "logps/rejected": -7.789436340332031, + "loss": 0.2885, + "nll_loss": 0.27011099457740784, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0806693509221077, + "rewards/margins": 1.4772179126739502, + "rewards/rejected": -1.5578871965408325, + "step": 3890 + }, + { + "epoch": 2.78, + "grad_norm": 247.0, + "learning_rate": 4.240944648399854e-06, + "log_odds_chosen": 9.707173347473145, + "log_odds_ratio": -0.030115026980638504, + "logits/chosen": -2.812969923019409, + "logits/rejected": -1.7806400060653687, + "logps/chosen": -0.3756954073905945, + "logps/rejected": -8.687716484069824, + "loss": 0.2924, + "nll_loss": 0.2944888770580292, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07513907551765442, + "rewards/margins": 1.6624042987823486, + "rewards/rejected": -1.7375433444976807, + "step": 3900 + }, + { + "epoch": 2.79, + "grad_norm": 7.3125, + "learning_rate": 4.239419951371107e-06, + "log_odds_chosen": 7.855190277099609, + "log_odds_ratio": -0.049606241285800934, + "logits/chosen": -2.8382956981658936, + "logits/rejected": -1.9690237045288086, + "logps/chosen": -0.3678521513938904, + "logps/rejected": -6.8809919357299805, + "loss": 0.246, + "nll_loss": 0.24365882575511932, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.07357043772935867, + "rewards/margins": 1.3026279211044312, + "rewards/rejected": -1.3761985301971436, + "step": 3910 + }, + { + "epoch": 2.8, + "grad_norm": 8.0625, + "learning_rate": 4.237896897630065e-06, + "log_odds_chosen": 9.103668212890625, + "log_odds_ratio": -0.01939046010375023, + "logits/chosen": -2.8209850788116455, + "logits/rejected": -1.7593700885772705, + "logps/chosen": -0.3552018702030182, + "logps/rejected": -8.111700057983398, + "loss": 0.2691, + "nll_loss": 0.26674580574035645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07104037702083588, + "rewards/margins": 1.5512996912002563, + "rewards/rejected": -1.6223399639129639, + "step": 3920 + }, + { + "epoch": 2.81, + "grad_norm": 6.625, + "learning_rate": 4.2363754842270135e-06, + "log_odds_chosen": 8.898262023925781, + "log_odds_ratio": -0.03263109177350998, + "logits/chosen": -2.8059921264648438, + "logits/rejected": -1.9720518589019775, + "logps/chosen": -0.3654606342315674, + "logps/rejected": -7.9605865478515625, + "loss": 0.2722, + "nll_loss": 0.24651777744293213, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07309212535619736, + "rewards/margins": 1.5190250873565674, + "rewards/rejected": -1.592117190361023, + "step": 3930 + }, + { + "epoch": 2.81, + "grad_norm": 10.9375, + "learning_rate": 4.23485570821964e-06, + "log_odds_chosen": 10.18175220489502, + "log_odds_ratio": -0.029537459835410118, + "logits/chosen": -2.805284023284912, + "logits/rejected": -1.7588096857070923, + "logps/chosen": -0.3637697100639343, + "logps/rejected": -9.15390682220459, + "loss": 0.2921, + "nll_loss": 0.28845328092575073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07275393605232239, + "rewards/margins": 1.7580273151397705, + "rewards/rejected": -1.8307812213897705, + "step": 3940 + }, + { + "epoch": 2.82, + "grad_norm": 276.0, + "learning_rate": 4.233337566673017e-06, + "log_odds_chosen": 10.128081321716309, + "log_odds_ratio": -0.05651165917515755, + "logits/chosen": -2.7642838954925537, + "logits/rejected": -1.8633878231048584, + "logps/chosen": -0.4686155915260315, + "logps/rejected": -9.309432029724121, + "loss": 0.299, + "nll_loss": 0.2807057797908783, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.0937231183052063, + "rewards/margins": 1.7681634426116943, + "rewards/rejected": -1.8618863821029663, + "step": 3950 + }, + { + "epoch": 2.83, + "grad_norm": 9.0625, + "learning_rate": 4.2318210566595795e-06, + "log_odds_chosen": 10.621389389038086, + "log_odds_ratio": -0.01841096207499504, + "logits/chosen": -2.835549831390381, + "logits/rejected": -1.8261913061141968, + "logps/chosen": -0.3856840133666992, + "logps/rejected": -9.641094207763672, + "loss": 0.2804, + "nll_loss": 0.28866028785705566, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.0771368071436882, + "rewards/margins": 1.851082444190979, + "rewards/rejected": -1.928219199180603, + "step": 3960 + }, + { + "epoch": 2.83, + "grad_norm": 50.5, + "learning_rate": 4.230306175259094e-06, + "log_odds_chosen": 8.465916633605957, + "log_odds_ratio": -0.029557716101408005, + "logits/chosen": -2.8356223106384277, + "logits/rejected": -2.1212961673736572, + "logps/chosen": -0.3706379532814026, + "logps/rejected": -7.479179382324219, + "loss": 0.2831, + "nll_loss": 0.28696635365486145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07412759214639664, + "rewards/margins": 1.421708345413208, + "rewards/rejected": -1.495835781097412, + "step": 3970 + }, + { + "epoch": 2.84, + "grad_norm": 9.375, + "learning_rate": 4.228792919558642e-06, + "log_odds_chosen": 9.524942398071289, + "log_odds_ratio": -0.03312790021300316, + "logits/chosen": -2.817082405090332, + "logits/rejected": -1.7293701171875, + "logps/chosen": -0.35598936676979065, + "logps/rejected": -8.479887008666992, + "loss": 0.2787, + "nll_loss": 0.28613555431365967, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07119788229465485, + "rewards/margins": 1.6247799396514893, + "rewards/rejected": -1.6959775686264038, + "step": 3980 + }, + { + "epoch": 2.85, + "grad_norm": 8.625, + "learning_rate": 4.227281286652593e-06, + "log_odds_chosen": 9.673505783081055, + "log_odds_ratio": -0.027351032942533493, + "logits/chosen": -2.8485827445983887, + "logits/rejected": -1.9277604818344116, + "logps/chosen": -0.36449387669563293, + "logps/rejected": -8.6371431350708, + "loss": 0.2689, + "nll_loss": 0.25819873809814453, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07289878278970718, + "rewards/margins": 1.6545298099517822, + "rewards/rejected": -1.727428674697876, + "step": 3990 + }, + { + "epoch": 2.86, + "grad_norm": 11.875, + "learning_rate": 4.2257712736425835e-06, + "log_odds_chosen": 10.454000473022461, + "log_odds_ratio": -0.021729234606027603, + "logits/chosen": -2.8186697959899902, + "logits/rejected": -1.75783371925354, + "logps/chosen": -0.4553149342536926, + "logps/rejected": -9.653711318969727, + "loss": 0.2844, + "nll_loss": 0.27239570021629333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0910629853606224, + "rewards/margins": 1.8396793603897095, + "rewards/rejected": -1.9307425022125244, + "step": 4000 + }, + { + "epoch": 2.86, + "grad_norm": 4.3125, + "learning_rate": 4.224262877637488e-06, + "log_odds_chosen": 9.088384628295898, + "log_odds_ratio": -0.044943638145923615, + "logits/chosen": -2.779998302459717, + "logits/rejected": -1.8532028198242188, + "logps/chosen": -0.4339783191680908, + "logps/rejected": -8.246506690979004, + "loss": 0.2768, + "nll_loss": 0.27718260884284973, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.08679567277431488, + "rewards/margins": 1.5625057220458984, + "rewards/rejected": -1.649301290512085, + "step": 4010 + }, + { + "epoch": 2.87, + "grad_norm": 8.125, + "learning_rate": 4.2227560957534054e-06, + "log_odds_chosen": 12.664664268493652, + "log_odds_ratio": -0.027644852176308632, + "logits/chosen": -2.8048336505889893, + "logits/rejected": -1.6057932376861572, + "logps/chosen": -0.3959696888923645, + "logps/rejected": -11.737017631530762, + "loss": 0.3086, + "nll_loss": 0.2660156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07919393479824066, + "rewards/margins": 2.26820969581604, + "rewards/rejected": -2.3474037647247314, + "step": 4020 + }, + { + "epoch": 2.88, + "grad_norm": 109.0, + "learning_rate": 4.221250925113625e-06, + "log_odds_chosen": 9.165689468383789, + "log_odds_ratio": -0.07480543851852417, + "logits/chosen": -2.8110146522521973, + "logits/rejected": -1.914781928062439, + "logps/chosen": -0.40431445837020874, + "logps/rejected": -8.292974472045898, + "loss": 0.2893, + "nll_loss": 0.2831747233867645, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.08086288720369339, + "rewards/margins": 1.5777318477630615, + "rewards/rejected": -1.6585948467254639, + "step": 4030 + }, + { + "epoch": 2.88, + "grad_norm": 7.65625, + "learning_rate": 4.219747362848612e-06, + "log_odds_chosen": 11.30323314666748, + "log_odds_ratio": -0.03717976063489914, + "logits/chosen": -2.7821929454803467, + "logits/rejected": -1.768080472946167, + "logps/chosen": -0.3442782461643219, + "logps/rejected": -10.223400115966797, + "loss": 0.2818, + "nll_loss": 0.2807686924934387, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.0688556432723999, + "rewards/margins": 1.9758243560791016, + "rewards/rejected": -2.044680118560791, + "step": 4040 + }, + { + "epoch": 2.89, + "grad_norm": 9.0, + "learning_rate": 4.2182454060959784e-06, + "log_odds_chosen": 11.626094818115234, + "log_odds_ratio": -0.02243855968117714, + "logits/chosen": -2.83784818649292, + "logits/rejected": -1.7854284048080444, + "logps/chosen": -0.375316321849823, + "logps/rejected": -10.655095100402832, + "loss": 0.2895, + "nll_loss": 0.29837626218795776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07506327331066132, + "rewards/margins": 2.055955648422241, + "rewards/rejected": -2.131019115447998, + "step": 4050 + }, + { + "epoch": 2.9, + "grad_norm": 6.25, + "learning_rate": 4.216745052000467e-06, + "log_odds_chosen": 9.441912651062012, + "log_odds_ratio": -0.07335133850574493, + "logits/chosen": -2.814119815826416, + "logits/rejected": -2.0087761878967285, + "logps/chosen": -0.40029245615005493, + "logps/rejected": -8.542582511901855, + "loss": 0.253, + "nll_loss": 0.24284212291240692, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.0800584927201271, + "rewards/margins": 1.6284582614898682, + "rewards/rejected": -1.7085163593292236, + "step": 4060 + }, + { + "epoch": 2.91, + "grad_norm": 8.5, + "learning_rate": 4.21524629771392e-06, + "log_odds_chosen": 11.409785270690918, + "log_odds_ratio": -0.032930441200733185, + "logits/chosen": -2.830761194229126, + "logits/rejected": -1.863943338394165, + "logps/chosen": -0.3692954480648041, + "logps/rejected": -10.434585571289062, + "loss": 0.288, + "nll_loss": 0.2768712639808655, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07385909557342529, + "rewards/margins": 2.0130581855773926, + "rewards/rejected": -2.0869174003601074, + "step": 4070 + }, + { + "epoch": 2.91, + "grad_norm": 5.15625, + "learning_rate": 4.213749140395264e-06, + "log_odds_chosen": 10.989635467529297, + "log_odds_ratio": -0.0375329926609993, + "logits/chosen": -2.829820394515991, + "logits/rejected": -2.0580878257751465, + "logps/chosen": -0.35255834460258484, + "logps/rejected": -9.971551895141602, + "loss": 0.2844, + "nll_loss": 0.26232025027275085, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.07051166892051697, + "rewards/margins": 1.9237985610961914, + "rewards/rejected": -1.9943103790283203, + "step": 4080 + }, + { + "epoch": 2.92, + "grad_norm": 72.5, + "learning_rate": 4.2122535772104825e-06, + "log_odds_chosen": 8.69615364074707, + "log_odds_ratio": -0.056691087782382965, + "logits/chosen": -2.840405225753784, + "logits/rejected": -1.9989845752716064, + "logps/chosen": -0.3394516110420227, + "logps/rejected": -7.6478590965271, + "loss": 0.2743, + "nll_loss": 0.2390586882829666, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.06789031624794006, + "rewards/margins": 1.4616813659667969, + "rewards/rejected": -1.529571771621704, + "step": 4090 + }, + { + "epoch": 2.93, + "grad_norm": 6.90625, + "learning_rate": 4.2107596053325946e-06, + "log_odds_chosen": 8.241374969482422, + "log_odds_ratio": -0.04560881108045578, + "logits/chosen": -2.870729446411133, + "logits/rejected": -2.089569568634033, + "logps/chosen": -0.3773348033428192, + "logps/rejected": -7.359114170074463, + "loss": 0.2822, + "nll_loss": 0.29517418146133423, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.07546695321798325, + "rewards/margins": 1.3963558673858643, + "rewards/rejected": -1.471822738647461, + "step": 4100 + }, + { + "epoch": 2.93, + "grad_norm": 5.8125, + "learning_rate": 4.209267221941637e-06, + "log_odds_chosen": 8.46093463897705, + "log_odds_ratio": -0.050915759056806564, + "logits/chosen": -2.8458335399627686, + "logits/rejected": -1.9423186779022217, + "logps/chosen": -0.3869299292564392, + "logps/rejected": -7.5012969970703125, + "loss": 0.2991, + "nll_loss": 0.2835760712623596, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.07738598436117172, + "rewards/margins": 1.4228734970092773, + "rewards/rejected": -1.5002593994140625, + "step": 4110 + }, + { + "epoch": 2.94, + "grad_norm": 22.25, + "learning_rate": 4.207776424224631e-06, + "log_odds_chosen": 10.923059463500977, + "log_odds_ratio": -0.023947065696120262, + "logits/chosen": -2.816763401031494, + "logits/rejected": -1.7249367237091064, + "logps/chosen": -0.3471135199069977, + "logps/rejected": -9.89926815032959, + "loss": 0.2673, + "nll_loss": 0.25374776124954224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06942270696163177, + "rewards/margins": 1.910431146621704, + "rewards/rejected": -1.979853868484497, + "step": 4120 + }, + { + "epoch": 2.95, + "grad_norm": 7.0625, + "learning_rate": 4.206287209375573e-06, + "log_odds_chosen": 9.080406188964844, + "log_odds_ratio": -0.05416526645421982, + "logits/chosen": -2.8165950775146484, + "logits/rejected": -1.8180831670761108, + "logps/chosen": -0.363316148519516, + "logps/rejected": -8.077341079711914, + "loss": 0.2679, + "nll_loss": 0.26229244470596313, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.07266323268413544, + "rewards/margins": 1.5428051948547363, + "rewards/rejected": -1.6154683828353882, + "step": 4130 + }, + { + "epoch": 2.96, + "grad_norm": 6.53125, + "learning_rate": 4.204799574595403e-06, + "log_odds_chosen": 6.818055629730225, + "log_odds_ratio": -0.05800958722829819, + "logits/chosen": -2.8333072662353516, + "logits/rejected": -2.186479330062866, + "logps/chosen": -0.3609169125556946, + "logps/rejected": -5.922616958618164, + "loss": 0.2654, + "nll_loss": 0.2335742712020874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07218338549137115, + "rewards/margins": 1.112339973449707, + "rewards/rejected": -1.184523344039917, + "step": 4140 + }, + { + "epoch": 2.96, + "grad_norm": 15.625, + "learning_rate": 4.203313517091987e-06, + "log_odds_chosen": 6.995954990386963, + "log_odds_ratio": -0.04991995543241501, + "logits/chosen": -2.8100647926330566, + "logits/rejected": -2.1644890308380127, + "logps/chosen": -0.3308708071708679, + "logps/rejected": -5.982227325439453, + "loss": 0.2739, + "nll_loss": 0.23660406470298767, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.06617416441440582, + "rewards/margins": 1.130271553993225, + "rewards/rejected": -1.1964454650878906, + "step": 4150 + }, + { + "epoch": 2.97, + "grad_norm": 24.5, + "learning_rate": 4.201829034080091e-06, + "log_odds_chosen": 9.040159225463867, + "log_odds_ratio": -0.048347409814596176, + "logits/chosen": -2.8145790100097656, + "logits/rejected": -1.903846025466919, + "logps/chosen": -0.40995389223098755, + "logps/rejected": -8.149190902709961, + "loss": 0.2742, + "nll_loss": 0.28532686829566956, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.08199077844619751, + "rewards/margins": 1.5478473901748657, + "rewards/rejected": -1.629838228225708, + "step": 4160 + }, + { + "epoch": 2.98, + "grad_norm": 9.0625, + "learning_rate": 4.200346122781363e-06, + "log_odds_chosen": 6.394429683685303, + "log_odds_ratio": -0.048393916338682175, + "logits/chosen": -2.858705997467041, + "logits/rejected": -2.1757659912109375, + "logps/chosen": -0.36599108576774597, + "logps/rejected": -5.35428524017334, + "loss": 0.2785, + "nll_loss": 0.2647210955619812, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -0.07319821417331696, + "rewards/margins": 0.9976588487625122, + "rewards/rejected": -1.0708571672439575, + "step": 4170 + }, + { + "epoch": 2.98, + "grad_norm": 8.0625, + "learning_rate": 4.1988647804243155e-06, + "log_odds_chosen": 7.234747409820557, + "log_odds_ratio": -0.03427041321992874, + "logits/chosen": -2.8021435737609863, + "logits/rejected": -2.0722737312316895, + "logps/chosen": -0.4443022310733795, + "logps/rejected": -6.377936840057373, + "loss": 0.2923, + "nll_loss": 0.2961937487125397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08886045217514038, + "rewards/margins": 1.1867269277572632, + "rewards/rejected": -1.2755874395370483, + "step": 4180 + }, + { + "epoch": 2.99, + "grad_norm": 15.5, + "learning_rate": 4.197385004244289e-06, + "log_odds_chosen": 8.025124549865723, + "log_odds_ratio": -0.023739898577332497, + "logits/chosen": -2.865403413772583, + "logits/rejected": -2.047375202178955, + "logps/chosen": -0.3514173626899719, + "logps/rejected": -6.971765995025635, + "loss": 0.2778, + "nll_loss": 0.2846386432647705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07028347253799438, + "rewards/margins": 1.3240697383880615, + "rewards/rejected": -1.3943531513214111, + "step": 4190 + }, + { + "epoch": 3.0, + "grad_norm": 6.8125, + "learning_rate": 4.195906791483446e-06, + "log_odds_chosen": 9.17123031616211, + "log_odds_ratio": -0.03721408173441887, + "logits/chosen": -2.8432440757751465, + "logits/rejected": -1.8437057733535767, + "logps/chosen": -0.40266337990760803, + "logps/rejected": -8.306893348693848, + "loss": 0.2994, + "nll_loss": 0.289571076631546, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -0.08053267002105713, + "rewards/margins": 1.580845832824707, + "rewards/rejected": -1.6613785028457642, + "step": 4200 + }, + { + "epoch": 3.0, + "step": 4200, + "total_flos": 0.0, + "train_loss": 0.4132561104070573, + "train_runtime": 52660.5661, + "train_samples_per_second": 2.553, + "train_steps_per_second": 0.08 + } + ], + "logging_steps": 10, + "max_steps": 4200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}