{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9870390859620204, "eval_steps": 2768, "global_step": 11000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.333925485610962, "learning_rate": 8e-07, "log_odds_chosen": 0.5151928067207336, "log_odds_ratio": -0.6412230730056763, "logits/chosen": -0.491842657327652, "logits/rejected": -0.7963203191757202, "logps/chosen": -2.191591262817383, "logps/rejected": -2.631917715072632, "loss": 3.7982, "nll_loss": 3.734078884124756, "rewards/accuracies": 0.625, "rewards/chosen": -0.21915917098522186, "rewards/margins": 0.044032637029886246, "rewards/rejected": -0.2631917893886566, "step": 10 }, { "epoch": 0.0, "grad_norm": 4.156618118286133, "learning_rate": 1.6e-06, "log_odds_chosen": 0.23690485954284668, "log_odds_ratio": -0.7374136447906494, "logits/chosen": -0.5650675296783447, "logits/rejected": -0.626733124256134, "logps/chosen": -1.9588556289672852, "logps/rejected": -2.1776576042175293, "loss": 4.1747, "nll_loss": 4.100991249084473, "rewards/accuracies": 0.625, "rewards/chosen": -0.195885568857193, "rewards/margins": 0.02188020572066307, "rewards/rejected": -0.21776576340198517, "step": 20 }, { "epoch": 0.01, "grad_norm": 3.653958320617676, "learning_rate": 2.4e-06, "log_odds_chosen": 0.2972280979156494, "log_odds_ratio": -0.8111011385917664, "logits/chosen": -0.57868492603302, "logits/rejected": -0.8484483957290649, "logps/chosen": -2.0790963172912598, "logps/rejected": -2.3348782062530518, "loss": 3.7657, "nll_loss": 3.6846203804016113, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.20790961384773254, "rewards/margins": 0.025578215718269348, "rewards/rejected": -0.2334878146648407, "step": 30 }, { "epoch": 0.01, "grad_norm": 3.8905930519104004, "learning_rate": 3.2e-06, "log_odds_chosen": 0.21622678637504578, "log_odds_ratio": -0.8261001706123352, "logits/chosen": -0.5219866037368774, "logits/rejected": -0.6627193689346313, "logps/chosen": -1.9454641342163086, "logps/rejected": -2.1101784706115723, "loss": 3.7382, "nll_loss": 3.655561923980713, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1945464164018631, "rewards/margins": 0.016471445560455322, "rewards/rejected": -0.21101787686347961, "step": 40 }, { "epoch": 0.01, "grad_norm": 2.2181079387664795, "learning_rate": 4e-06, "log_odds_chosen": 0.5430625081062317, "log_odds_ratio": -0.6073707938194275, "logits/chosen": -0.43996763229370117, "logits/rejected": -0.6621267199516296, "logps/chosen": -2.2849440574645996, "logps/rejected": -2.7556464672088623, "loss": 3.5221, "nll_loss": 3.461381435394287, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22849440574645996, "rewards/margins": 0.04707026481628418, "rewards/rejected": -0.27556467056274414, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.3843708038330078, "learning_rate": 4.8e-06, "log_odds_chosen": 0.3988548815250397, "log_odds_ratio": -0.6887394785881042, "logits/chosen": -0.45927801728248596, "logits/rejected": -0.6018295884132385, "logps/chosen": -1.714123010635376, "logps/rejected": -2.0624635219573975, "loss": 2.8047, "nll_loss": 2.7357983589172363, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.17141230404376984, "rewards/margins": 0.034834057092666626, "rewards/rejected": -0.20624634623527527, "step": 60 }, { "epoch": 0.01, "grad_norm": 6.880599498748779, "learning_rate": 5.6e-06, "log_odds_chosen": 0.5788862705230713, "log_odds_ratio": -0.6416047811508179, "logits/chosen": -0.43928202986717224, "logits/rejected": -0.46093741059303284, "logps/chosen": -1.9180206060409546, "logps/rejected": -2.4507076740264893, "loss": 2.7151, "nll_loss": 2.6509668827056885, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.19180205464363098, "rewards/margins": 0.053268708288669586, "rewards/rejected": -0.24507074058055878, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.88127601146698, "learning_rate": 6.4e-06, "log_odds_chosen": 0.4793362617492676, "log_odds_ratio": -0.6489545106887817, "logits/chosen": -0.3151110112667084, "logits/rejected": -0.12425204366445541, "logps/chosen": -1.9359104633331299, "logps/rejected": -2.3397438526153564, "loss": 2.3516, "nll_loss": 2.286700487136841, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.19359104335308075, "rewards/margins": 0.04038333147764206, "rewards/rejected": -0.2339743673801422, "step": 80 }, { "epoch": 0.02, "grad_norm": 1.657551646232605, "learning_rate": 7.2e-06, "log_odds_chosen": 0.2164599895477295, "log_odds_ratio": -0.6870883703231812, "logits/chosen": -0.34788548946380615, "logits/rejected": -0.2957800328731537, "logps/chosen": -1.727168321609497, "logps/rejected": -1.917694091796875, "loss": 2.1251, "nll_loss": 2.056426525115967, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17271684110164642, "rewards/margins": 0.01905256323516369, "rewards/rejected": -0.19176940619945526, "step": 90 }, { "epoch": 0.02, "grad_norm": 0.9917482137680054, "learning_rate": 8e-06, "log_odds_chosen": 0.017276203259825706, "log_odds_ratio": -0.8191194534301758, "logits/chosen": -0.48032236099243164, "logits/rejected": -0.4871880114078522, "logps/chosen": -1.7194982767105103, "logps/rejected": -1.7363418340682983, "loss": 1.9265, "nll_loss": 1.8445875644683838, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.1719498336315155, "rewards/margins": 0.0016843515913933516, "rewards/rejected": -0.17363418638706207, "step": 100 }, { "epoch": 0.02, "grad_norm": 0.8336722254753113, "learning_rate": 7.994176736060562e-06, "log_odds_chosen": 0.23384077847003937, "log_odds_ratio": -0.6849513053894043, "logits/chosen": -0.4313598573207855, "logits/rejected": -0.46657222509384155, "logps/chosen": -1.4655210971832275, "logps/rejected": -1.6593421697616577, "loss": 1.7861, "nll_loss": 1.717559576034546, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14655211567878723, "rewards/margins": 0.01938212849199772, "rewards/rejected": -0.1659342348575592, "step": 110 }, { "epoch": 0.02, "grad_norm": 0.9542021155357361, "learning_rate": 7.988353472121123e-06, "log_odds_chosen": 0.30552104115486145, "log_odds_ratio": -0.6328169703483582, "logits/chosen": -0.4570741653442383, "logits/rejected": -0.4447706341743469, "logps/chosen": -1.468922734260559, "logps/rejected": -1.714000940322876, "loss": 1.7333, "nll_loss": 1.6700189113616943, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.14689227938652039, "rewards/margins": 0.024507839232683182, "rewards/rejected": -0.17140009999275208, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.7623200416564941, "learning_rate": 7.982530208181685e-06, "log_odds_chosen": 0.26080387830734253, "log_odds_ratio": -0.701958954334259, "logits/chosen": -0.38970547914505005, "logits/rejected": -0.4110942780971527, "logps/chosen": -1.3950055837631226, "logps/rejected": -1.5955398082733154, "loss": 1.6744, "nll_loss": 1.6042404174804688, "rewards/accuracies": 0.5, "rewards/chosen": -0.13950055837631226, "rewards/margins": 0.020053423941135406, "rewards/rejected": -0.15955397486686707, "step": 130 }, { "epoch": 0.03, "grad_norm": 0.7866131663322449, "learning_rate": 7.976706944242247e-06, "log_odds_chosen": 0.366484135389328, "log_odds_ratio": -0.6378077268600464, "logits/chosen": -0.40181222558021545, "logits/rejected": -0.4499889314174652, "logps/chosen": -1.3115607500076294, "logps/rejected": -1.6199172735214233, "loss": 1.5161, "nll_loss": 1.4522874355316162, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1311560571193695, "rewards/margins": 0.030835667625069618, "rewards/rejected": -0.16199173033237457, "step": 140 }, { "epoch": 0.03, "grad_norm": 1.6389062404632568, "learning_rate": 7.97088368030281e-06, "log_odds_chosen": 0.24471768736839294, "log_odds_ratio": -0.6693255305290222, "logits/chosen": -0.4537169933319092, "logits/rejected": -0.48922696709632874, "logps/chosen": -1.465497612953186, "logps/rejected": -1.6574163436889648, "loss": 1.5532, "nll_loss": 1.4862229824066162, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1465497761964798, "rewards/margins": 0.019191861152648926, "rewards/rejected": -0.16574163734912872, "step": 150 }, { "epoch": 0.03, "grad_norm": 1.1604678630828857, "learning_rate": 7.965060416363372e-06, "log_odds_chosen": 0.35011082887649536, "log_odds_ratio": -0.6021308898925781, "logits/chosen": -0.4541899263858795, "logits/rejected": -0.4512806832790375, "logps/chosen": -1.4302852153778076, "logps/rejected": -1.7131397724151611, "loss": 1.5223, "nll_loss": 1.462041974067688, "rewards/accuracies": 0.625, "rewards/chosen": -0.14302849769592285, "rewards/margins": 0.028285473585128784, "rewards/rejected": -0.17131397128105164, "step": 160 }, { "epoch": 0.03, "grad_norm": 1.774051547050476, "learning_rate": 7.959237152423934e-06, "log_odds_chosen": 0.21281366050243378, "log_odds_ratio": -0.662868082523346, "logits/chosen": -0.4206954836845398, "logits/rejected": -0.4131297171115875, "logps/chosen": -1.4108431339263916, "logps/rejected": -1.5811221599578857, "loss": 1.4785, "nll_loss": 1.4122225046157837, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.14108431339263916, "rewards/margins": 0.017027903348207474, "rewards/rejected": -0.15811221301555634, "step": 170 }, { "epoch": 0.03, "grad_norm": 1.3976486921310425, "learning_rate": 7.953413888484495e-06, "log_odds_chosen": 0.3351721167564392, "log_odds_ratio": -0.6082215309143066, "logits/chosen": -0.4032515585422516, "logits/rejected": -0.3882203996181488, "logps/chosen": -1.3810927867889404, "logps/rejected": -1.6092808246612549, "loss": 1.2855, "nll_loss": 1.2246606349945068, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13810928165912628, "rewards/margins": 0.022818809375166893, "rewards/rejected": -0.16092810034751892, "step": 180 }, { "epoch": 0.03, "grad_norm": 0.935833215713501, "learning_rate": 7.947590624545057e-06, "log_odds_chosen": 0.3752503991127014, "log_odds_ratio": -0.5996502041816711, "logits/chosen": -0.39611080288887024, "logits/rejected": -0.4244672656059265, "logps/chosen": -1.145514726638794, "logps/rejected": -1.4192689657211304, "loss": 1.3198, "nll_loss": 1.2598202228546143, "rewards/accuracies": 0.625, "rewards/chosen": -0.11455146223306656, "rewards/margins": 0.027375441044569016, "rewards/rejected": -0.14192691445350647, "step": 190 }, { "epoch": 0.04, "grad_norm": 2.048588752746582, "learning_rate": 7.94176736060562e-06, "log_odds_chosen": 0.14159588515758514, "log_odds_ratio": -0.6899443864822388, "logits/chosen": -0.42449721693992615, "logits/rejected": -0.44452494382858276, "logps/chosen": -1.3461925983428955, "logps/rejected": -1.454458236694336, "loss": 1.3718, "nll_loss": 1.3028428554534912, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.13461926579475403, "rewards/margins": 0.01082657091319561, "rewards/rejected": -0.1454458385705948, "step": 200 }, { "epoch": 0.04, "grad_norm": 1.715198278427124, "learning_rate": 7.935944096666182e-06, "log_odds_chosen": 0.39089664816856384, "log_odds_ratio": -0.5823230743408203, "logits/chosen": -0.4162190556526184, "logits/rejected": -0.3965161442756653, "logps/chosen": -1.270228624343872, "logps/rejected": -1.5800155401229858, "loss": 1.3012, "nll_loss": 1.2429354190826416, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1270228922367096, "rewards/margins": 0.030978679656982422, "rewards/rejected": -0.15800157189369202, "step": 210 }, { "epoch": 0.04, "grad_norm": 0.9870737791061401, "learning_rate": 7.930120832726742e-06, "log_odds_chosen": 0.2738127112388611, "log_odds_ratio": -0.6368094086647034, "logits/chosen": -0.3498522639274597, "logits/rejected": -0.3681924343109131, "logps/chosen": -1.1670268774032593, "logps/rejected": -1.351577877998352, "loss": 1.2731, "nll_loss": 1.2094577550888062, "rewards/accuracies": 0.625, "rewards/chosen": -0.11670269817113876, "rewards/margins": 0.01845509558916092, "rewards/rejected": -0.13515779376029968, "step": 220 }, { "epoch": 0.04, "grad_norm": 4.029344081878662, "learning_rate": 7.924297568787305e-06, "log_odds_chosen": 0.22043490409851074, "log_odds_ratio": -0.6865091919898987, "logits/chosen": -0.37322139739990234, "logits/rejected": -0.3585105836391449, "logps/chosen": -1.3887712955474854, "logps/rejected": -1.5607054233551025, "loss": 1.3353, "nll_loss": 1.2666887044906616, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.13887712359428406, "rewards/margins": 0.017193417996168137, "rewards/rejected": -0.1560705453157425, "step": 230 }, { "epoch": 0.04, "grad_norm": 0.7768957614898682, "learning_rate": 7.918474304847867e-06, "log_odds_chosen": 0.19368572533130646, "log_odds_ratio": -0.6984459757804871, "logits/chosen": -0.36836880445480347, "logits/rejected": -0.3484199643135071, "logps/chosen": -1.3645991086959839, "logps/rejected": -1.5094786882400513, "loss": 1.2396, "nll_loss": 1.1697386503219604, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.13645990192890167, "rewards/margins": 0.014487968757748604, "rewards/rejected": -0.15094786882400513, "step": 240 }, { "epoch": 0.05, "grad_norm": 0.8741191029548645, "learning_rate": 7.91265104090843e-06, "log_odds_chosen": 0.3740343749523163, "log_odds_ratio": -0.5765000581741333, "logits/chosen": -0.36863988637924194, "logits/rejected": -0.3575906753540039, "logps/chosen": -1.0825830698013306, "logps/rejected": -1.3362675905227661, "loss": 1.2257, "nll_loss": 1.1680984497070312, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10825830698013306, "rewards/margins": 0.0253684613853693, "rewards/rejected": -0.1336267590522766, "step": 250 }, { "epoch": 0.05, "grad_norm": 0.968717634677887, "learning_rate": 7.906827776968992e-06, "log_odds_chosen": 0.24148449301719666, "log_odds_ratio": -0.6510211229324341, "logits/chosen": -0.36772042512893677, "logits/rejected": -0.3647049367427826, "logps/chosen": -1.219939947128296, "logps/rejected": -1.3963510990142822, "loss": 1.2792, "nll_loss": 1.214083194732666, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12199399620294571, "rewards/margins": 0.017641115933656693, "rewards/rejected": -0.1396351158618927, "step": 260 }, { "epoch": 0.05, "grad_norm": 0.8676772117614746, "learning_rate": 7.901004513029554e-06, "log_odds_chosen": 0.2663150727748871, "log_odds_ratio": -0.6641027331352234, "logits/chosen": -0.3884763717651367, "logits/rejected": -0.38966697454452515, "logps/chosen": -1.3800342082977295, "logps/rejected": -1.598384141921997, "loss": 1.3198, "nll_loss": 1.253383994102478, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.138003408908844, "rewards/margins": 0.021835003048181534, "rewards/rejected": -0.15983840823173523, "step": 270 }, { "epoch": 0.05, "grad_norm": 1.793342113494873, "learning_rate": 7.895181249090114e-06, "log_odds_chosen": 0.22694933414459229, "log_odds_ratio": -0.6462079286575317, "logits/chosen": -0.3369145393371582, "logits/rejected": -0.34541624784469604, "logps/chosen": -1.1592384576797485, "logps/rejected": -1.309792399406433, "loss": 1.2404, "nll_loss": 1.175813913345337, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11592384427785873, "rewards/margins": 0.015055393800139427, "rewards/rejected": -0.1309792548418045, "step": 280 }, { "epoch": 0.05, "grad_norm": 0.962464451789856, "learning_rate": 7.889357985150677e-06, "log_odds_chosen": 0.2656427323818207, "log_odds_ratio": -0.6383371949195862, "logits/chosen": -0.28534650802612305, "logits/rejected": -0.2924650311470032, "logps/chosen": -1.1702284812927246, "logps/rejected": -1.358701229095459, "loss": 1.2384, "nll_loss": 1.1746160984039307, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.11702284961938858, "rewards/margins": 0.018847281113266945, "rewards/rejected": -0.13587012887001038, "step": 290 }, { "epoch": 0.05, "grad_norm": 0.5939451456069946, "learning_rate": 7.883534721211239e-06, "log_odds_chosen": 0.38102689385414124, "log_odds_ratio": -0.6059800386428833, "logits/chosen": -0.3350544273853302, "logits/rejected": -0.312080442905426, "logps/chosen": -1.1126482486724854, "logps/rejected": -1.4021085500717163, "loss": 1.1435, "nll_loss": 1.0828807353973389, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.11126482486724854, "rewards/margins": 0.02894604206085205, "rewards/rejected": -0.14021086692810059, "step": 300 }, { "epoch": 0.06, "grad_norm": 0.7120574712753296, "learning_rate": 7.8777114572718e-06, "log_odds_chosen": 0.2518892288208008, "log_odds_ratio": -0.6437832713127136, "logits/chosen": -0.42240291833877563, "logits/rejected": -0.41587719321250916, "logps/chosen": -1.2639614343643188, "logps/rejected": -1.4360151290893555, "loss": 1.3117, "nll_loss": 1.2472755908966064, "rewards/accuracies": 0.625, "rewards/chosen": -0.12639614939689636, "rewards/margins": 0.0172053761780262, "rewards/rejected": -0.14360150694847107, "step": 310 }, { "epoch": 0.06, "grad_norm": 1.4585373401641846, "learning_rate": 7.871888193332362e-06, "log_odds_chosen": 0.36516430974006653, "log_odds_ratio": -0.5913447141647339, "logits/chosen": -0.3302076756954193, "logits/rejected": -0.3184494376182556, "logps/chosen": -1.1804471015930176, "logps/rejected": -1.4232193231582642, "loss": 1.1885, "nll_loss": 1.1293482780456543, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11804471164941788, "rewards/margins": 0.024277225136756897, "rewards/rejected": -0.14232194423675537, "step": 320 }, { "epoch": 0.06, "grad_norm": 0.9858699440956116, "learning_rate": 7.866064929392924e-06, "log_odds_chosen": 0.21814580261707306, "log_odds_ratio": -0.6653276681900024, "logits/chosen": -0.3807678818702698, "logits/rejected": -0.3492942452430725, "logps/chosen": -1.3254904747009277, "logps/rejected": -1.4859329462051392, "loss": 1.2597, "nll_loss": 1.1931736469268799, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.13254904747009277, "rewards/margins": 0.01604425348341465, "rewards/rejected": -0.14859329164028168, "step": 330 }, { "epoch": 0.06, "grad_norm": 1.0847293138504028, "learning_rate": 7.860241665453487e-06, "log_odds_chosen": 0.27122896909713745, "log_odds_ratio": -0.6263772249221802, "logits/chosen": -0.3252618610858917, "logits/rejected": -0.3240824341773987, "logps/chosen": -1.2787262201309204, "logps/rejected": -1.471504807472229, "loss": 1.2253, "nll_loss": 1.1626585721969604, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.12787261605262756, "rewards/margins": 0.019277850165963173, "rewards/rejected": -0.14715047180652618, "step": 340 }, { "epoch": 0.06, "grad_norm": 0.9820899963378906, "learning_rate": 7.854418401514049e-06, "log_odds_chosen": 0.3788866698741913, "log_odds_ratio": -0.6247309446334839, "logits/chosen": -0.3561337888240814, "logits/rejected": -0.35025572776794434, "logps/chosen": -1.1670924425125122, "logps/rejected": -1.4341050386428833, "loss": 1.2023, "nll_loss": 1.1397864818572998, "rewards/accuracies": 0.625, "rewards/chosen": -0.11670924723148346, "rewards/margins": 0.026701247319579124, "rewards/rejected": -0.14341048896312714, "step": 350 }, { "epoch": 0.07, "grad_norm": 2.0867435932159424, "learning_rate": 7.848595137574611e-06, "log_odds_chosen": 0.23402830958366394, "log_odds_ratio": -0.6430379748344421, "logits/chosen": -0.341842383146286, "logits/rejected": -0.3549098074436188, "logps/chosen": -1.2200753688812256, "logps/rejected": -1.378993034362793, "loss": 1.2236, "nll_loss": 1.159274697303772, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12200756371021271, "rewards/margins": 0.015891747549176216, "rewards/rejected": -0.13789930939674377, "step": 360 }, { "epoch": 0.07, "grad_norm": 0.8319967985153198, "learning_rate": 7.842771873635172e-06, "log_odds_chosen": 0.3978227376937866, "log_odds_ratio": -0.5828840136528015, "logits/chosen": -0.3248536288738251, "logits/rejected": -0.3298476040363312, "logps/chosen": -1.1620112657546997, "logps/rejected": -1.4452251195907593, "loss": 1.2021, "nll_loss": 1.1437865495681763, "rewards/accuracies": 0.6875, "rewards/chosen": -0.11620111763477325, "rewards/margins": 0.02832140401005745, "rewards/rejected": -0.1445225179195404, "step": 370 }, { "epoch": 0.07, "grad_norm": 0.7204629778862, "learning_rate": 7.836948609695734e-06, "log_odds_chosen": 0.3162148594856262, "log_odds_ratio": -0.5983591079711914, "logits/chosen": -0.23571312427520752, "logits/rejected": -0.26293158531188965, "logps/chosen": -1.100656509399414, "logps/rejected": -1.3071973323822021, "loss": 1.1738, "nll_loss": 1.1139934062957764, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11006565392017365, "rewards/margins": 0.020654071122407913, "rewards/rejected": -0.13071972131729126, "step": 380 }, { "epoch": 0.07, "grad_norm": 1.169068455696106, "learning_rate": 7.831125345756296e-06, "log_odds_chosen": 0.19375436007976532, "log_odds_ratio": -0.6605286598205566, "logits/chosen": -0.25121766328811646, "logits/rejected": -0.2934662997722626, "logps/chosen": -1.0589444637298584, "logps/rejected": -1.1631048917770386, "loss": 1.1408, "nll_loss": 1.0747069120407104, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10589446127414703, "rewards/margins": 0.010416034609079361, "rewards/rejected": -0.1163104772567749, "step": 390 }, { "epoch": 0.07, "grad_norm": 0.5295692682266235, "learning_rate": 7.825302081816857e-06, "log_odds_chosen": 0.22412052750587463, "log_odds_ratio": -0.6809746623039246, "logits/chosen": -0.29396966099739075, "logits/rejected": -0.299424946308136, "logps/chosen": -1.1099778413772583, "logps/rejected": -1.2693623304367065, "loss": 1.1806, "nll_loss": 1.1125379800796509, "rewards/accuracies": 0.625, "rewards/chosen": -0.110997773706913, "rewards/margins": 0.015938464552164078, "rewards/rejected": -0.12693624198436737, "step": 400 }, { "epoch": 0.07, "grad_norm": 1.4080840349197388, "learning_rate": 7.81947881787742e-06, "log_odds_chosen": 0.42054247856140137, "log_odds_ratio": -0.5622063875198364, "logits/chosen": -0.30937671661376953, "logits/rejected": -0.3293423056602478, "logps/chosen": -1.262446641921997, "logps/rejected": -1.5442469120025635, "loss": 1.2273, "nll_loss": 1.1710504293441772, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1262446492910385, "rewards/margins": 0.028180036693811417, "rewards/rejected": -0.15442468225955963, "step": 410 }, { "epoch": 0.08, "grad_norm": 2.086719274520874, "learning_rate": 7.813655553937982e-06, "log_odds_chosen": 0.4324397146701813, "log_odds_ratio": -0.5911771059036255, "logits/chosen": -0.2944505512714386, "logits/rejected": -0.330447793006897, "logps/chosen": -1.2822068929672241, "logps/rejected": -1.585065484046936, "loss": 1.2142, "nll_loss": 1.1550967693328857, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12822069227695465, "rewards/margins": 0.03028585948050022, "rewards/rejected": -0.15850654244422913, "step": 420 }, { "epoch": 0.08, "grad_norm": 0.6857010722160339, "learning_rate": 7.807832289998544e-06, "log_odds_chosen": 0.3362474739551544, "log_odds_ratio": -0.5960213541984558, "logits/chosen": -0.24630114436149597, "logits/rejected": -0.24887843430042267, "logps/chosen": -1.0708539485931396, "logps/rejected": -1.299224615097046, "loss": 1.1308, "nll_loss": 1.0712475776672363, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10708538442850113, "rewards/margins": 0.02283708192408085, "rewards/rejected": -0.12992244958877563, "step": 430 }, { "epoch": 0.08, "grad_norm": 0.9802748560905457, "learning_rate": 7.802009026059106e-06, "log_odds_chosen": 0.20174989104270935, "log_odds_ratio": -0.6817139387130737, "logits/chosen": -0.3603675663471222, "logits/rejected": -0.33373549580574036, "logps/chosen": -1.2318315505981445, "logps/rejected": -1.3654711246490479, "loss": 1.2213, "nll_loss": 1.153092384338379, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.12318315356969833, "rewards/margins": 0.013363957405090332, "rewards/rejected": -0.13654711842536926, "step": 440 }, { "epoch": 0.08, "grad_norm": 0.811257004737854, "learning_rate": 7.796185762119669e-06, "log_odds_chosen": 0.308106929063797, "log_odds_ratio": -0.6701749563217163, "logits/chosen": -0.3041822612285614, "logits/rejected": -0.32910075783729553, "logps/chosen": -1.2259390354156494, "logps/rejected": -1.4316823482513428, "loss": 1.2089, "nll_loss": 1.1418609619140625, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12259390205144882, "rewards/margins": 0.02057434618473053, "rewards/rejected": -0.14316824078559875, "step": 450 }, { "epoch": 0.08, "grad_norm": 1.186498761177063, "learning_rate": 7.79036249818023e-06, "log_odds_chosen": 0.3135170340538025, "log_odds_ratio": -0.6137998700141907, "logits/chosen": -0.2752595543861389, "logits/rejected": -0.31168991327285767, "logps/chosen": -1.1841676235198975, "logps/rejected": -1.3871896266937256, "loss": 1.2211, "nll_loss": 1.1597373485565186, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.11841676384210587, "rewards/margins": 0.02030220814049244, "rewards/rejected": -0.13871899247169495, "step": 460 }, { "epoch": 0.08, "grad_norm": 0.4524146020412445, "learning_rate": 7.784539234240792e-06, "log_odds_chosen": 0.5339788198471069, "log_odds_ratio": -0.55875563621521, "logits/chosen": -0.31525543332099915, "logits/rejected": -0.3321399390697479, "logps/chosen": -1.1199116706848145, "logps/rejected": -1.5095632076263428, "loss": 1.1568, "nll_loss": 1.1008961200714111, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11199116706848145, "rewards/margins": 0.03896515816450119, "rewards/rejected": -0.15095631778240204, "step": 470 }, { "epoch": 0.09, "grad_norm": 1.2673509120941162, "learning_rate": 7.778715970301354e-06, "log_odds_chosen": 0.4090334475040436, "log_odds_ratio": -0.6181383728981018, "logits/chosen": -0.3052888512611389, "logits/rejected": -0.32204440236091614, "logps/chosen": -1.1738417148590088, "logps/rejected": -1.483721137046814, "loss": 1.1545, "nll_loss": 1.092673420906067, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.1173841580748558, "rewards/margins": 0.03098795935511589, "rewards/rejected": -0.1483720988035202, "step": 480 }, { "epoch": 0.09, "grad_norm": 1.0262770652770996, "learning_rate": 7.772892706361916e-06, "log_odds_chosen": 0.3209136426448822, "log_odds_ratio": -0.6104674339294434, "logits/chosen": -0.30723345279693604, "logits/rejected": -0.3061657249927521, "logps/chosen": -1.203070044517517, "logps/rejected": -1.403160572052002, "loss": 1.1315, "nll_loss": 1.0704646110534668, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.12030700594186783, "rewards/margins": 0.02000904455780983, "rewards/rejected": -0.14031605422496796, "step": 490 }, { "epoch": 0.09, "grad_norm": 0.8104180097579956, "learning_rate": 7.767069442422477e-06, "log_odds_chosen": 0.3832394480705261, "log_odds_ratio": -0.5944157838821411, "logits/chosen": -0.29042813181877136, "logits/rejected": -0.3143666684627533, "logps/chosen": -1.1808339357376099, "logps/rejected": -1.447609543800354, "loss": 1.1704, "nll_loss": 1.110947608947754, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1180833950638771, "rewards/margins": 0.026677558198571205, "rewards/rejected": -0.14476095139980316, "step": 500 }, { "epoch": 0.09, "grad_norm": 0.7807692885398865, "learning_rate": 7.761246178483039e-06, "log_odds_chosen": 0.2518574893474579, "log_odds_ratio": -0.6599031090736389, "logits/chosen": -0.21843962371349335, "logits/rejected": -0.2486545592546463, "logps/chosen": -1.1892979145050049, "logps/rejected": -1.3361047506332397, "loss": 1.1173, "nll_loss": 1.0513516664505005, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.11892978847026825, "rewards/margins": 0.014680701307952404, "rewards/rejected": -0.13361048698425293, "step": 510 }, { "epoch": 0.09, "grad_norm": 0.7994462847709656, "learning_rate": 7.755422914543601e-06, "log_odds_chosen": 0.3900667428970337, "log_odds_ratio": -0.5982797741889954, "logits/chosen": -0.19965076446533203, "logits/rejected": -0.2556760013103485, "logps/chosen": -1.0754297971725464, "logps/rejected": -1.3169327974319458, "loss": 1.1191, "nll_loss": 1.0592612028121948, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1075429767370224, "rewards/margins": 0.024150308221578598, "rewards/rejected": -0.1316932737827301, "step": 520 }, { "epoch": 0.1, "grad_norm": 1.0434073209762573, "learning_rate": 7.749599650604164e-06, "log_odds_chosen": 0.30178144574165344, "log_odds_ratio": -0.6079939007759094, "logits/chosen": -0.23979106545448303, "logits/rejected": -0.2712782025337219, "logps/chosen": -1.1630055904388428, "logps/rejected": -1.3505280017852783, "loss": 1.1351, "nll_loss": 1.0743471384048462, "rewards/accuracies": 0.625, "rewards/chosen": -0.1163005605340004, "rewards/margins": 0.01875222660601139, "rewards/rejected": -0.13505280017852783, "step": 530 }, { "epoch": 0.1, "grad_norm": 0.9403761029243469, "learning_rate": 7.743776386664726e-06, "log_odds_chosen": 0.3626457452774048, "log_odds_ratio": -0.6064115762710571, "logits/chosen": -0.2964823842048645, "logits/rejected": -0.3309454917907715, "logps/chosen": -1.1040284633636475, "logps/rejected": -1.3232171535491943, "loss": 1.0933, "nll_loss": 1.0326130390167236, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.11040283739566803, "rewards/margins": 0.021918874233961105, "rewards/rejected": -0.13232171535491943, "step": 540 }, { "epoch": 0.1, "grad_norm": 1.0006709098815918, "learning_rate": 7.737953122725287e-06, "log_odds_chosen": 0.38954734802246094, "log_odds_ratio": -0.5961380004882812, "logits/chosen": -0.26212555170059204, "logits/rejected": -0.25596773624420166, "logps/chosen": -1.1498854160308838, "logps/rejected": -1.4123402833938599, "loss": 1.1369, "nll_loss": 1.0772807598114014, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1149885281920433, "rewards/margins": 0.0262454841285944, "rewards/rejected": -0.14123402535915375, "step": 550 }, { "epoch": 0.1, "grad_norm": 1.4980974197387695, "learning_rate": 7.732129858785849e-06, "log_odds_chosen": 0.5412741899490356, "log_odds_ratio": -0.5501648187637329, "logits/chosen": -0.26225027441978455, "logits/rejected": -0.2663401961326599, "logps/chosen": -1.0512454509735107, "logps/rejected": -1.4261060953140259, "loss": 1.1554, "nll_loss": 1.1004068851470947, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10512454807758331, "rewards/margins": 0.037486057728528976, "rewards/rejected": -0.14261062443256378, "step": 560 }, { "epoch": 0.1, "grad_norm": 1.3576551675796509, "learning_rate": 7.726306594846411e-06, "log_odds_chosen": 0.43464937806129456, "log_odds_ratio": -0.6025624871253967, "logits/chosen": -0.20509573817253113, "logits/rejected": -0.25421127676963806, "logps/chosen": -1.104231357574463, "logps/rejected": -1.3879539966583252, "loss": 1.1005, "nll_loss": 1.0402498245239258, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.11042313277721405, "rewards/margins": 0.028372278437018394, "rewards/rejected": -0.1387954205274582, "step": 570 }, { "epoch": 0.1, "grad_norm": 1.533570408821106, "learning_rate": 7.720483330906974e-06, "log_odds_chosen": 0.399638831615448, "log_odds_ratio": -0.586500883102417, "logits/chosen": -0.3640395402908325, "logits/rejected": -0.3354605734348297, "logps/chosen": -1.087601900100708, "logps/rejected": -1.341512680053711, "loss": 1.1592, "nll_loss": 1.100534200668335, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10876019299030304, "rewards/margins": 0.025391090661287308, "rewards/rejected": -0.13415126502513885, "step": 580 }, { "epoch": 0.11, "grad_norm": 0.598721981048584, "learning_rate": 7.714660066967534e-06, "log_odds_chosen": 0.477273166179657, "log_odds_ratio": -0.5699985027313232, "logits/chosen": -0.2669471800327301, "logits/rejected": -0.2903423011302948, "logps/chosen": -1.0827006101608276, "logps/rejected": -1.398123025894165, "loss": 1.1075, "nll_loss": 1.0505017042160034, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1082700714468956, "rewards/margins": 0.031542230397462845, "rewards/rejected": -0.13981230556964874, "step": 590 }, { "epoch": 0.11, "grad_norm": 0.9932595491409302, "learning_rate": 7.708836803028096e-06, "log_odds_chosen": 0.3443313539028168, "log_odds_ratio": -0.61859130859375, "logits/chosen": -0.2856084704399109, "logits/rejected": -0.3268492817878723, "logps/chosen": -1.112980604171753, "logps/rejected": -1.3373619318008423, "loss": 1.1029, "nll_loss": 1.0410178899765015, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.11129806190729141, "rewards/margins": 0.02243814989924431, "rewards/rejected": -0.13373620808124542, "step": 600 }, { "epoch": 0.11, "grad_norm": 1.115981936454773, "learning_rate": 7.703013539088659e-06, "log_odds_chosen": 0.46272382140159607, "log_odds_ratio": -0.5593470931053162, "logits/chosen": -0.24541564285755157, "logits/rejected": -0.26886048913002014, "logps/chosen": -1.2253077030181885, "logps/rejected": -1.542391061782837, "loss": 1.0993, "nll_loss": 1.0433661937713623, "rewards/accuracies": 0.75, "rewards/chosen": -0.12253077328205109, "rewards/margins": 0.031708355993032455, "rewards/rejected": -0.15423911809921265, "step": 610 }, { "epoch": 0.11, "grad_norm": 1.0807980298995972, "learning_rate": 7.697190275149221e-06, "log_odds_chosen": 0.40578898787498474, "log_odds_ratio": -0.5844072103500366, "logits/chosen": -0.26643887162208557, "logits/rejected": -0.2956678867340088, "logps/chosen": -1.0739221572875977, "logps/rejected": -1.3498246669769287, "loss": 1.0983, "nll_loss": 1.0398545265197754, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10739222913980484, "rewards/margins": 0.027590245008468628, "rewards/rejected": -0.13498248159885406, "step": 620 }, { "epoch": 0.11, "grad_norm": 1.0824393033981323, "learning_rate": 7.691367011209783e-06, "log_odds_chosen": 0.2399287223815918, "log_odds_ratio": -0.6483467817306519, "logits/chosen": -0.23387715220451355, "logits/rejected": -0.2537880837917328, "logps/chosen": -1.0878570079803467, "logps/rejected": -1.2441601753234863, "loss": 1.1431, "nll_loss": 1.0782992839813232, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1087857261300087, "rewards/margins": 0.015630314126610756, "rewards/rejected": -0.12441603094339371, "step": 630 }, { "epoch": 0.12, "grad_norm": 1.312685489654541, "learning_rate": 7.685543747270344e-06, "log_odds_chosen": 0.2941173017024994, "log_odds_ratio": -0.6299890279769897, "logits/chosen": -0.23893220722675323, "logits/rejected": -0.26458939909935, "logps/chosen": -1.0712026357650757, "logps/rejected": -1.277134656906128, "loss": 1.0244, "nll_loss": 0.9613849520683289, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10712026059627533, "rewards/margins": 0.020593199878931046, "rewards/rejected": -0.12771347165107727, "step": 640 }, { "epoch": 0.12, "grad_norm": 0.5005902051925659, "learning_rate": 7.679720483330906e-06, "log_odds_chosen": 0.28637003898620605, "log_odds_ratio": -0.6562920808792114, "logits/chosen": -0.32920941710472107, "logits/rejected": -0.3482641875743866, "logps/chosen": -1.2317559719085693, "logps/rejected": -1.4107776880264282, "loss": 1.1082, "nll_loss": 1.0425524711608887, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12317559868097305, "rewards/margins": 0.01790216937661171, "rewards/rejected": -0.14107775688171387, "step": 650 }, { "epoch": 0.12, "grad_norm": 0.7764565348625183, "learning_rate": 7.673897219391469e-06, "log_odds_chosen": 0.5228947997093201, "log_odds_ratio": -0.5362275838851929, "logits/chosen": -0.25872281193733215, "logits/rejected": -0.3187440037727356, "logps/chosen": -1.0902519226074219, "logps/rejected": -1.4310792684555054, "loss": 1.0802, "nll_loss": 1.0265672206878662, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10902519524097443, "rewards/margins": 0.03408272564411163, "rewards/rejected": -0.14310793578624725, "step": 660 }, { "epoch": 0.12, "grad_norm": 1.2186826467514038, "learning_rate": 7.668073955452031e-06, "log_odds_chosen": 0.24523350596427917, "log_odds_ratio": -0.6810539960861206, "logits/chosen": -0.3111647963523865, "logits/rejected": -0.3507189452648163, "logps/chosen": -1.2376306056976318, "logps/rejected": -1.3808842897415161, "loss": 1.1803, "nll_loss": 1.112222671508789, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12376304715871811, "rewards/margins": 0.014325378462672234, "rewards/rejected": -0.1380884349346161, "step": 670 }, { "epoch": 0.12, "grad_norm": 1.0902864933013916, "learning_rate": 7.662250691512593e-06, "log_odds_chosen": 0.4521639347076416, "log_odds_ratio": -0.604234516620636, "logits/chosen": -0.315155029296875, "logits/rejected": -0.34290218353271484, "logps/chosen": -1.1858965158462524, "logps/rejected": -1.4938578605651855, "loss": 1.1126, "nll_loss": 1.0521882772445679, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.11858963966369629, "rewards/margins": 0.030796144157648087, "rewards/rejected": -0.14938578009605408, "step": 680 }, { "epoch": 0.12, "grad_norm": 1.0857912302017212, "learning_rate": 7.656427427573154e-06, "log_odds_chosen": 0.35988926887512207, "log_odds_ratio": -0.6070387959480286, "logits/chosen": -0.30350548028945923, "logits/rejected": -0.3145049810409546, "logps/chosen": -1.1082563400268555, "logps/rejected": -1.3548405170440674, "loss": 1.1194, "nll_loss": 1.058680772781372, "rewards/accuracies": 0.6875, "rewards/chosen": -0.11082563549280167, "rewards/margins": 0.024658426642417908, "rewards/rejected": -0.13548406958580017, "step": 690 }, { "epoch": 0.13, "grad_norm": 1.2661492824554443, "learning_rate": 7.650604163633716e-06, "log_odds_chosen": 0.3374677300453186, "log_odds_ratio": -0.6508690714836121, "logits/chosen": -0.3213859498500824, "logits/rejected": -0.31398090720176697, "logps/chosen": -1.2048908472061157, "logps/rejected": -1.44016695022583, "loss": 1.2272, "nll_loss": 1.162106990814209, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.12048908323049545, "rewards/margins": 0.023527618497610092, "rewards/rejected": -0.14401671290397644, "step": 700 }, { "epoch": 0.13, "grad_norm": 0.8489270210266113, "learning_rate": 7.644780899694279e-06, "log_odds_chosen": 0.4868837893009186, "log_odds_ratio": -0.569444477558136, "logits/chosen": -0.2966112196445465, "logits/rejected": -0.30416035652160645, "logps/chosen": -1.1295454502105713, "logps/rejected": -1.4607059955596924, "loss": 1.1246, "nll_loss": 1.067684531211853, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.11295454204082489, "rewards/margins": 0.03311605006456375, "rewards/rejected": -0.14607058465480804, "step": 710 }, { "epoch": 0.13, "grad_norm": 1.3026355504989624, "learning_rate": 7.63895763575484e-06, "log_odds_chosen": 0.344705194234848, "log_odds_ratio": -0.6008533239364624, "logits/chosen": -0.32720333337783813, "logits/rejected": -0.34578755497932434, "logps/chosen": -1.1115270853042603, "logps/rejected": -1.3362066745758057, "loss": 1.1358, "nll_loss": 1.0757068395614624, "rewards/accuracies": 0.6875, "rewards/chosen": -0.11115269362926483, "rewards/margins": 0.02246798202395439, "rewards/rejected": -0.13362067937850952, "step": 720 }, { "epoch": 0.13, "grad_norm": 1.5339288711547852, "learning_rate": 7.633134371815401e-06, "log_odds_chosen": 0.3415408134460449, "log_odds_ratio": -0.6154005527496338, "logits/chosen": -0.3425058424472809, "logits/rejected": -0.33901968598365784, "logps/chosen": -1.138835072517395, "logps/rejected": -1.3690025806427002, "loss": 1.1943, "nll_loss": 1.1327401399612427, "rewards/accuracies": 0.625, "rewards/chosen": -0.11388351023197174, "rewards/margins": 0.02301674149930477, "rewards/rejected": -0.13690023124217987, "step": 730 }, { "epoch": 0.13, "grad_norm": 1.1949843168258667, "learning_rate": 7.627311107875965e-06, "log_odds_chosen": 0.2753564715385437, "log_odds_ratio": -0.6625062227249146, "logits/chosen": -0.29811739921569824, "logits/rejected": -0.3414981961250305, "logps/chosen": -1.081146001815796, "logps/rejected": -1.2424064874649048, "loss": 1.1431, "nll_loss": 1.0768808126449585, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10811461508274078, "rewards/margins": 0.016126038506627083, "rewards/rejected": -0.12424063682556152, "step": 740 }, { "epoch": 0.14, "grad_norm": 3.6740400791168213, "learning_rate": 7.621487843936526e-06, "log_odds_chosen": 0.516070544719696, "log_odds_ratio": -0.5642696619033813, "logits/chosen": -0.29928848147392273, "logits/rejected": -0.35982462763786316, "logps/chosen": -1.093741774559021, "logps/rejected": -1.4399880170822144, "loss": 1.075, "nll_loss": 1.0185630321502686, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10937418788671494, "rewards/margins": 0.034624624997377396, "rewards/rejected": -0.14399881660938263, "step": 750 }, { "epoch": 0.14, "grad_norm": 1.1976426839828491, "learning_rate": 7.615664579997088e-06, "log_odds_chosen": 0.15615001320838928, "log_odds_ratio": -0.7201071977615356, "logits/chosen": -0.34353601932525635, "logits/rejected": -0.3323616683483124, "logps/chosen": -1.1542483568191528, "logps/rejected": -1.2572157382965088, "loss": 1.0689, "nll_loss": 0.9969244003295898, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11542483419179916, "rewards/margins": 0.010296729393303394, "rewards/rejected": -0.12572155892848969, "step": 760 }, { "epoch": 0.14, "grad_norm": 1.4891656637191772, "learning_rate": 7.60984131605765e-06, "log_odds_chosen": 0.42357462644577026, "log_odds_ratio": -0.6011026501655579, "logits/chosen": -0.30633580684661865, "logits/rejected": -0.2977932095527649, "logps/chosen": -0.9875582456588745, "logps/rejected": -1.237544298171997, "loss": 1.0333, "nll_loss": 0.9731782078742981, "rewards/accuracies": 0.625, "rewards/chosen": -0.09875582903623581, "rewards/margins": 0.024998605251312256, "rewards/rejected": -0.12375444173812866, "step": 770 }, { "epoch": 0.14, "grad_norm": 0.7760726809501648, "learning_rate": 7.604018052118211e-06, "log_odds_chosen": 0.41998594999313354, "log_odds_ratio": -0.592202365398407, "logits/chosen": -0.32606226205825806, "logits/rejected": -0.29085078835487366, "logps/chosen": -1.0062568187713623, "logps/rejected": -1.238694190979004, "loss": 1.0405, "nll_loss": 0.9813073873519897, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10062569379806519, "rewards/margins": 0.023243743926286697, "rewards/rejected": -0.12386943399906158, "step": 780 }, { "epoch": 0.14, "grad_norm": 1.3088407516479492, "learning_rate": 7.598194788178774e-06, "log_odds_chosen": 0.3967740535736084, "log_odds_ratio": -0.5978984832763672, "logits/chosen": -0.3288530707359314, "logits/rejected": -0.3492429852485657, "logps/chosen": -1.018615961074829, "logps/rejected": -1.2579083442687988, "loss": 1.0464, "nll_loss": 0.9865927696228027, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10186159610748291, "rewards/margins": 0.023929251357913017, "rewards/rejected": -0.12579084932804108, "step": 790 }, { "epoch": 0.14, "grad_norm": 0.9534358382225037, "learning_rate": 7.592371524239336e-06, "log_odds_chosen": 0.5231834650039673, "log_odds_ratio": -0.5512691140174866, "logits/chosen": -0.3196202218532562, "logits/rejected": -0.3314352035522461, "logps/chosen": -1.0690181255340576, "logps/rejected": -1.3845093250274658, "loss": 1.0595, "nll_loss": 1.0043781995773315, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10690182447433472, "rewards/margins": 0.0315491184592247, "rewards/rejected": -0.13845095038414001, "step": 800 }, { "epoch": 0.15, "grad_norm": 0.8771949410438538, "learning_rate": 7.586548260299897e-06, "log_odds_chosen": 0.18916703760623932, "log_odds_ratio": -0.6785593032836914, "logits/chosen": -0.3995281755924225, "logits/rejected": -0.3811416029930115, "logps/chosen": -1.0976070165634155, "logps/rejected": -1.21306574344635, "loss": 1.0863, "nll_loss": 1.0184518098831177, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10976070165634155, "rewards/margins": 0.01154586672782898, "rewards/rejected": -0.12130657583475113, "step": 810 }, { "epoch": 0.15, "grad_norm": 0.9495704770088196, "learning_rate": 7.58072499636046e-06, "log_odds_chosen": 0.3516364097595215, "log_odds_ratio": -0.6316573619842529, "logits/chosen": -0.3535796105861664, "logits/rejected": -0.344553679227829, "logps/chosen": -1.112302541732788, "logps/rejected": -1.2996736764907837, "loss": 1.1134, "nll_loss": 1.050210952758789, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.11123025417327881, "rewards/margins": 0.01873708888888359, "rewards/rejected": -0.1299673616886139, "step": 820 }, { "epoch": 0.15, "grad_norm": 0.8986438512802124, "learning_rate": 7.574901732421022e-06, "log_odds_chosen": 0.5606812238693237, "log_odds_ratio": -0.5470336675643921, "logits/chosen": -0.37523120641708374, "logits/rejected": -0.37770146131515503, "logps/chosen": -0.9329134225845337, "logps/rejected": -1.25912606716156, "loss": 1.0575, "nll_loss": 1.0027515888214111, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09329134225845337, "rewards/margins": 0.03262128308415413, "rewards/rejected": -0.1259126365184784, "step": 830 }, { "epoch": 0.15, "grad_norm": 0.5794965624809265, "learning_rate": 7.5690784684815835e-06, "log_odds_chosen": 0.4418957233428955, "log_odds_ratio": -0.5914565324783325, "logits/chosen": -0.3764459490776062, "logits/rejected": -0.4029974043369293, "logps/chosen": -0.9287961721420288, "logps/rejected": -1.1817586421966553, "loss": 1.1, "nll_loss": 1.0408680438995361, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09287962317466736, "rewards/margins": 0.02529624104499817, "rewards/rejected": -0.11817586421966553, "step": 840 }, { "epoch": 0.15, "grad_norm": 1.5248697996139526, "learning_rate": 7.563255204542146e-06, "log_odds_chosen": 0.34692031145095825, "log_odds_ratio": -0.6192424893379211, "logits/chosen": -0.4035261273384094, "logits/rejected": -0.38099542260169983, "logps/chosen": -0.9760900735855103, "logps/rejected": -1.1962649822235107, "loss": 1.0806, "nll_loss": 1.0186859369277954, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09760899841785431, "rewards/margins": 0.022017499431967735, "rewards/rejected": -0.11962650716304779, "step": 850 }, { "epoch": 0.16, "grad_norm": 0.8907870054244995, "learning_rate": 7.557431940602707e-06, "log_odds_chosen": 0.466513454914093, "log_odds_ratio": -0.6052361130714417, "logits/chosen": -0.4223414361476898, "logits/rejected": -0.42823654413223267, "logps/chosen": -0.9593068361282349, "logps/rejected": -1.228334665298462, "loss": 1.038, "nll_loss": 0.9774872064590454, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09593068063259125, "rewards/margins": 0.026902783662080765, "rewards/rejected": -0.12283346801996231, "step": 860 }, { "epoch": 0.16, "grad_norm": 0.7226473689079285, "learning_rate": 7.5516086766632695e-06, "log_odds_chosen": 0.553143322467804, "log_odds_ratio": -0.5825859308242798, "logits/chosen": -0.3536778390407562, "logits/rejected": -0.37367385625839233, "logps/chosen": -1.0212783813476562, "logps/rejected": -1.3241362571716309, "loss": 1.0562, "nll_loss": 0.9979656934738159, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10212783515453339, "rewards/margins": 0.030285779386758804, "rewards/rejected": -0.13241362571716309, "step": 870 }, { "epoch": 0.16, "grad_norm": 1.348470687866211, "learning_rate": 7.545785412723831e-06, "log_odds_chosen": 0.48230600357055664, "log_odds_ratio": -0.5734744668006897, "logits/chosen": -0.3653802275657654, "logits/rejected": -0.40828999876976013, "logps/chosen": -1.0022938251495361, "logps/rejected": -1.2667487859725952, "loss": 1.0923, "nll_loss": 1.0349771976470947, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10022939741611481, "rewards/margins": 0.026445496827363968, "rewards/rejected": -0.12667489051818848, "step": 880 }, { "epoch": 0.16, "grad_norm": 1.4232394695281982, "learning_rate": 7.539962148784393e-06, "log_odds_chosen": 0.4975571036338806, "log_odds_ratio": -0.5937734842300415, "logits/chosen": -0.37488216161727905, "logits/rejected": -0.4145907461643219, "logps/chosen": -0.9429531097412109, "logps/rejected": -1.2314273118972778, "loss": 1.0408, "nll_loss": 0.9814382791519165, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09429532289505005, "rewards/margins": 0.028847401961684227, "rewards/rejected": -0.12314271926879883, "step": 890 }, { "epoch": 0.16, "grad_norm": 0.7326558828353882, "learning_rate": 7.534138884844956e-06, "log_odds_chosen": 0.35102158784866333, "log_odds_ratio": -0.6554730534553528, "logits/chosen": -0.4667816162109375, "logits/rejected": -0.4587486684322357, "logps/chosen": -1.1637868881225586, "logps/rejected": -1.3463655710220337, "loss": 1.1636, "nll_loss": 1.098016381263733, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.11637868732213974, "rewards/margins": 0.018257874995470047, "rewards/rejected": -0.13463656604290009, "step": 900 }, { "epoch": 0.16, "grad_norm": 0.9966881275177002, "learning_rate": 7.528315620905517e-06, "log_odds_chosen": 0.5971187353134155, "log_odds_ratio": -0.5196677446365356, "logits/chosen": -0.35535866022109985, "logits/rejected": -0.38316792249679565, "logps/chosen": -0.9760394096374512, "logps/rejected": -1.33073091506958, "loss": 0.9965, "nll_loss": 0.9444907903671265, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0976039469242096, "rewards/margins": 0.03546914458274841, "rewards/rejected": -0.133073091506958, "step": 910 }, { "epoch": 0.17, "grad_norm": 1.4429069757461548, "learning_rate": 7.522492356966079e-06, "log_odds_chosen": 0.30998191237449646, "log_odds_ratio": -0.6428655385971069, "logits/chosen": -0.38161906599998474, "logits/rejected": -0.39548197388648987, "logps/chosen": -0.9770835638046265, "logps/rejected": -1.1524428129196167, "loss": 1.071, "nll_loss": 1.0066882371902466, "rewards/accuracies": 0.625, "rewards/chosen": -0.09770835936069489, "rewards/margins": 0.01753593422472477, "rewards/rejected": -0.11524428427219391, "step": 920 }, { "epoch": 0.17, "grad_norm": 0.8039138317108154, "learning_rate": 7.516669093026642e-06, "log_odds_chosen": 0.4522746503353119, "log_odds_ratio": -0.5700639486312866, "logits/chosen": -0.3611920475959778, "logits/rejected": -0.3778507113456726, "logps/chosen": -0.9865026473999023, "logps/rejected": -1.2545497417449951, "loss": 0.9676, "nll_loss": 0.910589337348938, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.098650261759758, "rewards/margins": 0.02680472657084465, "rewards/rejected": -0.12545499205589294, "step": 930 }, { "epoch": 0.17, "grad_norm": 1.1389050483703613, "learning_rate": 7.510845829087203e-06, "log_odds_chosen": 0.5116861462593079, "log_odds_ratio": -0.5802291035652161, "logits/chosen": -0.3478499948978424, "logits/rejected": -0.36575666069984436, "logps/chosen": -1.0669629573822021, "logps/rejected": -1.360779047012329, "loss": 1.0806, "nll_loss": 1.0225512981414795, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10669630765914917, "rewards/margins": 0.02938159741461277, "rewards/rejected": -0.1360778957605362, "step": 940 }, { "epoch": 0.17, "grad_norm": 1.0509241819381714, "learning_rate": 7.505022565147765e-06, "log_odds_chosen": 0.6026821136474609, "log_odds_ratio": -0.5167630314826965, "logits/chosen": -0.3945934772491455, "logits/rejected": -0.40920257568359375, "logps/chosen": -0.9981874227523804, "logps/rejected": -1.37343430519104, "loss": 1.0228, "nll_loss": 0.9711018800735474, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09981875121593475, "rewards/margins": 0.03752467781305313, "rewards/rejected": -0.13734343647956848, "step": 950 }, { "epoch": 0.17, "grad_norm": 0.6846367716789246, "learning_rate": 7.499199301208327e-06, "log_odds_chosen": 0.59569251537323, "log_odds_ratio": -0.5624656081199646, "logits/chosen": -0.3632737994194031, "logits/rejected": -0.4208325445652008, "logps/chosen": -0.9958592653274536, "logps/rejected": -1.3909313678741455, "loss": 1.034, "nll_loss": 0.977750301361084, "rewards/accuracies": 0.625, "rewards/chosen": -0.09958592802286148, "rewards/margins": 0.03950721025466919, "rewards/rejected": -0.13909313082695007, "step": 960 }, { "epoch": 0.18, "grad_norm": 1.6020110845565796, "learning_rate": 7.493376037268888e-06, "log_odds_chosen": 0.3030172884464264, "log_odds_ratio": -0.6338340044021606, "logits/chosen": -0.41403812170028687, "logits/rejected": -0.417144238948822, "logps/chosen": -1.0414340496063232, "logps/rejected": -1.2572132349014282, "loss": 1.0919, "nll_loss": 1.028564691543579, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10414339601993561, "rewards/margins": 0.021577920764684677, "rewards/rejected": -0.12572133541107178, "step": 970 }, { "epoch": 0.18, "grad_norm": 0.6520597338676453, "learning_rate": 7.487552773329451e-06, "log_odds_chosen": 0.4331362843513489, "log_odds_ratio": -0.5906507968902588, "logits/chosen": -0.37529271841049194, "logits/rejected": -0.3968786597251892, "logps/chosen": -1.0254571437835693, "logps/rejected": -1.3230538368225098, "loss": 1.0314, "nll_loss": 0.972353458404541, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10254571586847305, "rewards/margins": 0.029759686440229416, "rewards/rejected": -0.13230539858341217, "step": 980 }, { "epoch": 0.18, "grad_norm": 1.175098180770874, "learning_rate": 7.481729509390013e-06, "log_odds_chosen": 0.7574179768562317, "log_odds_ratio": -0.4986720681190491, "logits/chosen": -0.280872642993927, "logits/rejected": -0.3123845160007477, "logps/chosen": -0.8410719037055969, "logps/rejected": -1.3167189359664917, "loss": 0.8723, "nll_loss": 0.8224380612373352, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.0841071829199791, "rewards/margins": 0.04756471887230873, "rewards/rejected": -0.13167190551757812, "step": 990 }, { "epoch": 0.18, "grad_norm": 0.8352462649345398, "learning_rate": 7.4759062454505745e-06, "log_odds_chosen": 0.3969859778881073, "log_odds_ratio": -0.5995320081710815, "logits/chosen": -0.3335706293582916, "logits/rejected": -0.3678201735019684, "logps/chosen": -0.9420549273490906, "logps/rejected": -1.1878598928451538, "loss": 0.9524, "nll_loss": 0.8924533724784851, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09420549869537354, "rewards/margins": 0.02458050288259983, "rewards/rejected": -0.11878599971532822, "step": 1000 }, { "epoch": 0.18, "grad_norm": 0.8329207301139832, "learning_rate": 7.470082981511137e-06, "log_odds_chosen": 0.4053170084953308, "log_odds_ratio": -0.6159269213676453, "logits/chosen": -0.28889894485473633, "logits/rejected": -0.30135685205459595, "logps/chosen": -1.0672948360443115, "logps/rejected": -1.2995731830596924, "loss": 1.0576, "nll_loss": 0.9960120320320129, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10672948509454727, "rewards/margins": 0.023227838799357414, "rewards/rejected": -0.12995730340480804, "step": 1010 }, { "epoch": 0.18, "grad_norm": 1.6608293056488037, "learning_rate": 7.464259717571699e-06, "log_odds_chosen": 0.2776438593864441, "log_odds_ratio": -0.6814143061637878, "logits/chosen": -0.3903682231903076, "logits/rejected": -0.4070429801940918, "logps/chosen": -1.2205824851989746, "logps/rejected": -1.3949607610702515, "loss": 1.1705, "nll_loss": 1.1023309230804443, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12205825746059418, "rewards/margins": 0.01743781939148903, "rewards/rejected": -0.1394960731267929, "step": 1020 }, { "epoch": 0.19, "grad_norm": 1.8429490327835083, "learning_rate": 7.4584364536322606e-06, "log_odds_chosen": 0.4468922019004822, "log_odds_ratio": -0.6024754047393799, "logits/chosen": -0.3262963891029358, "logits/rejected": -0.30852293968200684, "logps/chosen": -1.005517601966858, "logps/rejected": -1.2748384475708008, "loss": 1.0482, "nll_loss": 0.9879406094551086, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10055176913738251, "rewards/margins": 0.02693208120763302, "rewards/rejected": -0.12748384475708008, "step": 1030 }, { "epoch": 0.19, "grad_norm": 1.0084201097488403, "learning_rate": 7.452613189692822e-06, "log_odds_chosen": 0.3204442858695984, "log_odds_ratio": -0.6441566944122314, "logits/chosen": -0.3281877934932709, "logits/rejected": -0.34244847297668457, "logps/chosen": -1.0889025926589966, "logps/rejected": -1.27254056930542, "loss": 1.0895, "nll_loss": 1.0251226425170898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10889027267694473, "rewards/margins": 0.01836378499865532, "rewards/rejected": -0.12725405395030975, "step": 1040 }, { "epoch": 0.19, "grad_norm": 2.1175448894500732, "learning_rate": 7.446789925753384e-06, "log_odds_chosen": 0.49451178312301636, "log_odds_ratio": -0.5911440849304199, "logits/chosen": -0.392509400844574, "logits/rejected": -0.41400307416915894, "logps/chosen": -1.0346556901931763, "logps/rejected": -1.3819650411605835, "loss": 1.0346, "nll_loss": 0.9754676818847656, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10346555709838867, "rewards/margins": 0.0347309373319149, "rewards/rejected": -0.13819649815559387, "step": 1050 }, { "epoch": 0.19, "grad_norm": 1.266845464706421, "learning_rate": 7.440966661813946e-06, "log_odds_chosen": 0.5281954407691956, "log_odds_ratio": -0.559390664100647, "logits/chosen": -0.3589046001434326, "logits/rejected": -0.36148151755332947, "logps/chosen": -0.9146555066108704, "logps/rejected": -1.2053717374801636, "loss": 1.0095, "nll_loss": 0.9535647630691528, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0914655476808548, "rewards/margins": 0.029071617871522903, "rewards/rejected": -0.1205371618270874, "step": 1060 }, { "epoch": 0.19, "grad_norm": 0.6916521191596985, "learning_rate": 7.435143397874508e-06, "log_odds_chosen": 0.562498927116394, "log_odds_ratio": -0.5646517872810364, "logits/chosen": -0.3899080455303192, "logits/rejected": -0.3930276334285736, "logps/chosen": -0.9596776962280273, "logps/rejected": -1.2929902076721191, "loss": 1.0957, "nll_loss": 1.0391947031021118, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09596777707338333, "rewards/margins": 0.0333312451839447, "rewards/rejected": -0.12929901480674744, "step": 1070 }, { "epoch": 0.2, "grad_norm": 1.1792149543762207, "learning_rate": 7.42932013393507e-06, "log_odds_chosen": 0.551052451133728, "log_odds_ratio": -0.582817554473877, "logits/chosen": -0.38785818219184875, "logits/rejected": -0.4266396462917328, "logps/chosen": -1.1055101156234741, "logps/rejected": -1.4765472412109375, "loss": 1.0829, "nll_loss": 1.0245933532714844, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.11055102199316025, "rewards/margins": 0.03710371255874634, "rewards/rejected": -0.147654727101326, "step": 1080 }, { "epoch": 0.2, "grad_norm": 0.9841828942298889, "learning_rate": 7.423496869995633e-06, "log_odds_chosen": 0.3436262011528015, "log_odds_ratio": -0.6445078253746033, "logits/chosen": -0.39780935645103455, "logits/rejected": -0.39955899119377136, "logps/chosen": -1.16604483127594, "logps/rejected": -1.4091861248016357, "loss": 1.1291, "nll_loss": 1.0646467208862305, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11660448461771011, "rewards/margins": 0.024314161390066147, "rewards/rejected": -0.14091864228248596, "step": 1090 }, { "epoch": 0.2, "grad_norm": 1.4496309757232666, "learning_rate": 7.417673606056194e-06, "log_odds_chosen": 0.5248538255691528, "log_odds_ratio": -0.5791778564453125, "logits/chosen": -0.3379751443862915, "logits/rejected": -0.3429611921310425, "logps/chosen": -1.0097376108169556, "logps/rejected": -1.3185153007507324, "loss": 1.0299, "nll_loss": 0.971939742565155, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10097376257181168, "rewards/margins": 0.03087778389453888, "rewards/rejected": -0.13185153901576996, "step": 1100 }, { "epoch": 0.2, "grad_norm": 0.9468082785606384, "learning_rate": 7.4118503421167565e-06, "log_odds_chosen": 0.3713390827178955, "log_odds_ratio": -0.6597203612327576, "logits/chosen": -0.33122798800468445, "logits/rejected": -0.308432936668396, "logps/chosen": -1.116742491722107, "logps/rejected": -1.3301223516464233, "loss": 1.0757, "nll_loss": 1.00968599319458, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11167426407337189, "rewards/margins": 0.02133798785507679, "rewards/rejected": -0.13301225006580353, "step": 1110 }, { "epoch": 0.2, "grad_norm": 1.1661351919174194, "learning_rate": 7.406027078177319e-06, "log_odds_chosen": 0.23989257216453552, "log_odds_ratio": -0.6858891248703003, "logits/chosen": -0.399729460477829, "logits/rejected": -0.4175766110420227, "logps/chosen": -1.0951465368270874, "logps/rejected": -1.2085391283035278, "loss": 1.0652, "nll_loss": 0.9965718388557434, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10951465368270874, "rewards/margins": 0.011339271441102028, "rewards/rejected": -0.12085392326116562, "step": 1120 }, { "epoch": 0.2, "grad_norm": 1.182506799697876, "learning_rate": 7.4002038142378794e-06, "log_odds_chosen": 0.4975626468658447, "log_odds_ratio": -0.5809773206710815, "logits/chosen": -0.3122422993183136, "logits/rejected": -0.3628779947757721, "logps/chosen": -1.0512737035751343, "logps/rejected": -1.3611750602722168, "loss": 1.0589, "nll_loss": 1.0008268356323242, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10512737929821014, "rewards/margins": 0.030990120023489, "rewards/rejected": -0.13611750304698944, "step": 1130 }, { "epoch": 0.21, "grad_norm": 1.0794907808303833, "learning_rate": 7.394380550298442e-06, "log_odds_chosen": 0.44226646423339844, "log_odds_ratio": -0.5803853273391724, "logits/chosen": -0.34379130601882935, "logits/rejected": -0.35066670179367065, "logps/chosen": -1.0103256702423096, "logps/rejected": -1.2785089015960693, "loss": 1.0025, "nll_loss": 0.9445074200630188, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10103257745504379, "rewards/margins": 0.02681831642985344, "rewards/rejected": -0.12785090506076813, "step": 1140 }, { "epoch": 0.21, "grad_norm": 1.5841662883758545, "learning_rate": 7.388557286359004e-06, "log_odds_chosen": 0.5097657442092896, "log_odds_ratio": -0.5746845006942749, "logits/chosen": -0.28022605180740356, "logits/rejected": -0.3266647458076477, "logps/chosen": -0.9454906582832336, "logps/rejected": -1.2473700046539307, "loss": 0.997, "nll_loss": 0.9395227432250977, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09454905986785889, "rewards/margins": 0.03018794022500515, "rewards/rejected": -0.12473700195550919, "step": 1150 }, { "epoch": 0.21, "grad_norm": 1.1745624542236328, "learning_rate": 7.3827340224195655e-06, "log_odds_chosen": 0.44944530725479126, "log_odds_ratio": -0.6229075193405151, "logits/chosen": -0.3116758167743683, "logits/rejected": -0.352629691362381, "logps/chosen": -0.9129802584648132, "logps/rejected": -1.1999928951263428, "loss": 0.9836, "nll_loss": 0.9212974309921265, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09129802882671356, "rewards/margins": 0.028701260685920715, "rewards/rejected": -0.11999928951263428, "step": 1160 }, { "epoch": 0.21, "grad_norm": 1.03397798538208, "learning_rate": 7.376910758480128e-06, "log_odds_chosen": 0.5379278063774109, "log_odds_ratio": -0.5735832452774048, "logits/chosen": -0.31208473443984985, "logits/rejected": -0.3373704254627228, "logps/chosen": -0.9682260751724243, "logps/rejected": -1.293874740600586, "loss": 1.0553, "nll_loss": 0.9979804158210754, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09682260453701019, "rewards/margins": 0.03256487101316452, "rewards/rejected": -0.12938746809959412, "step": 1170 }, { "epoch": 0.21, "grad_norm": 0.9198788404464722, "learning_rate": 7.37108749454069e-06, "log_odds_chosen": 0.5509835481643677, "log_odds_ratio": -0.5716922283172607, "logits/chosen": -0.35105282068252563, "logits/rejected": -0.38916003704071045, "logps/chosen": -0.970038115978241, "logps/rejected": -1.3079442977905273, "loss": 1.0032, "nll_loss": 0.9459899663925171, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09700380265712738, "rewards/margins": 0.033790625631809235, "rewards/rejected": -0.1307944506406784, "step": 1180 }, { "epoch": 0.21, "grad_norm": 0.8531373143196106, "learning_rate": 7.365264230601252e-06, "log_odds_chosen": 0.4643592834472656, "log_odds_ratio": -0.5688709020614624, "logits/chosen": -0.3445436358451843, "logits/rejected": -0.3675573170185089, "logps/chosen": -0.9282194972038269, "logps/rejected": -1.1918764114379883, "loss": 0.9728, "nll_loss": 0.9158981442451477, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09282194823026657, "rewards/margins": 0.02636568807065487, "rewards/rejected": -0.11918763816356659, "step": 1190 }, { "epoch": 0.22, "grad_norm": 1.4299198389053345, "learning_rate": 7.359440966661814e-06, "log_odds_chosen": 0.6456435918807983, "log_odds_ratio": -0.5181793570518494, "logits/chosen": -0.3154214322566986, "logits/rejected": -0.37061676383018494, "logps/chosen": -0.9311151504516602, "logps/rejected": -1.331710934638977, "loss": 1.0204, "nll_loss": 0.9685548543930054, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09311151504516602, "rewards/margins": 0.04005958512425423, "rewards/rejected": -0.13317111134529114, "step": 1200 }, { "epoch": 0.22, "grad_norm": 1.1542985439300537, "learning_rate": 7.353617702722376e-06, "log_odds_chosen": 0.8618080019950867, "log_odds_ratio": -0.47413817048072815, "logits/chosen": -0.3236393332481384, "logits/rejected": -0.3237884044647217, "logps/chosen": -0.8999403119087219, "logps/rejected": -1.4406185150146484, "loss": 0.9189, "nll_loss": 0.8714414834976196, "rewards/accuracies": 0.75, "rewards/chosen": -0.08999402821063995, "rewards/margins": 0.054067812860012054, "rewards/rejected": -0.1440618485212326, "step": 1210 }, { "epoch": 0.22, "grad_norm": 1.0737377405166626, "learning_rate": 7.347794438782937e-06, "log_odds_chosen": 0.6115435361862183, "log_odds_ratio": -0.573743462562561, "logits/chosen": -0.34152212738990784, "logits/rejected": -0.35158854722976685, "logps/chosen": -0.9282305836677551, "logps/rejected": -1.25412917137146, "loss": 1.0161, "nll_loss": 0.958741307258606, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09282305091619492, "rewards/margins": 0.0325898714363575, "rewards/rejected": -0.1254129260778427, "step": 1220 }, { "epoch": 0.22, "grad_norm": 1.0002702474594116, "learning_rate": 7.341971174843499e-06, "log_odds_chosen": 0.4594632685184479, "log_odds_ratio": -0.605194628238678, "logits/chosen": -0.3332158923149109, "logits/rejected": -0.3719615936279297, "logps/chosen": -0.9105242490768433, "logps/rejected": -1.1700128316879272, "loss": 0.9943, "nll_loss": 0.9337489008903503, "rewards/accuracies": 0.625, "rewards/chosen": -0.09105244278907776, "rewards/margins": 0.025948846712708473, "rewards/rejected": -0.11700127273797989, "step": 1230 }, { "epoch": 0.22, "grad_norm": 0.6423687934875488, "learning_rate": 7.3361479109040614e-06, "log_odds_chosen": 0.5466501712799072, "log_odds_ratio": -0.5491333603858948, "logits/chosen": -0.38570788502693176, "logits/rejected": -0.41777318716049194, "logps/chosen": -1.0705764293670654, "logps/rejected": -1.4159528017044067, "loss": 1.1157, "nll_loss": 1.0607718229293823, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10705764591693878, "rewards/margins": 0.03453763201832771, "rewards/rejected": -0.1415952742099762, "step": 1240 }, { "epoch": 0.23, "grad_norm": 0.9487566351890564, "learning_rate": 7.330324646964623e-06, "log_odds_chosen": 0.6835122108459473, "log_odds_ratio": -0.541060209274292, "logits/chosen": -0.3431823253631592, "logits/rejected": -0.3847135305404663, "logps/chosen": -0.8870918154716492, "logps/rejected": -1.319023847579956, "loss": 0.9598, "nll_loss": 0.9056830406188965, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08870919793844223, "rewards/margins": 0.043193183839321136, "rewards/rejected": -0.13190238177776337, "step": 1250 }, { "epoch": 0.23, "grad_norm": 0.7351201176643372, "learning_rate": 7.324501383025185e-06, "log_odds_chosen": 0.5293228030204773, "log_odds_ratio": -0.6223096251487732, "logits/chosen": -0.41755110025405884, "logits/rejected": -0.40872058272361755, "logps/chosen": -0.9203858375549316, "logps/rejected": -1.2283319234848022, "loss": 1.0607, "nll_loss": 0.9984228014945984, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.0920385867357254, "rewards/margins": 0.030794614925980568, "rewards/rejected": -0.12283320724964142, "step": 1260 }, { "epoch": 0.23, "grad_norm": 0.7665841579437256, "learning_rate": 7.3186781190857475e-06, "log_odds_chosen": 0.4791649878025055, "log_odds_ratio": -0.5774362683296204, "logits/chosen": -0.37003204226493835, "logits/rejected": -0.40496787428855896, "logps/chosen": -0.9530180096626282, "logps/rejected": -1.2359671592712402, "loss": 1.0133, "nll_loss": 0.955549418926239, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0953017920255661, "rewards/margins": 0.028294924646615982, "rewards/rejected": -0.12359671294689178, "step": 1270 }, { "epoch": 0.23, "grad_norm": 0.449677050113678, "learning_rate": 7.312854855146309e-06, "log_odds_chosen": 0.5322305560112, "log_odds_ratio": -0.5769363641738892, "logits/chosen": -0.40565329790115356, "logits/rejected": -0.43567219376564026, "logps/chosen": -0.9717713594436646, "logps/rejected": -1.263688087463379, "loss": 1.0753, "nll_loss": 1.0175769329071045, "rewards/accuracies": 0.625, "rewards/chosen": -0.09717713296413422, "rewards/margins": 0.029191669076681137, "rewards/rejected": -0.12636880576610565, "step": 1280 }, { "epoch": 0.23, "grad_norm": 2.150800943374634, "learning_rate": 7.307031591206871e-06, "log_odds_chosen": 0.6120613813400269, "log_odds_ratio": -0.5586797595024109, "logits/chosen": -0.3177061080932617, "logits/rejected": -0.3986894190311432, "logps/chosen": -0.9389210939407349, "logps/rejected": -1.3461209535598755, "loss": 1.0326, "nll_loss": 0.976696789264679, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09389211237430573, "rewards/margins": 0.04071998968720436, "rewards/rejected": -0.13461211323738098, "step": 1290 }, { "epoch": 0.23, "grad_norm": 0.8204323649406433, "learning_rate": 7.301208327267434e-06, "log_odds_chosen": 0.48294463753700256, "log_odds_ratio": -0.5884346961975098, "logits/chosen": -0.38646456599235535, "logits/rejected": -0.36271700263023376, "logps/chosen": -0.8860207796096802, "logps/rejected": -1.1833808422088623, "loss": 0.9879, "nll_loss": 0.9290531277656555, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08860208094120026, "rewards/margins": 0.029736008495092392, "rewards/rejected": -0.11833808571100235, "step": 1300 }, { "epoch": 0.24, "grad_norm": 0.8067964911460876, "learning_rate": 7.295385063327995e-06, "log_odds_chosen": 0.45374226570129395, "log_odds_ratio": -0.6168845295906067, "logits/chosen": -0.3121108412742615, "logits/rejected": -0.3362121284008026, "logps/chosen": -1.0480873584747314, "logps/rejected": -1.3348156213760376, "loss": 0.9928, "nll_loss": 0.9311412572860718, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10480872541666031, "rewards/margins": 0.028672825545072556, "rewards/rejected": -0.13348154723644257, "step": 1310 }, { "epoch": 0.24, "grad_norm": 1.132856845855713, "learning_rate": 7.2895617993885565e-06, "log_odds_chosen": 0.6111911535263062, "log_odds_ratio": -0.5393105745315552, "logits/chosen": -0.3297664523124695, "logits/rejected": -0.3743050992488861, "logps/chosen": -0.9645237922668457, "logps/rejected": -1.344177007675171, "loss": 1.0082, "nll_loss": 0.9542475938796997, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09645237773656845, "rewards/margins": 0.037965334951877594, "rewards/rejected": -0.13441771268844604, "step": 1320 }, { "epoch": 0.24, "grad_norm": 0.9139919281005859, "learning_rate": 7.283738535449119e-06, "log_odds_chosen": 0.4854803681373596, "log_odds_ratio": -0.5775014162063599, "logits/chosen": -0.2660491466522217, "logits/rejected": -0.33125635981559753, "logps/chosen": -0.9659830927848816, "logps/rejected": -1.2355717420578003, "loss": 0.9855, "nll_loss": 0.9277377128601074, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0965983122587204, "rewards/margins": 0.026958853006362915, "rewards/rejected": -0.12355717271566391, "step": 1330 }, { "epoch": 0.24, "grad_norm": 0.8374517560005188, "learning_rate": 7.277915271509681e-06, "log_odds_chosen": 0.28127503395080566, "log_odds_ratio": -0.6820573806762695, "logits/chosen": -0.36101454496383667, "logits/rejected": -0.38573795557022095, "logps/chosen": -0.9789048433303833, "logps/rejected": -1.1818983554840088, "loss": 1.0525, "nll_loss": 0.9843059778213501, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09789048135280609, "rewards/margins": 0.020299362018704414, "rewards/rejected": -0.11818984895944595, "step": 1340 }, { "epoch": 0.24, "grad_norm": 0.6741836071014404, "learning_rate": 7.272092007570243e-06, "log_odds_chosen": 0.5547982454299927, "log_odds_ratio": -0.5649451017379761, "logits/chosen": -0.35805225372314453, "logits/rejected": -0.381672203540802, "logps/chosen": -0.9512104988098145, "logps/rejected": -1.2871150970458984, "loss": 1.0333, "nll_loss": 0.9767767190933228, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09512104839086533, "rewards/margins": 0.03359045460820198, "rewards/rejected": -0.1287115067243576, "step": 1350 }, { "epoch": 0.25, "grad_norm": 1.025458574295044, "learning_rate": 7.266268743630805e-06, "log_odds_chosen": 0.49380841851234436, "log_odds_ratio": -0.5802966356277466, "logits/chosen": -0.248256117105484, "logits/rejected": -0.3065466284751892, "logps/chosen": -0.8993231058120728, "logps/rejected": -1.1698864698410034, "loss": 0.9771, "nll_loss": 0.9190645217895508, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08993230760097504, "rewards/margins": 0.027056332677602768, "rewards/rejected": -0.1169886365532875, "step": 1360 }, { "epoch": 0.25, "grad_norm": 0.9658330678939819, "learning_rate": 7.260445479691367e-06, "log_odds_chosen": 0.5135058164596558, "log_odds_ratio": -0.5862656831741333, "logits/chosen": -0.3769971430301666, "logits/rejected": -0.3671988546848297, "logps/chosen": -0.9479128122329712, "logps/rejected": -1.2691059112548828, "loss": 1.0174, "nll_loss": 0.9587678909301758, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09479127824306488, "rewards/margins": 0.03211931139230728, "rewards/rejected": -0.12691059708595276, "step": 1370 }, { "epoch": 0.25, "grad_norm": 0.6349258422851562, "learning_rate": 7.254622215751929e-06, "log_odds_chosen": 0.599760890007019, "log_odds_ratio": -0.576224684715271, "logits/chosen": -0.3141711354255676, "logits/rejected": -0.363479346036911, "logps/chosen": -1.0505956411361694, "logps/rejected": -1.4687645435333252, "loss": 1.083, "nll_loss": 1.0253790616989136, "rewards/accuracies": 0.625, "rewards/chosen": -0.10505956411361694, "rewards/margins": 0.041816871613264084, "rewards/rejected": -0.14687643945217133, "step": 1380 }, { "epoch": 0.25, "grad_norm": 0.806663990020752, "learning_rate": 7.24879895181249e-06, "log_odds_chosen": 0.5944348573684692, "log_odds_ratio": -0.5526763200759888, "logits/chosen": -0.2973032593727112, "logits/rejected": -0.3449651300907135, "logps/chosen": -0.910653293132782, "logps/rejected": -1.2252440452575684, "loss": 0.9812, "nll_loss": 0.9258831143379211, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09106533229351044, "rewards/margins": 0.03145906329154968, "rewards/rejected": -0.12252439558506012, "step": 1390 }, { "epoch": 0.25, "grad_norm": 1.9585217237472534, "learning_rate": 7.2429756878730525e-06, "log_odds_chosen": 0.5735687017440796, "log_odds_ratio": -0.5608910322189331, "logits/chosen": -0.3576315641403198, "logits/rejected": -0.387492835521698, "logps/chosen": -0.9962593913078308, "logps/rejected": -1.3501719236373901, "loss": 1.0814, "nll_loss": 1.0253442525863647, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09962593764066696, "rewards/margins": 0.03539125993847847, "rewards/rejected": -0.13501721620559692, "step": 1400 }, { "epoch": 0.25, "grad_norm": 0.9405146837234497, "learning_rate": 7.237152423933614e-06, "log_odds_chosen": 0.4652928411960602, "log_odds_ratio": -0.6031362414360046, "logits/chosen": -0.34576138854026794, "logits/rejected": -0.34349748492240906, "logps/chosen": -1.018971562385559, "logps/rejected": -1.3038069009780884, "loss": 1.0472, "nll_loss": 0.9869211316108704, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10189716517925262, "rewards/margins": 0.028483539819717407, "rewards/rejected": -0.13038070499897003, "step": 1410 }, { "epoch": 0.26, "grad_norm": 1.1048870086669922, "learning_rate": 7.231329159994176e-06, "log_odds_chosen": 0.6356562972068787, "log_odds_ratio": -0.5308060646057129, "logits/chosen": -0.3209785521030426, "logits/rejected": -0.3367080092430115, "logps/chosen": -0.9682799577713013, "logps/rejected": -1.3677337169647217, "loss": 0.9475, "nll_loss": 0.8944019079208374, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09682799875736237, "rewards/margins": 0.039945363998413086, "rewards/rejected": -0.13677337765693665, "step": 1420 }, { "epoch": 0.26, "grad_norm": 1.5478053092956543, "learning_rate": 7.2255058960547386e-06, "log_odds_chosen": 0.2101658582687378, "log_odds_ratio": -0.692489504814148, "logits/chosen": -0.34666886925697327, "logits/rejected": -0.3526953160762787, "logps/chosen": -0.9766533970832825, "logps/rejected": -1.099202036857605, "loss": 1.0478, "nll_loss": 0.9785119891166687, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.09766535460948944, "rewards/margins": 0.012254852801561356, "rewards/rejected": -0.1099202036857605, "step": 1430 }, { "epoch": 0.26, "grad_norm": 1.153688669204712, "learning_rate": 7.2196826321153e-06, "log_odds_chosen": 0.3514810800552368, "log_odds_ratio": -0.6305993795394897, "logits/chosen": -0.34288614988327026, "logits/rejected": -0.3488093912601471, "logps/chosen": -1.0037890672683716, "logps/rejected": -1.2000821828842163, "loss": 1.0715, "nll_loss": 1.0084068775177002, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10037890821695328, "rewards/margins": 0.019629308953881264, "rewards/rejected": -0.12000821530818939, "step": 1440 }, { "epoch": 0.26, "grad_norm": 1.224960446357727, "learning_rate": 7.213859368175862e-06, "log_odds_chosen": 0.3187094032764435, "log_odds_ratio": -0.6215401887893677, "logits/chosen": -0.31062254309654236, "logits/rejected": -0.3334897756576538, "logps/chosen": -0.8957698941230774, "logps/rejected": -1.0763921737670898, "loss": 1.0235, "nll_loss": 0.9613516926765442, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08957698941230774, "rewards/margins": 0.01806223951280117, "rewards/rejected": -0.10763921588659286, "step": 1450 }, { "epoch": 0.26, "grad_norm": 0.8180124163627625, "learning_rate": 7.208036104236425e-06, "log_odds_chosen": 0.46529459953308105, "log_odds_ratio": -0.5676907896995544, "logits/chosen": -0.3247535824775696, "logits/rejected": -0.39403122663497925, "logps/chosen": -0.9336856603622437, "logps/rejected": -1.2000067234039307, "loss": 1.0012, "nll_loss": 0.9444776773452759, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09336856752634048, "rewards/margins": 0.026632100343704224, "rewards/rejected": -0.1200006753206253, "step": 1460 }, { "epoch": 0.27, "grad_norm": 1.2947418689727783, "learning_rate": 7.202212840296986e-06, "log_odds_chosen": 0.24858565628528595, "log_odds_ratio": -0.7275146245956421, "logits/chosen": -0.34301838278770447, "logits/rejected": -0.3836689889431, "logps/chosen": -1.0797077417373657, "logps/rejected": -1.202086091041565, "loss": 1.08, "nll_loss": 1.007287621498108, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10797077417373657, "rewards/margins": 0.012237833812832832, "rewards/rejected": -0.12020860612392426, "step": 1470 }, { "epoch": 0.27, "grad_norm": 0.8348037600517273, "learning_rate": 7.1963895763575476e-06, "log_odds_chosen": 0.4048139452934265, "log_odds_ratio": -0.6023651957511902, "logits/chosen": -0.2860475182533264, "logits/rejected": -0.31518831849098206, "logps/chosen": -0.8968518972396851, "logps/rejected": -1.1367640495300293, "loss": 1.0037, "nll_loss": 0.9434369802474976, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08968518674373627, "rewards/margins": 0.023991206660866737, "rewards/rejected": -0.11367639154195786, "step": 1480 }, { "epoch": 0.27, "grad_norm": 1.1033504009246826, "learning_rate": 7.19056631241811e-06, "log_odds_chosen": 0.3861461281776428, "log_odds_ratio": -0.6319631338119507, "logits/chosen": -0.329324334859848, "logits/rejected": -0.3265681266784668, "logps/chosen": -0.9853197932243347, "logps/rejected": -1.2213842868804932, "loss": 1.0146, "nll_loss": 0.9514220356941223, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09853197634220123, "rewards/margins": 0.02360645867884159, "rewards/rejected": -0.12213845551013947, "step": 1490 }, { "epoch": 0.27, "grad_norm": 1.4328731298446655, "learning_rate": 7.184743048478672e-06, "log_odds_chosen": 0.566982090473175, "log_odds_ratio": -0.5453047156333923, "logits/chosen": -0.2849673628807068, "logits/rejected": -0.31650617718696594, "logps/chosen": -0.9042051434516907, "logps/rejected": -1.2444576025009155, "loss": 0.9611, "nll_loss": 0.9066120386123657, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09042052179574966, "rewards/margins": 0.03402525186538696, "rewards/rejected": -0.12444577366113663, "step": 1500 }, { "epoch": 0.27, "grad_norm": 0.9411538243293762, "learning_rate": 7.178919784539234e-06, "log_odds_chosen": 0.6574187278747559, "log_odds_ratio": -0.5834145545959473, "logits/chosen": -0.28295382857322693, "logits/rejected": -0.3845768868923187, "logps/chosen": -0.8513942956924438, "logps/rejected": -1.3180423974990845, "loss": 1.0373, "nll_loss": 0.9789282083511353, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0851394310593605, "rewards/margins": 0.04666482284665108, "rewards/rejected": -0.13180424273014069, "step": 1510 }, { "epoch": 0.27, "grad_norm": 0.6901792883872986, "learning_rate": 7.173096520599796e-06, "log_odds_chosen": 0.4914397597312927, "log_odds_ratio": -0.6235078573226929, "logits/chosen": -0.2798933982849121, "logits/rejected": -0.3196839690208435, "logps/chosen": -0.9667059779167175, "logps/rejected": -1.2527544498443604, "loss": 0.9792, "nll_loss": 0.9168528318405151, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09667058289051056, "rewards/margins": 0.02860487625002861, "rewards/rejected": -0.12527546286582947, "step": 1520 }, { "epoch": 0.28, "grad_norm": 0.9170409440994263, "learning_rate": 7.167273256660358e-06, "log_odds_chosen": 0.46514517068862915, "log_odds_ratio": -0.5963708162307739, "logits/chosen": -0.3535362780094147, "logits/rejected": -0.3404978811740875, "logps/chosen": -0.9499415159225464, "logps/rejected": -1.2293248176574707, "loss": 0.9763, "nll_loss": 0.9166848063468933, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09499415010213852, "rewards/margins": 0.02793833613395691, "rewards/rejected": -0.12293247878551483, "step": 1530 }, { "epoch": 0.28, "grad_norm": 1.1653376817703247, "learning_rate": 7.16144999272092e-06, "log_odds_chosen": 0.5939033627510071, "log_odds_ratio": -0.5412915945053101, "logits/chosen": -0.34380143880844116, "logits/rejected": -0.40813955664634705, "logps/chosen": -0.9544633030891418, "logps/rejected": -1.2961674928665161, "loss": 1.0981, "nll_loss": 1.0439279079437256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09544633328914642, "rewards/margins": 0.034170426428318024, "rewards/rejected": -0.12961676716804504, "step": 1540 }, { "epoch": 0.28, "grad_norm": 1.4316825866699219, "learning_rate": 7.155626728781482e-06, "log_odds_chosen": 0.4217056334018707, "log_odds_ratio": -0.6345925331115723, "logits/chosen": -0.3553635776042938, "logits/rejected": -0.3846450448036194, "logps/chosen": -1.0682843923568726, "logps/rejected": -1.3270084857940674, "loss": 1.0435, "nll_loss": 0.9799984097480774, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10682845115661621, "rewards/margins": 0.025872424244880676, "rewards/rejected": -0.1327008754014969, "step": 1550 }, { "epoch": 0.28, "grad_norm": 0.8913134336471558, "learning_rate": 7.149803464842044e-06, "log_odds_chosen": 0.8833622932434082, "log_odds_ratio": -0.4926014840602875, "logits/chosen": -0.2923971116542816, "logits/rejected": -0.35514816641807556, "logps/chosen": -0.8891391754150391, "logps/rejected": -1.430625319480896, "loss": 1.0246, "nll_loss": 0.9753875732421875, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08891390264034271, "rewards/margins": 0.05414862558245659, "rewards/rejected": -0.1430625319480896, "step": 1560 }, { "epoch": 0.28, "grad_norm": 1.0268759727478027, "learning_rate": 7.143980200902605e-06, "log_odds_chosen": 0.32187631726264954, "log_odds_ratio": -0.642335057258606, "logits/chosen": -0.3550707697868347, "logits/rejected": -0.35717231035232544, "logps/chosen": -1.087780237197876, "logps/rejected": -1.2959010601043701, "loss": 1.0671, "nll_loss": 1.0029114484786987, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10877802222967148, "rewards/margins": 0.02081209048628807, "rewards/rejected": -0.12959010899066925, "step": 1570 }, { "epoch": 0.29, "grad_norm": 0.764402449131012, "learning_rate": 7.138156936963167e-06, "log_odds_chosen": 0.5447977185249329, "log_odds_ratio": -0.5645043253898621, "logits/chosen": -0.27879756689071655, "logits/rejected": -0.340719610452652, "logps/chosen": -0.88838130235672, "logps/rejected": -1.2113538980484009, "loss": 0.9794, "nll_loss": 0.9229621887207031, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0888381376862526, "rewards/margins": 0.03229725360870361, "rewards/rejected": -0.12113537639379501, "step": 1580 }, { "epoch": 0.29, "grad_norm": 1.1508593559265137, "learning_rate": 7.13233367302373e-06, "log_odds_chosen": 0.24824786186218262, "log_odds_ratio": -0.6948032975196838, "logits/chosen": -0.40458765625953674, "logits/rejected": -0.4210253655910492, "logps/chosen": -0.9826368093490601, "logps/rejected": -1.1311357021331787, "loss": 1.0984, "nll_loss": 1.0289161205291748, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.098263680934906, "rewards/margins": 0.0148498909547925, "rewards/rejected": -0.11311358213424683, "step": 1590 }, { "epoch": 0.29, "grad_norm": 0.8140817880630493, "learning_rate": 7.126510409084291e-06, "log_odds_chosen": 0.44691014289855957, "log_odds_ratio": -0.5997231602668762, "logits/chosen": -0.36742717027664185, "logits/rejected": -0.41214919090270996, "logps/chosen": -1.0089651346206665, "logps/rejected": -1.2913377285003662, "loss": 1.0365, "nll_loss": 0.9765187501907349, "rewards/accuracies": 0.625, "rewards/chosen": -0.10089650005102158, "rewards/margins": 0.02823726274073124, "rewards/rejected": -0.12913377583026886, "step": 1600 }, { "epoch": 0.29, "grad_norm": 0.9169033765792847, "learning_rate": 7.120687145144853e-06, "log_odds_chosen": 0.6452026963233948, "log_odds_ratio": -0.5194689035415649, "logits/chosen": -0.3512391746044159, "logits/rejected": -0.37762579321861267, "logps/chosen": -0.8982778787612915, "logps/rejected": -1.300342082977295, "loss": 1.0038, "nll_loss": 0.9518443942070007, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08982778340578079, "rewards/margins": 0.0402064248919487, "rewards/rejected": -0.1300342082977295, "step": 1610 }, { "epoch": 0.29, "grad_norm": 1.4523009061813354, "learning_rate": 7.114863881205416e-06, "log_odds_chosen": 0.4951610565185547, "log_odds_ratio": -0.5961709022521973, "logits/chosen": -0.33776336908340454, "logits/rejected": -0.37025144696235657, "logps/chosen": -1.0465290546417236, "logps/rejected": -1.3635728359222412, "loss": 1.0154, "nll_loss": 0.9557603597640991, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10465290397405624, "rewards/margins": 0.031704384833574295, "rewards/rejected": -0.13635727763175964, "step": 1620 }, { "epoch": 0.29, "grad_norm": 1.2989190816879272, "learning_rate": 7.109040617265977e-06, "log_odds_chosen": 0.535646915435791, "log_odds_ratio": -0.6097526550292969, "logits/chosen": -0.3525809347629547, "logits/rejected": -0.3878706991672516, "logps/chosen": -0.9263569116592407, "logps/rejected": -1.2173206806182861, "loss": 0.9963, "nll_loss": 0.9353706240653992, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09263569116592407, "rewards/margins": 0.029096385464072227, "rewards/rejected": -0.12173207849264145, "step": 1630 }, { "epoch": 0.3, "grad_norm": 1.1236441135406494, "learning_rate": 7.1032173533265394e-06, "log_odds_chosen": 0.7328363656997681, "log_odds_ratio": -0.5077277421951294, "logits/chosen": -0.3232725262641907, "logits/rejected": -0.3601072430610657, "logps/chosen": -0.8846950531005859, "logps/rejected": -1.3101729154586792, "loss": 0.9562, "nll_loss": 0.9054625630378723, "rewards/accuracies": 0.75, "rewards/chosen": -0.0884695053100586, "rewards/margins": 0.0425477959215641, "rewards/rejected": -0.1310172975063324, "step": 1640 }, { "epoch": 0.3, "grad_norm": 1.1808315515518188, "learning_rate": 7.097394089387102e-06, "log_odds_chosen": 0.8097518086433411, "log_odds_ratio": -0.4847545623779297, "logits/chosen": -0.32802054286003113, "logits/rejected": -0.3918638825416565, "logps/chosen": -0.9471396207809448, "logps/rejected": -1.4627947807312012, "loss": 0.9561, "nll_loss": 0.9076499938964844, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0947139710187912, "rewards/margins": 0.0515655092895031, "rewards/rejected": -0.1462794840335846, "step": 1650 }, { "epoch": 0.3, "grad_norm": 0.774590015411377, "learning_rate": 7.091570825447662e-06, "log_odds_chosen": 0.5359390377998352, "log_odds_ratio": -0.6051704287528992, "logits/chosen": -0.35709959268569946, "logits/rejected": -0.3970513939857483, "logps/chosen": -0.94941246509552, "logps/rejected": -1.2806012630462646, "loss": 1.1352, "nll_loss": 1.0746341943740845, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09494125843048096, "rewards/margins": 0.033118873834609985, "rewards/rejected": -0.12806013226509094, "step": 1660 }, { "epoch": 0.3, "grad_norm": 0.907768726348877, "learning_rate": 7.085747561508225e-06, "log_odds_chosen": 0.4143344461917877, "log_odds_ratio": -0.6266435384750366, "logits/chosen": -0.36355313658714294, "logits/rejected": -0.4045397639274597, "logps/chosen": -1.0518794059753418, "logps/rejected": -1.331181287765503, "loss": 1.0651, "nll_loss": 1.0024408102035522, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10518793016672134, "rewards/margins": 0.02793019451200962, "rewards/rejected": -0.13311812281608582, "step": 1670 }, { "epoch": 0.3, "grad_norm": 0.6284062266349792, "learning_rate": 7.079924297568787e-06, "log_odds_chosen": 0.3497571647167206, "log_odds_ratio": -0.6255709528923035, "logits/chosen": -0.348887175321579, "logits/rejected": -0.3884449005126953, "logps/chosen": -1.0277200937271118, "logps/rejected": -1.226231575012207, "loss": 1.0394, "nll_loss": 0.976884663105011, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10277201235294342, "rewards/margins": 0.019851163029670715, "rewards/rejected": -0.12262316793203354, "step": 1680 }, { "epoch": 0.31, "grad_norm": 1.5012038946151733, "learning_rate": 7.0741010336293484e-06, "log_odds_chosen": 0.5618478655815125, "log_odds_ratio": -0.5806422233581543, "logits/chosen": -0.34077805280685425, "logits/rejected": -0.38193902373313904, "logps/chosen": -0.9076236486434937, "logps/rejected": -1.2798479795455933, "loss": 0.9895, "nll_loss": 0.9314451217651367, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09076236188411713, "rewards/margins": 0.03722243756055832, "rewards/rejected": -0.12798479199409485, "step": 1690 }, { "epoch": 0.31, "grad_norm": 1.182563304901123, "learning_rate": 7.068277769689911e-06, "log_odds_chosen": 0.5676418542861938, "log_odds_ratio": -0.5794765949249268, "logits/chosen": -0.33637866377830505, "logits/rejected": -0.40183839201927185, "logps/chosen": -0.913240909576416, "logps/rejected": -1.2547906637191772, "loss": 1.0457, "nll_loss": 0.9877544641494751, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0913240909576416, "rewards/margins": 0.0341549888253212, "rewards/rejected": -0.1254790723323822, "step": 1700 }, { "epoch": 0.31, "grad_norm": 0.5410019159317017, "learning_rate": 7.062454505750473e-06, "log_odds_chosen": 0.615912139415741, "log_odds_ratio": -0.5161094665527344, "logits/chosen": -0.3636830449104309, "logits/rejected": -0.4102718234062195, "logps/chosen": -0.9589177966117859, "logps/rejected": -1.329530954360962, "loss": 1.0449, "nll_loss": 0.9932928085327148, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09589177370071411, "rewards/margins": 0.03706132620573044, "rewards/rejected": -0.13295309245586395, "step": 1710 }, { "epoch": 0.31, "grad_norm": 0.8718307018280029, "learning_rate": 7.056631241811035e-06, "log_odds_chosen": 0.3784236013889313, "log_odds_ratio": -0.6245784163475037, "logits/chosen": -0.4157410264015198, "logits/rejected": -0.42662128806114197, "logps/chosen": -1.0432878732681274, "logps/rejected": -1.2611668109893799, "loss": 1.1002, "nll_loss": 1.0377851724624634, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10432878881692886, "rewards/margins": 0.021787891164422035, "rewards/rejected": -0.12611667811870575, "step": 1720 }, { "epoch": 0.31, "grad_norm": 0.904116690158844, "learning_rate": 7.050807977871597e-06, "log_odds_chosen": 0.5137162804603577, "log_odds_ratio": -0.5803641080856323, "logits/chosen": -0.41569024324417114, "logits/rejected": -0.4513755440711975, "logps/chosen": -0.9598332643508911, "logps/rejected": -1.294640064239502, "loss": 1.0163, "nll_loss": 0.9582997560501099, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09598332643508911, "rewards/margins": 0.0334806889295578, "rewards/rejected": -0.1294640153646469, "step": 1730 }, { "epoch": 0.31, "grad_norm": 1.1222513914108276, "learning_rate": 7.044984713932159e-06, "log_odds_chosen": 0.5173107385635376, "log_odds_ratio": -0.5620402693748474, "logits/chosen": -0.3197651505470276, "logits/rejected": -0.3535226583480835, "logps/chosen": -1.0193755626678467, "logps/rejected": -1.3410420417785645, "loss": 0.9179, "nll_loss": 0.8617299199104309, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10193755477666855, "rewards/margins": 0.03216664493083954, "rewards/rejected": -0.13410422205924988, "step": 1740 }, { "epoch": 0.32, "grad_norm": 0.8727396130561829, "learning_rate": 7.039161449992721e-06, "log_odds_chosen": 0.5304024815559387, "log_odds_ratio": -0.5949326753616333, "logits/chosen": -0.4052054286003113, "logits/rejected": -0.4512867331504822, "logps/chosen": -1.019547462463379, "logps/rejected": -1.3655933141708374, "loss": 1.0608, "nll_loss": 1.0013364553451538, "rewards/accuracies": 0.625, "rewards/chosen": -0.10195475816726685, "rewards/margins": 0.03460458293557167, "rewards/rejected": -0.13655933737754822, "step": 1750 }, { "epoch": 0.32, "grad_norm": 1.6699724197387695, "learning_rate": 7.033338186053282e-06, "log_odds_chosen": 0.4551050066947937, "log_odds_ratio": -0.6072874665260315, "logits/chosen": -0.33777788281440735, "logits/rejected": -0.39305973052978516, "logps/chosen": -1.0166957378387451, "logps/rejected": -1.3022456169128418, "loss": 1.0423, "nll_loss": 0.9816068410873413, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10166957229375839, "rewards/margins": 0.028554990887641907, "rewards/rejected": -0.1302245706319809, "step": 1760 }, { "epoch": 0.32, "grad_norm": 1.614592432975769, "learning_rate": 7.027514922113844e-06, "log_odds_chosen": 0.39455699920654297, "log_odds_ratio": -0.6552709341049194, "logits/chosen": -0.3162342607975006, "logits/rejected": -0.41145092248916626, "logps/chosen": -0.913953423500061, "logps/rejected": -1.165477991104126, "loss": 0.96, "nll_loss": 0.8944529294967651, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09139533340930939, "rewards/margins": 0.025152459740638733, "rewards/rejected": -0.11654778569936752, "step": 1770 }, { "epoch": 0.32, "grad_norm": 0.8290024399757385, "learning_rate": 7.021691658174407e-06, "log_odds_chosen": 0.6237296462059021, "log_odds_ratio": -0.5952363014221191, "logits/chosen": -0.3944126069545746, "logits/rejected": -0.43316787481307983, "logps/chosen": -0.8954101800918579, "logps/rejected": -1.290221929550171, "loss": 1.0224, "nll_loss": 0.9628337621688843, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08954103291034698, "rewards/margins": 0.03948115557432175, "rewards/rejected": -0.12902218103408813, "step": 1780 }, { "epoch": 0.32, "grad_norm": 1.0954138040542603, "learning_rate": 7.015868394234968e-06, "log_odds_chosen": 0.5047041177749634, "log_odds_ratio": -0.5463986396789551, "logits/chosen": -0.39047911763191223, "logits/rejected": -0.40275081992149353, "logps/chosen": -1.0550428628921509, "logps/rejected": -1.3856205940246582, "loss": 1.0086, "nll_loss": 0.9539523124694824, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10550429672002792, "rewards/margins": 0.033057767897844315, "rewards/rejected": -0.13856205344200134, "step": 1790 }, { "epoch": 0.33, "grad_norm": 1.0443518161773682, "learning_rate": 7.0100451302955305e-06, "log_odds_chosen": 0.5747898817062378, "log_odds_ratio": -0.5704335570335388, "logits/chosen": -0.3137153685092926, "logits/rejected": -0.3747533857822418, "logps/chosen": -0.8636377453804016, "logps/rejected": -1.2245159149169922, "loss": 1.0042, "nll_loss": 0.9471246004104614, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0863637775182724, "rewards/margins": 0.03608780354261398, "rewards/rejected": -0.12245158851146698, "step": 1800 }, { "epoch": 0.33, "grad_norm": 1.2862238883972168, "learning_rate": 7.004221866356093e-06, "log_odds_chosen": 0.5296390652656555, "log_odds_ratio": -0.5726187825202942, "logits/chosen": -0.3899995684623718, "logits/rejected": -0.4204530715942383, "logps/chosen": -0.9171603918075562, "logps/rejected": -1.1902835369110107, "loss": 1.0626, "nll_loss": 1.0053044557571411, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09171605110168457, "rewards/margins": 0.027312302961945534, "rewards/rejected": -0.11902834475040436, "step": 1810 }, { "epoch": 0.33, "grad_norm": 1.6314342021942139, "learning_rate": 6.998398602416654e-06, "log_odds_chosen": 0.5884126424789429, "log_odds_ratio": -0.5797205567359924, "logits/chosen": -0.36631208658218384, "logits/rejected": -0.4214417338371277, "logps/chosen": -1.0176228284835815, "logps/rejected": -1.3633677959442139, "loss": 1.0146, "nll_loss": 0.9566680192947388, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10176227241754532, "rewards/margins": 0.03457449749112129, "rewards/rejected": -0.1363367736339569, "step": 1820 }, { "epoch": 0.33, "grad_norm": 0.8122896552085876, "learning_rate": 6.9925753384772165e-06, "log_odds_chosen": 0.6310805082321167, "log_odds_ratio": -0.6001628637313843, "logits/chosen": -0.274971067905426, "logits/rejected": -0.295291006565094, "logps/chosen": -0.9514021873474121, "logps/rejected": -1.3572217226028442, "loss": 0.9502, "nll_loss": 0.8902056813240051, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09514021128416061, "rewards/margins": 0.040581949055194855, "rewards/rejected": -0.13572217524051666, "step": 1830 }, { "epoch": 0.33, "grad_norm": 1.1364367008209229, "learning_rate": 6.986752074537778e-06, "log_odds_chosen": 0.6302961111068726, "log_odds_ratio": -0.5267154574394226, "logits/chosen": -0.3086879849433899, "logits/rejected": -0.3633294999599457, "logps/chosen": -0.9853225946426392, "logps/rejected": -1.3981595039367676, "loss": 0.9728, "nll_loss": 0.9201301336288452, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09853225946426392, "rewards/margins": 0.041283704340457916, "rewards/rejected": -0.13981595635414124, "step": 1840 }, { "epoch": 0.33, "grad_norm": 0.7904521822929382, "learning_rate": 6.9809288105983395e-06, "log_odds_chosen": 0.5368858575820923, "log_odds_ratio": -0.5833784937858582, "logits/chosen": -0.27477526664733887, "logits/rejected": -0.3175152838230133, "logps/chosen": -0.9110499620437622, "logps/rejected": -1.2280490398406982, "loss": 0.9398, "nll_loss": 0.8814668655395508, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09110499918460846, "rewards/margins": 0.03169991075992584, "rewards/rejected": -0.1228049024939537, "step": 1850 }, { "epoch": 0.34, "grad_norm": 0.8617063164710999, "learning_rate": 6.975105546658902e-06, "log_odds_chosen": 0.47567349672317505, "log_odds_ratio": -0.6010562777519226, "logits/chosen": -0.3454052805900574, "logits/rejected": -0.4101662039756775, "logps/chosen": -0.9865895509719849, "logps/rejected": -1.253812551498413, "loss": 0.9747, "nll_loss": 0.9145520329475403, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0986589565873146, "rewards/margins": 0.02672230266034603, "rewards/rejected": -0.1253812611103058, "step": 1860 }, { "epoch": 0.34, "grad_norm": 0.9157552123069763, "learning_rate": 6.969282282719464e-06, "log_odds_chosen": 0.4372219145298004, "log_odds_ratio": -0.6045929789543152, "logits/chosen": -0.44653192162513733, "logits/rejected": -0.4505546987056732, "logps/chosen": -1.074901819229126, "logps/rejected": -1.350311517715454, "loss": 1.063, "nll_loss": 1.002518892288208, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1074901819229126, "rewards/margins": 0.02754097245633602, "rewards/rejected": -0.13503116369247437, "step": 1870 }, { "epoch": 0.34, "grad_norm": 1.502583622932434, "learning_rate": 6.9634590187800256e-06, "log_odds_chosen": 0.5613323450088501, "log_odds_ratio": -0.5771964192390442, "logits/chosen": -0.3930050730705261, "logits/rejected": -0.44371461868286133, "logps/chosen": -0.9873424768447876, "logps/rejected": -1.3379671573638916, "loss": 1.0228, "nll_loss": 0.9650462865829468, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09873425960540771, "rewards/margins": 0.03506246581673622, "rewards/rejected": -0.13379672169685364, "step": 1880 }, { "epoch": 0.34, "grad_norm": 0.5780860781669617, "learning_rate": 6.957635754840588e-06, "log_odds_chosen": 0.6220310926437378, "log_odds_ratio": -0.5668379664421082, "logits/chosen": -0.3505100607872009, "logits/rejected": -0.4041958749294281, "logps/chosen": -0.9813801646232605, "logps/rejected": -1.343157410621643, "loss": 1.0134, "nll_loss": 0.9566676020622253, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09813802689313889, "rewards/margins": 0.03617771714925766, "rewards/rejected": -0.13431574404239655, "step": 1890 }, { "epoch": 0.34, "grad_norm": 0.4777265787124634, "learning_rate": 6.95181249090115e-06, "log_odds_chosen": 0.42654770612716675, "log_odds_ratio": -0.5968258380889893, "logits/chosen": -0.37639492750167847, "logits/rejected": -0.4339643120765686, "logps/chosen": -1.0687663555145264, "logps/rejected": -1.3647792339324951, "loss": 1.067, "nll_loss": 1.0073009729385376, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10687664896249771, "rewards/margins": 0.029601257294416428, "rewards/rejected": -0.13647790253162384, "step": 1900 }, { "epoch": 0.35, "grad_norm": 0.6649044156074524, "learning_rate": 6.945989226961712e-06, "log_odds_chosen": 0.43329086899757385, "log_odds_ratio": -0.6196489930152893, "logits/chosen": -0.3298007547855377, "logits/rejected": -0.401068776845932, "logps/chosen": -0.990477442741394, "logps/rejected": -1.2473641633987427, "loss": 0.9843, "nll_loss": 0.9223047494888306, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09904775023460388, "rewards/margins": 0.025688668712973595, "rewards/rejected": -0.12473641335964203, "step": 1910 }, { "epoch": 0.35, "grad_norm": 1.315058946609497, "learning_rate": 6.940165963022274e-06, "log_odds_chosen": 0.530105710029602, "log_odds_ratio": -0.5921862125396729, "logits/chosen": -0.39285919070243835, "logits/rejected": -0.4322722852230072, "logps/chosen": -1.0640658140182495, "logps/rejected": -1.410599946975708, "loss": 1.0939, "nll_loss": 1.0346629619598389, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10640659183263779, "rewards/margins": 0.034653399139642715, "rewards/rejected": -0.1410599797964096, "step": 1920 }, { "epoch": 0.35, "grad_norm": 0.7587740421295166, "learning_rate": 6.934342699082835e-06, "log_odds_chosen": 0.751360297203064, "log_odds_ratio": -0.5236696004867554, "logits/chosen": -0.35968920588493347, "logits/rejected": -0.41237396001815796, "logps/chosen": -0.9788786172866821, "logps/rejected": -1.4456140995025635, "loss": 1.0208, "nll_loss": 0.9684196710586548, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09788785129785538, "rewards/margins": 0.04667355865240097, "rewards/rejected": -0.14456140995025635, "step": 1930 }, { "epoch": 0.35, "grad_norm": 0.6302559971809387, "learning_rate": 6.928519435143398e-06, "log_odds_chosen": 0.4367288649082184, "log_odds_ratio": -0.6356965899467468, "logits/chosen": -0.3377440273761749, "logits/rejected": -0.3957875370979309, "logps/chosen": -1.087546944618225, "logps/rejected": -1.3826909065246582, "loss": 1.0962, "nll_loss": 1.0325984954833984, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1087547093629837, "rewards/margins": 0.02951439656317234, "rewards/rejected": -0.1382690966129303, "step": 1940 }, { "epoch": 0.35, "grad_norm": 0.6672381162643433, "learning_rate": 6.922696171203959e-06, "log_odds_chosen": 0.6472089886665344, "log_odds_ratio": -0.5463239550590515, "logits/chosen": -0.33907216787338257, "logits/rejected": -0.3891569972038269, "logps/chosen": -0.8778939247131348, "logps/rejected": -1.2696306705474854, "loss": 0.9847, "nll_loss": 0.9300362467765808, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08778940141201019, "rewards/margins": 0.0391736701130867, "rewards/rejected": -0.1269630640745163, "step": 1950 }, { "epoch": 0.35, "grad_norm": 0.9312640428543091, "learning_rate": 6.9168729072645215e-06, "log_odds_chosen": 0.6855367422103882, "log_odds_ratio": -0.5426241159439087, "logits/chosen": -0.3388255536556244, "logits/rejected": -0.39959508180618286, "logps/chosen": -0.8858163952827454, "logps/rejected": -1.3462940454483032, "loss": 0.9277, "nll_loss": 0.8734596967697144, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0885816365480423, "rewards/margins": 0.04604776203632355, "rewards/rejected": -0.13462939858436584, "step": 1960 }, { "epoch": 0.36, "grad_norm": 2.3137943744659424, "learning_rate": 6.911049643325084e-06, "log_odds_chosen": 0.5328444242477417, "log_odds_ratio": -0.5675147175788879, "logits/chosen": -0.33348965644836426, "logits/rejected": -0.36870360374450684, "logps/chosen": -0.9644654393196106, "logps/rejected": -1.2675182819366455, "loss": 1.0454, "nll_loss": 0.9886082410812378, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09644654393196106, "rewards/margins": 0.03030528500676155, "rewards/rejected": -0.1267518252134323, "step": 1970 }, { "epoch": 0.36, "grad_norm": 0.7342625856399536, "learning_rate": 6.905226379385645e-06, "log_odds_chosen": 0.3813175857067108, "log_odds_ratio": -0.6699342727661133, "logits/chosen": -0.3162277340888977, "logits/rejected": -0.35589399933815, "logps/chosen": -1.0159353017807007, "logps/rejected": -1.252074122428894, "loss": 1.041, "nll_loss": 0.9740489721298218, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10159353166818619, "rewards/margins": 0.023613888770341873, "rewards/rejected": -0.12520742416381836, "step": 1980 }, { "epoch": 0.36, "grad_norm": 0.9021235704421997, "learning_rate": 6.8994031154462076e-06, "log_odds_chosen": 0.7601310014724731, "log_odds_ratio": -0.4945410192012787, "logits/chosen": -0.35523557662963867, "logits/rejected": -0.37828975915908813, "logps/chosen": -0.9633037447929382, "logps/rejected": -1.4405834674835205, "loss": 0.99, "nll_loss": 0.940527617931366, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09633038938045502, "rewards/margins": 0.047727979719638824, "rewards/rejected": -0.14405836164951324, "step": 1990 }, { "epoch": 0.36, "grad_norm": 0.4736451506614685, "learning_rate": 6.89357985150677e-06, "log_odds_chosen": 0.4646480977535248, "log_odds_ratio": -0.6559361219406128, "logits/chosen": -0.30533161759376526, "logits/rejected": -0.37452375888824463, "logps/chosen": -0.9401071667671204, "logps/rejected": -1.2080457210540771, "loss": 0.9819, "nll_loss": 0.9162567853927612, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09401071071624756, "rewards/margins": 0.026793863624334335, "rewards/rejected": -0.12080458551645279, "step": 2000 }, { "epoch": 0.36, "grad_norm": 0.7754179835319519, "learning_rate": 6.8877565875673305e-06, "log_odds_chosen": 0.6256696581840515, "log_odds_ratio": -0.5275606513023376, "logits/chosen": -0.3516564965248108, "logits/rejected": -0.406394898891449, "logps/chosen": -0.9741449356079102, "logps/rejected": -1.3891098499298096, "loss": 0.9558, "nll_loss": 0.9030143618583679, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09741449356079102, "rewards/margins": 0.04149649664759636, "rewards/rejected": -0.13891097903251648, "step": 2010 }, { "epoch": 0.36, "grad_norm": 0.7154907584190369, "learning_rate": 6.881933323627893e-06, "log_odds_chosen": 0.4335847496986389, "log_odds_ratio": -0.6152317523956299, "logits/chosen": -0.3850800395011902, "logits/rejected": -0.38393911719322205, "logps/chosen": -1.0410282611846924, "logps/rejected": -1.3170279264450073, "loss": 1.0869, "nll_loss": 1.025334119796753, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10410281270742416, "rewards/margins": 0.027599969878792763, "rewards/rejected": -0.13170279562473297, "step": 2020 }, { "epoch": 0.37, "grad_norm": 0.8986865878105164, "learning_rate": 6.876110059688455e-06, "log_odds_chosen": 0.5948430299758911, "log_odds_ratio": -0.5587864518165588, "logits/chosen": -0.349942147731781, "logits/rejected": -0.41519179940223694, "logps/chosen": -0.9230004549026489, "logps/rejected": -1.3495103120803833, "loss": 1.0273, "nll_loss": 0.9713996648788452, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09230004251003265, "rewards/margins": 0.0426509864628315, "rewards/rejected": -0.13495102524757385, "step": 2030 }, { "epoch": 0.37, "grad_norm": 1.152334213256836, "learning_rate": 6.870286795749017e-06, "log_odds_chosen": 0.34722867608070374, "log_odds_ratio": -0.6710313558578491, "logits/chosen": -0.3837626278400421, "logits/rejected": -0.3969534635543823, "logps/chosen": -1.0239957571029663, "logps/rejected": -1.237477421760559, "loss": 1.0862, "nll_loss": 1.0191189050674438, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10239957273006439, "rewards/margins": 0.021348167210817337, "rewards/rejected": -0.12374775111675262, "step": 2040 }, { "epoch": 0.37, "grad_norm": 0.756015419960022, "learning_rate": 6.864463531809579e-06, "log_odds_chosen": 0.48199620842933655, "log_odds_ratio": -0.5751310586929321, "logits/chosen": -0.3649478852748871, "logits/rejected": -0.40983134508132935, "logps/chosen": -1.0215938091278076, "logps/rejected": -1.3313084840774536, "loss": 1.0334, "nll_loss": 0.975843071937561, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10215938091278076, "rewards/margins": 0.0309714674949646, "rewards/rejected": -0.13313084840774536, "step": 2050 }, { "epoch": 0.37, "grad_norm": 0.9733452796936035, "learning_rate": 6.858640267870141e-06, "log_odds_chosen": 0.665124773979187, "log_odds_ratio": -0.5681694746017456, "logits/chosen": -0.39392274618148804, "logits/rejected": -0.43787112832069397, "logps/chosen": -0.9010134935379028, "logps/rejected": -1.3361163139343262, "loss": 0.9935, "nll_loss": 0.9366915822029114, "rewards/accuracies": 0.625, "rewards/chosen": -0.09010134637355804, "rewards/margins": 0.04351028427481651, "rewards/rejected": -0.13361163437366486, "step": 2060 }, { "epoch": 0.37, "grad_norm": 1.235131859779358, "learning_rate": 6.852817003930703e-06, "log_odds_chosen": 0.600813627243042, "log_odds_ratio": -0.5714324712753296, "logits/chosen": -0.29560619592666626, "logits/rejected": -0.3389769196510315, "logps/chosen": -0.9260609745979309, "logps/rejected": -1.3035285472869873, "loss": 1.0465, "nll_loss": 0.9893512725830078, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09260609745979309, "rewards/margins": 0.03774676471948624, "rewards/rejected": -0.13035285472869873, "step": 2070 }, { "epoch": 0.38, "grad_norm": 0.881033182144165, "learning_rate": 6.846993739991265e-06, "log_odds_chosen": 0.552410900592804, "log_odds_ratio": -0.5525115132331848, "logits/chosen": -0.34073910117149353, "logits/rejected": -0.36035147309303284, "logps/chosen": -0.9047195315361023, "logps/rejected": -1.2465412616729736, "loss": 0.906, "nll_loss": 0.8507863283157349, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09047196060419083, "rewards/margins": 0.03418216481804848, "rewards/rejected": -0.1246541291475296, "step": 2080 }, { "epoch": 0.38, "grad_norm": 1.3036489486694336, "learning_rate": 6.841170476051827e-06, "log_odds_chosen": 0.3467450737953186, "log_odds_ratio": -0.6337326765060425, "logits/chosen": -0.4385066032409668, "logits/rejected": -0.461122989654541, "logps/chosen": -1.1040109395980835, "logps/rejected": -1.33207368850708, "loss": 1.1146, "nll_loss": 1.0512287616729736, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11040109395980835, "rewards/margins": 0.02280627377331257, "rewards/rejected": -0.13320736587047577, "step": 2090 }, { "epoch": 0.38, "grad_norm": 0.663144052028656, "learning_rate": 6.835347212112388e-06, "log_odds_chosen": 0.5738186836242676, "log_odds_ratio": -0.5866661071777344, "logits/chosen": -0.3205263614654541, "logits/rejected": -0.39872634410858154, "logps/chosen": -0.9723861813545227, "logps/rejected": -1.3176357746124268, "loss": 1.0031, "nll_loss": 0.9444729089736938, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09723862260580063, "rewards/margins": 0.03452496603131294, "rewards/rejected": -0.13176357746124268, "step": 2100 }, { "epoch": 0.38, "grad_norm": 1.065043330192566, "learning_rate": 6.82952394817295e-06, "log_odds_chosen": 0.7342604398727417, "log_odds_ratio": -0.519620954990387, "logits/chosen": -0.33630579710006714, "logits/rejected": -0.41530171036720276, "logps/chosen": -0.9176281094551086, "logps/rejected": -1.3705017566680908, "loss": 0.9411, "nll_loss": 0.8891534805297852, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09176281839609146, "rewards/margins": 0.0452873595058918, "rewards/rejected": -0.13705015182495117, "step": 2110 }, { "epoch": 0.38, "grad_norm": 1.0710101127624512, "learning_rate": 6.8237006842335125e-06, "log_odds_chosen": 0.7710472345352173, "log_odds_ratio": -0.5303458571434021, "logits/chosen": -0.3510195016860962, "logits/rejected": -0.4198974668979645, "logps/chosen": -1.0233778953552246, "logps/rejected": -1.4768149852752686, "loss": 1.0737, "nll_loss": 1.020646572113037, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1023377999663353, "rewards/margins": 0.045343708246946335, "rewards/rejected": -0.14768150448799133, "step": 2120 }, { "epoch": 0.38, "grad_norm": 0.6870781779289246, "learning_rate": 6.817877420294074e-06, "log_odds_chosen": 0.478823184967041, "log_odds_ratio": -0.5921360850334167, "logits/chosen": -0.36411404609680176, "logits/rejected": -0.4298684000968933, "logps/chosen": -0.9525805711746216, "logps/rejected": -1.2109096050262451, "loss": 1.0417, "nll_loss": 0.9824945330619812, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09525805711746216, "rewards/margins": 0.02583291009068489, "rewards/rejected": -0.12109096348285675, "step": 2130 }, { "epoch": 0.39, "grad_norm": 0.8713396191596985, "learning_rate": 6.812054156354636e-06, "log_odds_chosen": 0.5773652791976929, "log_odds_ratio": -0.5538418292999268, "logits/chosen": -0.3354545533657074, "logits/rejected": -0.385932594537735, "logps/chosen": -1.016210913658142, "logps/rejected": -1.4001071453094482, "loss": 0.9637, "nll_loss": 0.9083443880081177, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10162109136581421, "rewards/margins": 0.03838961571455002, "rewards/rejected": -0.14001069962978363, "step": 2140 }, { "epoch": 0.39, "grad_norm": 1.1261780261993408, "learning_rate": 6.806230892415199e-06, "log_odds_chosen": 0.5818010568618774, "log_odds_ratio": -0.5739784240722656, "logits/chosen": -0.38004809617996216, "logits/rejected": -0.41545504331588745, "logps/chosen": -0.9510005116462708, "logps/rejected": -1.3282325267791748, "loss": 1.0095, "nll_loss": 0.9521392583847046, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0951000526547432, "rewards/margins": 0.037723198533058167, "rewards/rejected": -0.13282324373722076, "step": 2150 }, { "epoch": 0.39, "grad_norm": 1.1771738529205322, "learning_rate": 6.800407628475761e-06, "log_odds_chosen": 0.6635382175445557, "log_odds_ratio": -0.5700774192810059, "logits/chosen": -0.29149946570396423, "logits/rejected": -0.3473144471645355, "logps/chosen": -0.9014791250228882, "logps/rejected": -1.2821435928344727, "loss": 0.9432, "nll_loss": 0.8862012624740601, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09014792740345001, "rewards/margins": 0.038066450506448746, "rewards/rejected": -0.12821438908576965, "step": 2160 }, { "epoch": 0.39, "grad_norm": 1.0287370681762695, "learning_rate": 6.794584364536322e-06, "log_odds_chosen": 0.6836004257202148, "log_odds_ratio": -0.5302466154098511, "logits/chosen": -0.3840259611606598, "logits/rejected": -0.42339396476745605, "logps/chosen": -0.9160095453262329, "logps/rejected": -1.375124216079712, "loss": 1.0308, "nll_loss": 0.9777463674545288, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0916009470820427, "rewards/margins": 0.045911483466625214, "rewards/rejected": -0.1375124156475067, "step": 2170 }, { "epoch": 0.39, "grad_norm": 1.1369781494140625, "learning_rate": 6.788761100596885e-06, "log_odds_chosen": 0.6386801600456238, "log_odds_ratio": -0.5835193395614624, "logits/chosen": -0.35931721329689026, "logits/rejected": -0.43148794770240784, "logps/chosen": -0.9243070483207703, "logps/rejected": -1.3196786642074585, "loss": 1.0626, "nll_loss": 1.004262924194336, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09243070334196091, "rewards/margins": 0.03953716158866882, "rewards/rejected": -0.13196787238121033, "step": 2180 }, { "epoch": 0.4, "grad_norm": 0.809481143951416, "learning_rate": 6.782937836657447e-06, "log_odds_chosen": 0.5534670948982239, "log_odds_ratio": -0.5876575112342834, "logits/chosen": -0.39520007371902466, "logits/rejected": -0.44970574975013733, "logps/chosen": -1.018445611000061, "logps/rejected": -1.3711169958114624, "loss": 1.0772, "nll_loss": 1.0184389352798462, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10184456408023834, "rewards/margins": 0.03526713699102402, "rewards/rejected": -0.13711170852184296, "step": 2190 }, { "epoch": 0.4, "grad_norm": 1.0092328786849976, "learning_rate": 6.777114572718008e-06, "log_odds_chosen": 0.968769371509552, "log_odds_ratio": -0.45703181624412537, "logits/chosen": -0.3396906554698944, "logits/rejected": -0.41497963666915894, "logps/chosen": -0.9728395342826843, "logps/rejected": -1.5849764347076416, "loss": 0.9097, "nll_loss": 0.8639856576919556, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09728394448757172, "rewards/margins": 0.06121370196342468, "rewards/rejected": -0.1584976464509964, "step": 2200 }, { "epoch": 0.4, "grad_norm": 1.4918605089187622, "learning_rate": 6.77129130877857e-06, "log_odds_chosen": 0.6453096270561218, "log_odds_ratio": -0.5849459171295166, "logits/chosen": -0.32868796586990356, "logits/rejected": -0.3679044842720032, "logps/chosen": -0.9020195007324219, "logps/rejected": -1.2888437509536743, "loss": 0.9864, "nll_loss": 0.9279425740242004, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0902019590139389, "rewards/margins": 0.038682423532009125, "rewards/rejected": -0.12888437509536743, "step": 2210 }, { "epoch": 0.4, "grad_norm": 1.0304147005081177, "learning_rate": 6.765468044839132e-06, "log_odds_chosen": 0.558380663394928, "log_odds_ratio": -0.589785635471344, "logits/chosen": -0.3739710748195648, "logits/rejected": -0.40226370096206665, "logps/chosen": -1.023913025856018, "logps/rejected": -1.3768985271453857, "loss": 1.0085, "nll_loss": 0.9495282173156738, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.102391317486763, "rewards/margins": 0.035298533737659454, "rewards/rejected": -0.13768985867500305, "step": 2220 }, { "epoch": 0.4, "grad_norm": 1.6191908121109009, "learning_rate": 6.759644780899694e-06, "log_odds_chosen": 0.6424797773361206, "log_odds_ratio": -0.5855919718742371, "logits/chosen": -0.3532762825489044, "logits/rejected": -0.412332147359848, "logps/chosen": -0.9896075129508972, "logps/rejected": -1.3922007083892822, "loss": 0.9935, "nll_loss": 0.9349050521850586, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0989607572555542, "rewards/margins": 0.040259331464767456, "rewards/rejected": -0.13922008872032166, "step": 2230 }, { "epoch": 0.4, "grad_norm": 0.8883562684059143, "learning_rate": 6.753821516960256e-06, "log_odds_chosen": 0.841235339641571, "log_odds_ratio": -0.5459184050559998, "logits/chosen": -0.3101753890514374, "logits/rejected": -0.32989877462387085, "logps/chosen": -0.7897628545761108, "logps/rejected": -1.3077296018600464, "loss": 0.8489, "nll_loss": 0.7942813038825989, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.07897628843784332, "rewards/margins": 0.05179668590426445, "rewards/rejected": -0.13077297806739807, "step": 2240 }, { "epoch": 0.41, "grad_norm": 1.5552319288253784, "learning_rate": 6.747998253020818e-06, "log_odds_chosen": 0.6456782817840576, "log_odds_ratio": -0.5549284219741821, "logits/chosen": -0.38464441895484924, "logits/rejected": -0.44848066568374634, "logps/chosen": -1.0553745031356812, "logps/rejected": -1.4854891300201416, "loss": 1.0404, "nll_loss": 0.9849538803100586, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10553745180368423, "rewards/margins": 0.043011464178562164, "rewards/rejected": -0.1485489159822464, "step": 2250 }, { "epoch": 0.41, "grad_norm": 1.2503700256347656, "learning_rate": 6.74217498908138e-06, "log_odds_chosen": 0.42475882172584534, "log_odds_ratio": -0.6306148171424866, "logits/chosen": -0.3681567311286926, "logits/rejected": -0.38218241930007935, "logps/chosen": -1.0094163417816162, "logps/rejected": -1.2551424503326416, "loss": 1.0313, "nll_loss": 0.9682766199111938, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10094162076711655, "rewards/margins": 0.024572614580392838, "rewards/rejected": -0.12551423907279968, "step": 2260 }, { "epoch": 0.41, "grad_norm": 0.9619736671447754, "learning_rate": 6.736351725141942e-06, "log_odds_chosen": 0.8118526339530945, "log_odds_ratio": -0.5391420722007751, "logits/chosen": -0.37076014280319214, "logits/rejected": -0.41540661454200745, "logps/chosen": -0.9378563165664673, "logps/rejected": -1.4630939960479736, "loss": 0.964, "nll_loss": 0.9101201295852661, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09378562867641449, "rewards/margins": 0.05252378061413765, "rewards/rejected": -0.14630940556526184, "step": 2270 }, { "epoch": 0.41, "grad_norm": 1.0768691301345825, "learning_rate": 6.7305284612025035e-06, "log_odds_chosen": 0.6344785690307617, "log_odds_ratio": -0.565656304359436, "logits/chosen": -0.331177294254303, "logits/rejected": -0.377986341714859, "logps/chosen": -0.9524379968643188, "logps/rejected": -1.366152048110962, "loss": 1.0225, "nll_loss": 0.9659032821655273, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09524379670619965, "rewards/margins": 0.041371412575244904, "rewards/rejected": -0.13661520183086395, "step": 2280 }, { "epoch": 0.41, "grad_norm": 0.9441320896148682, "learning_rate": 6.724705197263065e-06, "log_odds_chosen": 0.4996124804019928, "log_odds_ratio": -0.6170133352279663, "logits/chosen": -0.39471060037612915, "logits/rejected": -0.42773985862731934, "logps/chosen": -1.0002835988998413, "logps/rejected": -1.3339643478393555, "loss": 1.0856, "nll_loss": 1.0238593816757202, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1000283733010292, "rewards/margins": 0.0333680734038353, "rewards/rejected": -0.1333964318037033, "step": 2290 }, { "epoch": 0.42, "grad_norm": 0.9333765506744385, "learning_rate": 6.718881933323627e-06, "log_odds_chosen": 0.5398795008659363, "log_odds_ratio": -0.5720388889312744, "logits/chosen": -0.33184653520584106, "logits/rejected": -0.3918634355068207, "logps/chosen": -0.924095630645752, "logps/rejected": -1.2493730783462524, "loss": 1.017, "nll_loss": 0.9597843885421753, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09240957349538803, "rewards/margins": 0.03252773731946945, "rewards/rejected": -0.12493731081485748, "step": 2300 }, { "epoch": 0.42, "grad_norm": 1.224831223487854, "learning_rate": 6.71305866938419e-06, "log_odds_chosen": 0.7087077498435974, "log_odds_ratio": -0.49888911843299866, "logits/chosen": -0.361178994178772, "logits/rejected": -0.4183693826198578, "logps/chosen": -0.9371638298034668, "logps/rejected": -1.3673518896102905, "loss": 1.0179, "nll_loss": 0.9680109024047852, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09371639043092728, "rewards/margins": 0.043018803000450134, "rewards/rejected": -0.13673518598079681, "step": 2310 }, { "epoch": 0.42, "grad_norm": 1.2092949151992798, "learning_rate": 6.707235405444751e-06, "log_odds_chosen": 0.6781972646713257, "log_odds_ratio": -0.5294966697692871, "logits/chosen": -0.42441660165786743, "logits/rejected": -0.4300518035888672, "logps/chosen": -0.9629390835762024, "logps/rejected": -1.3670963048934937, "loss": 1.0294, "nll_loss": 0.9764898419380188, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09629391133785248, "rewards/margins": 0.04041573405265808, "rewards/rejected": -0.13670964539051056, "step": 2320 }, { "epoch": 0.42, "grad_norm": 0.727232813835144, "learning_rate": 6.701412141505313e-06, "log_odds_chosen": 0.4993107318878174, "log_odds_ratio": -0.6099010705947876, "logits/chosen": -0.39678144454956055, "logits/rejected": -0.4318881630897522, "logps/chosen": -0.9463534355163574, "logps/rejected": -1.2829571962356567, "loss": 1.0367, "nll_loss": 0.9757480621337891, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09463534504175186, "rewards/margins": 0.03366038203239441, "rewards/rejected": -0.12829571962356567, "step": 2330 }, { "epoch": 0.42, "grad_norm": 0.9829471111297607, "learning_rate": 6.695588877565876e-06, "log_odds_chosen": 0.4144318103790283, "log_odds_ratio": -0.6522342562675476, "logits/chosen": -0.37262946367263794, "logits/rejected": -0.3711271286010742, "logps/chosen": -0.9915045499801636, "logps/rejected": -1.2454833984375, "loss": 1.0185, "nll_loss": 0.953311562538147, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09915046393871307, "rewards/margins": 0.02539788745343685, "rewards/rejected": -0.12454833835363388, "step": 2340 }, { "epoch": 0.42, "grad_norm": 0.849780797958374, "learning_rate": 6.689765613626437e-06, "log_odds_chosen": 0.6342957615852356, "log_odds_ratio": -0.5933780670166016, "logits/chosen": -0.3510368764400482, "logits/rejected": -0.38174304366111755, "logps/chosen": -1.001947283744812, "logps/rejected": -1.4229815006256104, "loss": 1.038, "nll_loss": 0.9786350131034851, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10019473731517792, "rewards/margins": 0.04210341349244118, "rewards/rejected": -0.1422981470823288, "step": 2350 }, { "epoch": 0.43, "grad_norm": 0.9741145968437195, "learning_rate": 6.6839423496869995e-06, "log_odds_chosen": 0.5828785300254822, "log_odds_ratio": -0.5875527262687683, "logits/chosen": -0.3308332562446594, "logits/rejected": -0.3834843635559082, "logps/chosen": -0.9138933420181274, "logps/rejected": -1.27740478515625, "loss": 1.0159, "nll_loss": 0.9571261405944824, "rewards/accuracies": 0.625, "rewards/chosen": -0.09138933569192886, "rewards/margins": 0.03635113686323166, "rewards/rejected": -0.12774047255516052, "step": 2360 }, { "epoch": 0.43, "grad_norm": 0.7441773414611816, "learning_rate": 6.678119085747561e-06, "log_odds_chosen": 0.5344163775444031, "log_odds_ratio": -0.6083695888519287, "logits/chosen": -0.31939536333084106, "logits/rejected": -0.3396163284778595, "logps/chosen": -0.9387407302856445, "logps/rejected": -1.2810204029083252, "loss": 1.0482, "nll_loss": 0.9873501658439636, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09387408196926117, "rewards/margins": 0.034227967262268066, "rewards/rejected": -0.12810204923152924, "step": 2370 }, { "epoch": 0.43, "grad_norm": 0.9099089503288269, "learning_rate": 6.672295821808123e-06, "log_odds_chosen": 0.46537071466445923, "log_odds_ratio": -0.6207537651062012, "logits/chosen": -0.34910348057746887, "logits/rejected": -0.3974645435810089, "logps/chosen": -1.0489540100097656, "logps/rejected": -1.3541288375854492, "loss": 1.0742, "nll_loss": 1.01216459274292, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10489541292190552, "rewards/margins": 0.03051748313009739, "rewards/rejected": -0.13541290163993835, "step": 2380 }, { "epoch": 0.43, "grad_norm": 0.9425209164619446, "learning_rate": 6.666472557868685e-06, "log_odds_chosen": 0.5351072549819946, "log_odds_ratio": -0.5699527859687805, "logits/chosen": -0.39522382616996765, "logits/rejected": -0.44066038727760315, "logps/chosen": -0.9757105112075806, "logps/rejected": -1.321337342262268, "loss": 1.0655, "nll_loss": 1.0084631443023682, "rewards/accuracies": 0.625, "rewards/chosen": -0.09757105261087418, "rewards/margins": 0.03456268459558487, "rewards/rejected": -0.13213373720645905, "step": 2390 }, { "epoch": 0.43, "grad_norm": 0.7204871773719788, "learning_rate": 6.660649293929247e-06, "log_odds_chosen": 0.6166509985923767, "log_odds_ratio": -0.5407067537307739, "logits/chosen": -0.37788647413253784, "logits/rejected": -0.39217525720596313, "logps/chosen": -1.0124753713607788, "logps/rejected": -1.399766206741333, "loss": 1.0106, "nll_loss": 0.9564822316169739, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10124754905700684, "rewards/margins": 0.03872908279299736, "rewards/rejected": -0.1399766206741333, "step": 2400 }, { "epoch": 0.44, "grad_norm": 0.7321575284004211, "learning_rate": 6.654826029989809e-06, "log_odds_chosen": 0.48690706491470337, "log_odds_ratio": -0.6029237508773804, "logits/chosen": -0.3971622586250305, "logits/rejected": -0.4110518991947174, "logps/chosen": -0.9805097579956055, "logps/rejected": -1.2300655841827393, "loss": 1.0499, "nll_loss": 0.9895747900009155, "rewards/accuracies": 0.625, "rewards/chosen": -0.09805097430944443, "rewards/margins": 0.024955574423074722, "rewards/rejected": -0.12300655990839005, "step": 2410 }, { "epoch": 0.44, "grad_norm": 1.1652004718780518, "learning_rate": 6.649002766050371e-06, "log_odds_chosen": 0.6261937022209167, "log_odds_ratio": -0.5773937106132507, "logits/chosen": -0.3494192957878113, "logits/rejected": -0.37246376276016235, "logps/chosen": -1.0030348300933838, "logps/rejected": -1.390746831893921, "loss": 0.9871, "nll_loss": 0.9293266534805298, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10030348598957062, "rewards/margins": 0.03877120837569237, "rewards/rejected": -0.13907471299171448, "step": 2420 }, { "epoch": 0.44, "grad_norm": 1.1041061878204346, "learning_rate": 6.643179502110933e-06, "log_odds_chosen": 0.6732785105705261, "log_odds_ratio": -0.5620766878128052, "logits/chosen": -0.4026264250278473, "logits/rejected": -0.4409736096858978, "logps/chosen": -0.9442132115364075, "logps/rejected": -1.4226003885269165, "loss": 1.0086, "nll_loss": 0.9523922801017761, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09442131966352463, "rewards/margins": 0.0478387214243412, "rewards/rejected": -0.14226004481315613, "step": 2430 }, { "epoch": 0.44, "grad_norm": 1.5153762102127075, "learning_rate": 6.637356238171495e-06, "log_odds_chosen": 0.7149797677993774, "log_odds_ratio": -0.5272139310836792, "logits/chosen": -0.3368612229824066, "logits/rejected": -0.396007776260376, "logps/chosen": -0.9288423657417297, "logps/rejected": -1.3468871116638184, "loss": 1.0085, "nll_loss": 0.9557502865791321, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09288422763347626, "rewards/margins": 0.041804488748311996, "rewards/rejected": -0.13468872010707855, "step": 2440 }, { "epoch": 0.44, "grad_norm": 0.9731908440589905, "learning_rate": 6.631532974232057e-06, "log_odds_chosen": 0.5724431872367859, "log_odds_ratio": -0.5888754725456238, "logits/chosen": -0.36172300577163696, "logits/rejected": -0.41580086946487427, "logps/chosen": -0.9775680303573608, "logps/rejected": -1.3245165348052979, "loss": 0.9907, "nll_loss": 0.931767463684082, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09775681793689728, "rewards/margins": 0.034694839268922806, "rewards/rejected": -0.13245165348052979, "step": 2450 }, { "epoch": 0.44, "grad_norm": 1.017647624015808, "learning_rate": 6.625709710292618e-06, "log_odds_chosen": 0.754473090171814, "log_odds_ratio": -0.5083373785018921, "logits/chosen": -0.31709548830986023, "logits/rejected": -0.4046868681907654, "logps/chosen": -0.939433753490448, "logps/rejected": -1.394449234008789, "loss": 0.9886, "nll_loss": 0.9377379417419434, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09394337981939316, "rewards/margins": 0.045501552522182465, "rewards/rejected": -0.13944493234157562, "step": 2460 }, { "epoch": 0.45, "grad_norm": 0.9073895215988159, "learning_rate": 6.619886446353181e-06, "log_odds_chosen": 0.6407888531684875, "log_odds_ratio": -0.5432699918746948, "logits/chosen": -0.34865349531173706, "logits/rejected": -0.3891986012458801, "logps/chosen": -0.8993372917175293, "logps/rejected": -1.3464696407318115, "loss": 1.0326, "nll_loss": 0.9782568216323853, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08993373811244965, "rewards/margins": 0.04471323639154434, "rewards/rejected": -0.1346469670534134, "step": 2470 }, { "epoch": 0.45, "grad_norm": 0.6044934988021851, "learning_rate": 6.614063182413742e-06, "log_odds_chosen": 0.46049004793167114, "log_odds_ratio": -0.5819103717803955, "logits/chosen": -0.35496190190315247, "logits/rejected": -0.39264383912086487, "logps/chosen": -0.978875458240509, "logps/rejected": -1.270918369293213, "loss": 1.0487, "nll_loss": 0.9905277490615845, "rewards/accuracies": 0.625, "rewards/chosen": -0.0978875383734703, "rewards/margins": 0.029204288497567177, "rewards/rejected": -0.12709183990955353, "step": 2480 }, { "epoch": 0.45, "grad_norm": 1.590712547302246, "learning_rate": 6.608239918474304e-06, "log_odds_chosen": 0.630506694316864, "log_odds_ratio": -0.586438775062561, "logits/chosen": -0.38816604018211365, "logits/rejected": -0.4340592920780182, "logps/chosen": -0.9471012949943542, "logps/rejected": -1.3487260341644287, "loss": 0.9928, "nll_loss": 0.9341457486152649, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09471012651920319, "rewards/margins": 0.040162477642297745, "rewards/rejected": -0.13487261533737183, "step": 2490 }, { "epoch": 0.45, "grad_norm": 1.3605127334594727, "learning_rate": 6.602416654534867e-06, "log_odds_chosen": 0.6695212125778198, "log_odds_ratio": -0.5592930912971497, "logits/chosen": -0.36375027894973755, "logits/rejected": -0.40861162543296814, "logps/chosen": -0.9220840334892273, "logps/rejected": -1.3491367101669312, "loss": 0.9604, "nll_loss": 0.9045180082321167, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.0922083854675293, "rewards/margins": 0.04270528256893158, "rewards/rejected": -0.13491368293762207, "step": 2500 }, { "epoch": 0.45, "grad_norm": 1.2861924171447754, "learning_rate": 6.596593390595428e-06, "log_odds_chosen": 0.5616058707237244, "log_odds_ratio": -0.5883401036262512, "logits/chosen": -0.25284165143966675, "logits/rejected": -0.37594661116600037, "logps/chosen": -0.9259670972824097, "logps/rejected": -1.263522982597351, "loss": 0.9541, "nll_loss": 0.8952864408493042, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09259669482707977, "rewards/margins": 0.033755604177713394, "rewards/rejected": -0.12635231018066406, "step": 2510 }, { "epoch": 0.46, "grad_norm": 0.7333442568778992, "learning_rate": 6.5907701266559905e-06, "log_odds_chosen": 0.518241822719574, "log_odds_ratio": -0.5922364592552185, "logits/chosen": -0.3900024890899658, "logits/rejected": -0.4332877993583679, "logps/chosen": -0.9187110662460327, "logps/rejected": -1.223683476448059, "loss": 1.0167, "nll_loss": 0.9575027227401733, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09187111258506775, "rewards/margins": 0.030497241765260696, "rewards/rejected": -0.12236835807561874, "step": 2520 }, { "epoch": 0.46, "grad_norm": 1.4825656414031982, "learning_rate": 6.584946862716553e-06, "log_odds_chosen": 0.582595944404602, "log_odds_ratio": -0.5741361379623413, "logits/chosen": -0.3265005946159363, "logits/rejected": -0.40415525436401367, "logps/chosen": -0.9316970705986023, "logps/rejected": -1.3179895877838135, "loss": 0.9669, "nll_loss": 0.9095318913459778, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09316971898078918, "rewards/margins": 0.03862924873828888, "rewards/rejected": -0.13179895281791687, "step": 2530 }, { "epoch": 0.46, "grad_norm": 1.1516770124435425, "learning_rate": 6.579123598777114e-06, "log_odds_chosen": 0.5853735208511353, "log_odds_ratio": -0.5575556755065918, "logits/chosen": -0.37003493309020996, "logits/rejected": -0.4401502013206482, "logps/chosen": -1.0224106311798096, "logps/rejected": -1.4017164707183838, "loss": 1.096, "nll_loss": 1.040209174156189, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10224107652902603, "rewards/margins": 0.03793057054281235, "rewards/rejected": -0.14017164707183838, "step": 2540 }, { "epoch": 0.46, "grad_norm": 0.772507905960083, "learning_rate": 6.573300334837676e-06, "log_odds_chosen": 0.6209217309951782, "log_odds_ratio": -0.5777196288108826, "logits/chosen": -0.32888883352279663, "logits/rejected": -0.3678010106086731, "logps/chosen": -0.9577111005783081, "logps/rejected": -1.3549540042877197, "loss": 0.9416, "nll_loss": 0.8838027715682983, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09577111154794693, "rewards/margins": 0.03972429037094116, "rewards/rejected": -0.1354953944683075, "step": 2550 }, { "epoch": 0.46, "grad_norm": 0.6387426853179932, "learning_rate": 6.567477070898238e-06, "log_odds_chosen": 0.5519171953201294, "log_odds_ratio": -0.5468162298202515, "logits/chosen": -0.3772895336151123, "logits/rejected": -0.42758235335350037, "logps/chosen": -1.0784213542938232, "logps/rejected": -1.4518009424209595, "loss": 0.9968, "nll_loss": 0.9421661496162415, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10784213244915009, "rewards/margins": 0.03733794763684273, "rewards/rejected": -0.14518007636070251, "step": 2560 }, { "epoch": 0.46, "grad_norm": 0.7245638966560364, "learning_rate": 6.5616538069588e-06, "log_odds_chosen": 0.5130826234817505, "log_odds_ratio": -0.6046417951583862, "logits/chosen": -0.2720004618167877, "logits/rejected": -0.32857561111450195, "logps/chosen": -0.9239827990531921, "logps/rejected": -1.2177114486694336, "loss": 0.9891, "nll_loss": 0.9286392331123352, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09239828586578369, "rewards/margins": 0.029372859746217728, "rewards/rejected": -0.12177114188671112, "step": 2570 }, { "epoch": 0.47, "grad_norm": 0.7606828808784485, "learning_rate": 6.555830543019362e-06, "log_odds_chosen": 0.4750826954841614, "log_odds_ratio": -0.5803383588790894, "logits/chosen": -0.33285465836524963, "logits/rejected": -0.37880927324295044, "logps/chosen": -0.9121491312980652, "logps/rejected": -1.1974008083343506, "loss": 1.0248, "nll_loss": 0.9667941927909851, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09121491014957428, "rewards/margins": 0.028525155037641525, "rewards/rejected": -0.11974005401134491, "step": 2580 }, { "epoch": 0.47, "grad_norm": 1.215981125831604, "learning_rate": 6.550007279079924e-06, "log_odds_chosen": 0.7042616605758667, "log_odds_ratio": -0.550471305847168, "logits/chosen": -0.33983561396598816, "logits/rejected": -0.36023131012916565, "logps/chosen": -0.9646269083023071, "logps/rejected": -1.3918030261993408, "loss": 0.9539, "nll_loss": 0.8988884687423706, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.096462681889534, "rewards/margins": 0.04271761700510979, "rewards/rejected": -0.13918031752109528, "step": 2590 }, { "epoch": 0.47, "grad_norm": 1.175630807876587, "learning_rate": 6.5441840151404864e-06, "log_odds_chosen": 0.5247041583061218, "log_odds_ratio": -0.6049832105636597, "logits/chosen": -0.35429611802101135, "logits/rejected": -0.3579476773738861, "logps/chosen": -0.8990931510925293, "logps/rejected": -1.199903130531311, "loss": 1.0378, "nll_loss": 0.9773503541946411, "rewards/accuracies": 0.625, "rewards/chosen": -0.08990932255983353, "rewards/margins": 0.030080992728471756, "rewards/rejected": -0.11999031156301498, "step": 2600 }, { "epoch": 0.47, "grad_norm": 1.0978769063949585, "learning_rate": 6.538360751201048e-06, "log_odds_chosen": 0.40354281663894653, "log_odds_ratio": -0.6332866549491882, "logits/chosen": -0.3224504590034485, "logits/rejected": -0.36072710156440735, "logps/chosen": -1.123956322669983, "logps/rejected": -1.387921929359436, "loss": 1.0701, "nll_loss": 1.0067265033721924, "rewards/accuracies": 0.625, "rewards/chosen": -0.11239562183618546, "rewards/margins": 0.026396561414003372, "rewards/rejected": -0.13879218697547913, "step": 2610 }, { "epoch": 0.47, "grad_norm": 0.9290404915809631, "learning_rate": 6.53253748726161e-06, "log_odds_chosen": 0.6468437910079956, "log_odds_ratio": -0.5385705232620239, "logits/chosen": -0.36749547719955444, "logits/rejected": -0.4251924157142639, "logps/chosen": -0.9306084513664246, "logps/rejected": -1.3251934051513672, "loss": 0.9871, "nll_loss": 0.9332555532455444, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09306085854768753, "rewards/margins": 0.03945847973227501, "rewards/rejected": -0.13251933455467224, "step": 2620 }, { "epoch": 0.48, "grad_norm": 1.0460034608840942, "learning_rate": 6.5267142233221725e-06, "log_odds_chosen": 0.5371777415275574, "log_odds_ratio": -0.6011015176773071, "logits/chosen": -0.3856775164604187, "logits/rejected": -0.39130908250808716, "logps/chosen": -0.9254153966903687, "logps/rejected": -1.268058180809021, "loss": 1.0057, "nll_loss": 0.9455927014350891, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09254153072834015, "rewards/margins": 0.03426428511738777, "rewards/rejected": -0.12680582702159882, "step": 2630 }, { "epoch": 0.48, "grad_norm": 1.1770684719085693, "learning_rate": 6.520890959382733e-06, "log_odds_chosen": 0.754479706287384, "log_odds_ratio": -0.5277892351150513, "logits/chosen": -0.41099995374679565, "logits/rejected": -0.46807852387428284, "logps/chosen": -0.9225029945373535, "logps/rejected": -1.4032045602798462, "loss": 1.0005, "nll_loss": 0.9477365612983704, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09225030243396759, "rewards/margins": 0.04807015508413315, "rewards/rejected": -0.14032046496868134, "step": 2640 }, { "epoch": 0.48, "grad_norm": 1.7522804737091064, "learning_rate": 6.5150676954432954e-06, "log_odds_chosen": 0.7221927046775818, "log_odds_ratio": -0.5742099285125732, "logits/chosen": -0.29052841663360596, "logits/rejected": -0.3499542772769928, "logps/chosen": -0.8708510398864746, "logps/rejected": -1.3332278728485107, "loss": 0.9638, "nll_loss": 0.9063900709152222, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08708511292934418, "rewards/margins": 0.046237677335739136, "rewards/rejected": -0.1333227902650833, "step": 2650 }, { "epoch": 0.48, "grad_norm": 1.2086186408996582, "learning_rate": 6.509244431503858e-06, "log_odds_chosen": 0.6395819783210754, "log_odds_ratio": -0.5765899419784546, "logits/chosen": -0.31575196981430054, "logits/rejected": -0.3708987832069397, "logps/chosen": -0.8991060256958008, "logps/rejected": -1.2960377931594849, "loss": 1.0116, "nll_loss": 0.953917384147644, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.0899106115102768, "rewards/margins": 0.03969316929578781, "rewards/rejected": -0.129603773355484, "step": 2660 }, { "epoch": 0.48, "grad_norm": 0.7068976163864136, "learning_rate": 6.503421167564419e-06, "log_odds_chosen": 0.6602069139480591, "log_odds_ratio": -0.581697940826416, "logits/chosen": -0.332303524017334, "logits/rejected": -0.3719666600227356, "logps/chosen": -0.884446918964386, "logps/rejected": -1.2823951244354248, "loss": 0.9566, "nll_loss": 0.8984075784683228, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08844468742609024, "rewards/margins": 0.03979482874274254, "rewards/rejected": -0.12823952734470367, "step": 2670 }, { "epoch": 0.48, "grad_norm": 1.3176428079605103, "learning_rate": 6.4975979036249815e-06, "log_odds_chosen": 0.6552340388298035, "log_odds_ratio": -0.5691067576408386, "logits/chosen": -0.3273809254169464, "logits/rejected": -0.36774808168411255, "logps/chosen": -0.9798242449760437, "logps/rejected": -1.389387845993042, "loss": 0.9677, "nll_loss": 0.9107893109321594, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09798242151737213, "rewards/margins": 0.04095636308193207, "rewards/rejected": -0.1389387845993042, "step": 2680 }, { "epoch": 0.49, "grad_norm": 1.0215779542922974, "learning_rate": 6.491774639685544e-06, "log_odds_chosen": 1.010939598083496, "log_odds_ratio": -0.4497356414794922, "logits/chosen": -0.29089489579200745, "logits/rejected": -0.38202494382858276, "logps/chosen": -0.9118770360946655, "logps/rejected": -1.5500422716140747, "loss": 0.9683, "nll_loss": 0.9233266115188599, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09118770807981491, "rewards/margins": 0.06381651014089584, "rewards/rejected": -0.15500421822071075, "step": 2690 }, { "epoch": 0.49, "grad_norm": 1.354233741760254, "learning_rate": 6.485951375746105e-06, "log_odds_chosen": 0.5281728506088257, "log_odds_ratio": -0.571759045124054, "logits/chosen": -0.3615049421787262, "logits/rejected": -0.36989089846611023, "logps/chosen": -0.9388942718505859, "logps/rejected": -1.2711718082427979, "loss": 0.994, "nll_loss": 0.9367810487747192, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09388942271471024, "rewards/margins": 0.03322775661945343, "rewards/rejected": -0.12711718678474426, "step": 2700 }, { "epoch": 0.49, "grad_norm": 1.767279863357544, "learning_rate": 6.480128111806668e-06, "log_odds_chosen": 0.5129133462905884, "log_odds_ratio": -0.5640901327133179, "logits/chosen": -0.4442782998085022, "logits/rejected": -0.47071051597595215, "logps/chosen": -1.0512162446975708, "logps/rejected": -1.3872233629226685, "loss": 1.0637, "nll_loss": 1.0073240995407104, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10512162744998932, "rewards/margins": 0.033600710332393646, "rewards/rejected": -0.13872233033180237, "step": 2710 }, { "epoch": 0.49, "grad_norm": 1.4116398096084595, "learning_rate": 6.47430484786723e-06, "log_odds_chosen": 0.6858533620834351, "log_odds_ratio": -0.571151614189148, "logits/chosen": -0.3957204222679138, "logits/rejected": -0.4555203914642334, "logps/chosen": -0.7919386625289917, "logps/rejected": -1.2326372861862183, "loss": 1.0122, "nll_loss": 0.9550908803939819, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07919386774301529, "rewards/margins": 0.044069863855838776, "rewards/rejected": -0.12326373159885406, "step": 2720 }, { "epoch": 0.49, "grad_norm": 0.7749652862548828, "learning_rate": 6.4684815839277905e-06, "log_odds_chosen": 0.7230443954467773, "log_odds_ratio": -0.537756085395813, "logits/chosen": -0.3686821460723877, "logits/rejected": -0.4431026577949524, "logps/chosen": -0.9403525590896606, "logps/rejected": -1.4275939464569092, "loss": 0.9609, "nll_loss": 0.9071076512336731, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09403526782989502, "rewards/margins": 0.04872414469718933, "rewards/rejected": -0.14275939762592316, "step": 2730 }, { "epoch": 0.49, "grad_norm": 0.7996389865875244, "learning_rate": 6.462658319988353e-06, "log_odds_chosen": 0.8235070109367371, "log_odds_ratio": -0.5080887079238892, "logits/chosen": -0.34345048666000366, "logits/rejected": -0.42161065340042114, "logps/chosen": -0.9218958020210266, "logps/rejected": -1.4511245489120483, "loss": 0.9716, "nll_loss": 0.9207679033279419, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09218958765268326, "rewards/margins": 0.05292288213968277, "rewards/rejected": -0.14511245489120483, "step": 2740 }, { "epoch": 0.5, "grad_norm": 1.5153478384017944, "learning_rate": 6.456835056048915e-06, "log_odds_chosen": 0.69731605052948, "log_odds_ratio": -0.526605486869812, "logits/chosen": -0.4176858961582184, "logits/rejected": -0.4533267021179199, "logps/chosen": -1.0750231742858887, "logps/rejected": -1.5430954694747925, "loss": 1.0532, "nll_loss": 1.000571608543396, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10750231891870499, "rewards/margins": 0.04680723696947098, "rewards/rejected": -0.15430954098701477, "step": 2750 }, { "epoch": 0.5, "grad_norm": 1.3526057004928589, "learning_rate": 6.451011792109477e-06, "log_odds_chosen": 0.6370762586593628, "log_odds_ratio": -0.5361495018005371, "logits/chosen": -0.36857062578201294, "logits/rejected": -0.4371257722377777, "logps/chosen": -0.9362856149673462, "logps/rejected": -1.307198405265808, "loss": 1.0073, "nll_loss": 0.95367032289505, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09362856298685074, "rewards/margins": 0.03709127753973007, "rewards/rejected": -0.1307198405265808, "step": 2760 }, { "epoch": 0.5, "eval_log_odds_chosen": 0.5925426483154297, "eval_log_odds_ratio": -0.5730059146881104, "eval_logits/chosen": -0.3731546401977539, "eval_logits/rejected": -0.40579819679260254, "eval_logps/chosen": -0.9562295079231262, "eval_logps/rejected": -1.3317075967788696, "eval_loss": 1.0005911588668823, "eval_nll_loss": 0.943290650844574, "eval_rewards/accuracies": 0.6516516804695129, "eval_rewards/chosen": -0.0956229493021965, "eval_rewards/margins": 0.037547819316387177, "eval_rewards/rejected": -0.13317078351974487, "eval_runtime": 2286.9458, "eval_samples_per_second": 1.019, "eval_steps_per_second": 1.019, "step": 2768 }, { "epoch": 0.5, "grad_norm": 1.127935528755188, "learning_rate": 6.445188528170039e-06, "log_odds_chosen": 0.40119534730911255, "log_odds_ratio": -0.6752806901931763, "logits/chosen": -0.32637766003608704, "logits/rejected": -0.34356263279914856, "logps/chosen": -0.938979983329773, "logps/rejected": -1.1499440670013428, "loss": 0.9678, "nll_loss": 0.9002933502197266, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.0938979983329773, "rewards/margins": 0.021096404641866684, "rewards/rejected": -0.11499440670013428, "step": 2770 }, { "epoch": 0.5, "grad_norm": 0.8931524157524109, "learning_rate": 6.439365264230601e-06, "log_odds_chosen": 0.5248688459396362, "log_odds_ratio": -0.6203723549842834, "logits/chosen": -0.4128592610359192, "logits/rejected": -0.46203988790512085, "logps/chosen": -0.9933439493179321, "logps/rejected": -1.3095693588256836, "loss": 1.1102, "nll_loss": 1.0481542348861694, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09933439642190933, "rewards/margins": 0.03162253648042679, "rewards/rejected": -0.13095691800117493, "step": 2780 }, { "epoch": 0.5, "grad_norm": 1.6323082447052002, "learning_rate": 6.4335420002911636e-06, "log_odds_chosen": 0.42494791746139526, "log_odds_ratio": -0.6246153712272644, "logits/chosen": -0.38673996925354004, "logits/rejected": -0.42045697569847107, "logps/chosen": -1.0206129550933838, "logps/rejected": -1.2617638111114502, "loss": 0.9711, "nll_loss": 0.9086559414863586, "rewards/accuracies": 0.625, "rewards/chosen": -0.10206129401922226, "rewards/margins": 0.02411508932709694, "rewards/rejected": -0.1261763870716095, "step": 2790 }, { "epoch": 0.51, "grad_norm": 1.0086363554000854, "learning_rate": 6.427718736351725e-06, "log_odds_chosen": 0.6424874067306519, "log_odds_ratio": -0.5433818101882935, "logits/chosen": -0.4411230981349945, "logits/rejected": -0.45607155561447144, "logps/chosen": -0.8835335969924927, "logps/rejected": -1.297825574874878, "loss": 0.9854, "nll_loss": 0.9310863614082336, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08835335820913315, "rewards/margins": 0.041429195553064346, "rewards/rejected": -0.1297825574874878, "step": 2800 }, { "epoch": 0.51, "grad_norm": 0.996560275554657, "learning_rate": 6.421895472412287e-06, "log_odds_chosen": 0.647859513759613, "log_odds_ratio": -0.5474862456321716, "logits/chosen": -0.36230072379112244, "logits/rejected": -0.3996432423591614, "logps/chosen": -0.9513490796089172, "logps/rejected": -1.3442670106887817, "loss": 0.9573, "nll_loss": 0.9025037884712219, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09513489902019501, "rewards/margins": 0.039291806519031525, "rewards/rejected": -0.13442671298980713, "step": 2810 }, { "epoch": 0.51, "grad_norm": 0.7576774954795837, "learning_rate": 6.416072208472849e-06, "log_odds_chosen": 0.8065885305404663, "log_odds_ratio": -0.5174297094345093, "logits/chosen": -0.3431718349456787, "logits/rejected": -0.37808164954185486, "logps/chosen": -0.828266978263855, "logps/rejected": -1.2940632104873657, "loss": 0.9534, "nll_loss": 0.9016314744949341, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08282670378684998, "rewards/margins": 0.04657962545752525, "rewards/rejected": -0.12940633296966553, "step": 2820 }, { "epoch": 0.51, "grad_norm": 1.1549729108810425, "learning_rate": 6.41024894453341e-06, "log_odds_chosen": 0.37550097703933716, "log_odds_ratio": -0.6334739327430725, "logits/chosen": -0.42225581407546997, "logits/rejected": -0.4576474130153656, "logps/chosen": -0.941022515296936, "logps/rejected": -1.16748046875, "loss": 1.0423, "nll_loss": 0.9789831042289734, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09410224854946136, "rewards/margins": 0.022645797580480576, "rewards/rejected": -0.11674805730581284, "step": 2830 }, { "epoch": 0.51, "grad_norm": 1.2891383171081543, "learning_rate": 6.4044256805939726e-06, "log_odds_chosen": 0.7490788698196411, "log_odds_ratio": -0.5351554751396179, "logits/chosen": -0.3456578850746155, "logits/rejected": -0.40108785033226013, "logps/chosen": -0.8461182713508606, "logps/rejected": -1.3408584594726562, "loss": 0.9503, "nll_loss": 0.8967713117599487, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08461182564496994, "rewards/margins": 0.04947403073310852, "rewards/rejected": -0.13408586382865906, "step": 2840 }, { "epoch": 0.51, "grad_norm": 0.6824889183044434, "learning_rate": 6.398602416654535e-06, "log_odds_chosen": 0.8293973207473755, "log_odds_ratio": -0.5298095941543579, "logits/chosen": -0.3383901119232178, "logits/rejected": -0.37419217824935913, "logps/chosen": -0.8970960378646851, "logps/rejected": -1.441548228263855, "loss": 0.9307, "nll_loss": 0.8776991963386536, "rewards/accuracies": 0.625, "rewards/chosen": -0.08970960974693298, "rewards/margins": 0.05444520711898804, "rewards/rejected": -0.14415481686592102, "step": 2850 }, { "epoch": 0.52, "grad_norm": 1.3923622369766235, "learning_rate": 6.392779152715096e-06, "log_odds_chosen": 0.6607835292816162, "log_odds_ratio": -0.5144139528274536, "logits/chosen": -0.3709713816642761, "logits/rejected": -0.44595274329185486, "logps/chosen": -1.0105422735214233, "logps/rejected": -1.4535988569259644, "loss": 1.023, "nll_loss": 0.971581757068634, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.10105422884225845, "rewards/margins": 0.044305648654699326, "rewards/rejected": -0.14535988867282867, "step": 2860 }, { "epoch": 0.52, "grad_norm": 0.8290739059448242, "learning_rate": 6.386955888775659e-06, "log_odds_chosen": 0.5817979574203491, "log_odds_ratio": -0.6108132004737854, "logits/chosen": -0.4003733694553375, "logits/rejected": -0.4213009774684906, "logps/chosen": -0.8927229046821594, "logps/rejected": -1.2568292617797852, "loss": 0.996, "nll_loss": 0.934949517250061, "rewards/accuracies": 0.625, "rewards/chosen": -0.08927230536937714, "rewards/margins": 0.036410633474588394, "rewards/rejected": -0.12568292021751404, "step": 2870 }, { "epoch": 0.52, "grad_norm": 1.026987910270691, "learning_rate": 6.381132624836221e-06, "log_odds_chosen": 0.9320286512374878, "log_odds_ratio": -0.46994876861572266, "logits/chosen": -0.3082696795463562, "logits/rejected": -0.36887750029563904, "logps/chosen": -0.7973732948303223, "logps/rejected": -1.398707628250122, "loss": 0.9171, "nll_loss": 0.8701435327529907, "rewards/accuracies": 0.75, "rewards/chosen": -0.07973732799291611, "rewards/margins": 0.0601334273815155, "rewards/rejected": -0.1398707628250122, "step": 2880 }, { "epoch": 0.52, "grad_norm": 1.0939037799835205, "learning_rate": 6.375309360896782e-06, "log_odds_chosen": 0.3428114056587219, "log_odds_ratio": -0.6756697297096252, "logits/chosen": -0.3726629614830017, "logits/rejected": -0.3697716295719147, "logps/chosen": -1.040447473526001, "logps/rejected": -1.2464678287506104, "loss": 1.0178, "nll_loss": 0.9502217173576355, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10404475033283234, "rewards/margins": 0.020602049306035042, "rewards/rejected": -0.12464678287506104, "step": 2890 }, { "epoch": 0.52, "grad_norm": 0.9675848484039307, "learning_rate": 6.369486096957344e-06, "log_odds_chosen": 0.7951258420944214, "log_odds_ratio": -0.4850694537162781, "logits/chosen": -0.2807965576648712, "logits/rejected": -0.3710668087005615, "logps/chosen": -0.9030061960220337, "logps/rejected": -1.4071217775344849, "loss": 0.9388, "nll_loss": 0.8902514576911926, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09030061960220337, "rewards/margins": 0.050411563366651535, "rewards/rejected": -0.140712171792984, "step": 2900 }, { "epoch": 0.53, "grad_norm": 1.603076696395874, "learning_rate": 6.363662833017906e-06, "log_odds_chosen": 0.6028963923454285, "log_odds_ratio": -0.6104676127433777, "logits/chosen": -0.31884264945983887, "logits/rejected": -0.3657948076725006, "logps/chosen": -1.0222609043121338, "logps/rejected": -1.4125055074691772, "loss": 1.0856, "nll_loss": 1.024524450302124, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10222609341144562, "rewards/margins": 0.039024461060762405, "rewards/rejected": -0.14125055074691772, "step": 2910 }, { "epoch": 0.53, "grad_norm": 1.0004611015319824, "learning_rate": 6.357839569078468e-06, "log_odds_chosen": 0.443314790725708, "log_odds_ratio": -0.6308013200759888, "logits/chosen": -0.31897619366645813, "logits/rejected": -0.3597314953804016, "logps/chosen": -0.9442898035049438, "logps/rejected": -1.2072503566741943, "loss": 0.908, "nll_loss": 0.8449575304985046, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09442898631095886, "rewards/margins": 0.02629604935646057, "rewards/rejected": -0.12072502076625824, "step": 2920 }, { "epoch": 0.53, "grad_norm": 0.8449299335479736, "learning_rate": 6.35201630513903e-06, "log_odds_chosen": 0.7141034007072449, "log_odds_ratio": -0.5272042155265808, "logits/chosen": -0.29719918966293335, "logits/rejected": -0.3617883622646332, "logps/chosen": -0.9249895215034485, "logps/rejected": -1.4064514636993408, "loss": 0.9018, "nll_loss": 0.849094569683075, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09249895811080933, "rewards/margins": 0.04814619570970535, "rewards/rejected": -0.14064516127109528, "step": 2930 }, { "epoch": 0.53, "grad_norm": 0.9977993369102478, "learning_rate": 6.346193041199592e-06, "log_odds_chosen": 0.5973039865493774, "log_odds_ratio": -0.5831558108329773, "logits/chosen": -0.36963245272636414, "logits/rejected": -0.3873246908187866, "logps/chosen": -1.0007551908493042, "logps/rejected": -1.412858247756958, "loss": 1.0077, "nll_loss": 0.9494079351425171, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10007552057504654, "rewards/margins": 0.04121030494570732, "rewards/rejected": -0.14128582179546356, "step": 2940 }, { "epoch": 0.53, "grad_norm": 0.69256192445755, "learning_rate": 6.340369777260154e-06, "log_odds_chosen": 0.784423828125, "log_odds_ratio": -0.5472939014434814, "logits/chosen": -0.3177037835121155, "logits/rejected": -0.3788725733757019, "logps/chosen": -0.9119611978530884, "logps/rejected": -1.4428807497024536, "loss": 0.9072, "nll_loss": 0.852469801902771, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09119612723588943, "rewards/margins": 0.05309196189045906, "rewards/rejected": -0.1442880928516388, "step": 2950 }, { "epoch": 0.53, "grad_norm": 0.8197088837623596, "learning_rate": 6.334546513320716e-06, "log_odds_chosen": 0.7240030169487, "log_odds_ratio": -0.5557405948638916, "logits/chosen": -0.3689579963684082, "logits/rejected": -0.4157228469848633, "logps/chosen": -0.9546843767166138, "logps/rejected": -1.4102638959884644, "loss": 1.0052, "nll_loss": 0.9496715664863586, "rewards/accuracies": 0.625, "rewards/chosen": -0.0954684391617775, "rewards/margins": 0.04555796831846237, "rewards/rejected": -0.14102640748023987, "step": 2960 }, { "epoch": 0.54, "grad_norm": 1.157364845275879, "learning_rate": 6.328723249381278e-06, "log_odds_chosen": 0.46397870779037476, "log_odds_ratio": -0.6432394981384277, "logits/chosen": -0.36046457290649414, "logits/rejected": -0.32585659623146057, "logps/chosen": -0.9779598116874695, "logps/rejected": -1.2871477603912354, "loss": 0.9652, "nll_loss": 0.9008302688598633, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0977959856390953, "rewards/margins": 0.03091878816485405, "rewards/rejected": -0.12871477007865906, "step": 2970 }, { "epoch": 0.54, "grad_norm": 0.8545392155647278, "learning_rate": 6.32289998544184e-06, "log_odds_chosen": 0.7115742564201355, "log_odds_ratio": -0.5449367761611938, "logits/chosen": -0.3283035159111023, "logits/rejected": -0.39339348673820496, "logps/chosen": -0.8771149516105652, "logps/rejected": -1.3346948623657227, "loss": 1.0092, "nll_loss": 0.9547283053398132, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08771149814128876, "rewards/margins": 0.04575798287987709, "rewards/rejected": -0.13346949219703674, "step": 2980 }, { "epoch": 0.54, "grad_norm": 0.9508376717567444, "learning_rate": 6.317076721502401e-06, "log_odds_chosen": 0.532355546951294, "log_odds_ratio": -0.5811368227005005, "logits/chosen": -0.38911187648773193, "logits/rejected": -0.42666807770729065, "logps/chosen": -1.050180435180664, "logps/rejected": -1.404990792274475, "loss": 0.9887, "nll_loss": 0.9305723309516907, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10501804202795029, "rewards/margins": 0.03548102825880051, "rewards/rejected": -0.140499085187912, "step": 2990 }, { "epoch": 0.54, "grad_norm": 1.369917631149292, "learning_rate": 6.311253457562964e-06, "log_odds_chosen": 0.5318782329559326, "log_odds_ratio": -0.5593219995498657, "logits/chosen": -0.4074210226535797, "logits/rejected": -0.4076215326786041, "logps/chosen": -0.960712730884552, "logps/rejected": -1.3129749298095703, "loss": 0.9791, "nll_loss": 0.9231414794921875, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0960712656378746, "rewards/margins": 0.03522622585296631, "rewards/rejected": -0.1312974989414215, "step": 3000 }, { "epoch": 0.54, "grad_norm": 2.4820430278778076, "learning_rate": 6.305430193623526e-06, "log_odds_chosen": 0.6888760328292847, "log_odds_ratio": -0.5727877616882324, "logits/chosen": -0.3978230953216553, "logits/rejected": -0.40785154700279236, "logps/chosen": -0.9991810917854309, "logps/rejected": -1.4873907566070557, "loss": 0.9746, "nll_loss": 0.9173223376274109, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09991810470819473, "rewards/margins": 0.04882097989320755, "rewards/rejected": -0.1487390697002411, "step": 3010 }, { "epoch": 0.55, "grad_norm": 1.1071016788482666, "learning_rate": 6.299606929684087e-06, "log_odds_chosen": 0.49799299240112305, "log_odds_ratio": -0.6224324107170105, "logits/chosen": -0.3779663145542145, "logits/rejected": -0.3969855308532715, "logps/chosen": -1.0049419403076172, "logps/rejected": -1.3126152753829956, "loss": 1.0151, "nll_loss": 0.9528893232345581, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10049420595169067, "rewards/margins": 0.030767327174544334, "rewards/rejected": -0.13126154243946075, "step": 3020 }, { "epoch": 0.55, "grad_norm": 0.5869792103767395, "learning_rate": 6.29378366574465e-06, "log_odds_chosen": 0.6354817748069763, "log_odds_ratio": -0.5479956865310669, "logits/chosen": -0.3983832001686096, "logits/rejected": -0.41133037209510803, "logps/chosen": -0.9372811317443848, "logps/rejected": -1.3125584125518799, "loss": 1.0118, "nll_loss": 0.9569603800773621, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09372811019420624, "rewards/margins": 0.037527717649936676, "rewards/rejected": -0.1312558352947235, "step": 3030 }, { "epoch": 0.55, "grad_norm": 0.6390839219093323, "learning_rate": 6.287960401805212e-06, "log_odds_chosen": 0.8298152089118958, "log_odds_ratio": -0.5246734023094177, "logits/chosen": -0.3871910870075226, "logits/rejected": -0.4038742184638977, "logps/chosen": -0.9234312772750854, "logps/rejected": -1.4239513874053955, "loss": 1.0285, "nll_loss": 0.9760168194770813, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09234314411878586, "rewards/margins": 0.0500519797205925, "rewards/rejected": -0.14239510893821716, "step": 3040 }, { "epoch": 0.55, "grad_norm": 1.0352649688720703, "learning_rate": 6.2821371378657734e-06, "log_odds_chosen": 0.7161882519721985, "log_odds_ratio": -0.5397850275039673, "logits/chosen": -0.30927398800849915, "logits/rejected": -0.3645241856575012, "logps/chosen": -0.9100635647773743, "logps/rejected": -1.3684438467025757, "loss": 0.9382, "nll_loss": 0.8842523694038391, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09100636094808578, "rewards/margins": 0.04583803564310074, "rewards/rejected": -0.13684439659118652, "step": 3050 }, { "epoch": 0.55, "grad_norm": 1.2245994806289673, "learning_rate": 6.276313873926336e-06, "log_odds_chosen": 0.3765376806259155, "log_odds_ratio": -0.6475566029548645, "logits/chosen": -0.35767942667007446, "logits/rejected": -0.40610751509666443, "logps/chosen": -0.9529776573181152, "logps/rejected": -1.1977792978286743, "loss": 0.9646, "nll_loss": 0.8998012542724609, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09529776871204376, "rewards/margins": 0.024480151012539864, "rewards/rejected": -0.11977791786193848, "step": 3060 }, { "epoch": 0.55, "grad_norm": 0.6054956912994385, "learning_rate": 6.270490609986898e-06, "log_odds_chosen": 0.49144425988197327, "log_odds_ratio": -0.6276726722717285, "logits/chosen": -0.3699381649494171, "logits/rejected": -0.39319050312042236, "logps/chosen": -0.9125950932502747, "logps/rejected": -1.2397955656051636, "loss": 1.0078, "nll_loss": 0.9449881315231323, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09125951677560806, "rewards/margins": 0.03272005170583725, "rewards/rejected": -0.12397956848144531, "step": 3070 }, { "epoch": 0.56, "grad_norm": 1.454533576965332, "learning_rate": 6.264667346047459e-06, "log_odds_chosen": 0.5676583051681519, "log_odds_ratio": -0.6090052723884583, "logits/chosen": -0.39846283197402954, "logits/rejected": -0.4198426306247711, "logps/chosen": -0.9415773153305054, "logps/rejected": -1.296958327293396, "loss": 1.0253, "nll_loss": 0.9643552899360657, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09415774047374725, "rewards/margins": 0.03553809970617294, "rewards/rejected": -0.1296958327293396, "step": 3080 }, { "epoch": 0.56, "grad_norm": 1.0474345684051514, "learning_rate": 6.258844082108021e-06, "log_odds_chosen": 0.6766977310180664, "log_odds_ratio": -0.5505935549736023, "logits/chosen": -0.37675073742866516, "logits/rejected": -0.40171122550964355, "logps/chosen": -1.0111701488494873, "logps/rejected": -1.48032546043396, "loss": 1.0248, "nll_loss": 0.9696931838989258, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10111702978610992, "rewards/margins": 0.04691552370786667, "rewards/rejected": -0.1480325609445572, "step": 3090 }, { "epoch": 0.56, "grad_norm": 0.677821695804596, "learning_rate": 6.253020818168583e-06, "log_odds_chosen": 0.806612491607666, "log_odds_ratio": -0.49709954857826233, "logits/chosen": -0.35511764883995056, "logits/rejected": -0.3808368742465973, "logps/chosen": -0.9699333310127258, "logps/rejected": -1.517735481262207, "loss": 0.9724, "nll_loss": 0.9226738214492798, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.0969933345913887, "rewards/margins": 0.05478020757436752, "rewards/rejected": -0.15177355706691742, "step": 3100 }, { "epoch": 0.56, "grad_norm": 1.076366901397705, "learning_rate": 6.247197554229145e-06, "log_odds_chosen": 0.5176805853843689, "log_odds_ratio": -0.6054424047470093, "logits/chosen": -0.368263840675354, "logits/rejected": -0.37648746371269226, "logps/chosen": -1.000482439994812, "logps/rejected": -1.343254804611206, "loss": 1.0322, "nll_loss": 0.9716836214065552, "rewards/accuracies": 0.625, "rewards/chosen": -0.1000482439994812, "rewards/margins": 0.03427725285291672, "rewards/rejected": -0.13432548940181732, "step": 3110 }, { "epoch": 0.56, "grad_norm": 0.7154731750488281, "learning_rate": 6.241374290289707e-06, "log_odds_chosen": 0.6836196184158325, "log_odds_ratio": -0.5482660531997681, "logits/chosen": -0.3415481448173523, "logits/rejected": -0.3720071315765381, "logps/chosen": -0.8983786702156067, "logps/rejected": -1.293168306350708, "loss": 0.9264, "nll_loss": 0.8715240359306335, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08983787894248962, "rewards/margins": 0.03947895020246506, "rewards/rejected": -0.12931683659553528, "step": 3120 }, { "epoch": 0.57, "grad_norm": 0.8524779677391052, "learning_rate": 6.235551026350269e-06, "log_odds_chosen": 0.6569803953170776, "log_odds_ratio": -0.5352288484573364, "logits/chosen": -0.3533919155597687, "logits/rejected": -0.43142709136009216, "logps/chosen": -0.9483108520507812, "logps/rejected": -1.3620734214782715, "loss": 1.0218, "nll_loss": 0.96832275390625, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09483110159635544, "rewards/margins": 0.04137624427676201, "rewards/rejected": -0.13620734214782715, "step": 3130 }, { "epoch": 0.57, "grad_norm": 1.4360308647155762, "learning_rate": 6.229727762410831e-06, "log_odds_chosen": 0.6728376746177673, "log_odds_ratio": -0.5485900640487671, "logits/chosen": -0.4550979733467102, "logits/rejected": -0.4855107367038727, "logps/chosen": -0.9604121446609497, "logps/rejected": -1.4198859930038452, "loss": 1.0696, "nll_loss": 1.014716386795044, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0960412248969078, "rewards/margins": 0.04594738408923149, "rewards/rejected": -0.141988605260849, "step": 3140 }, { "epoch": 0.57, "grad_norm": 0.6961885094642639, "learning_rate": 6.223904498471393e-06, "log_odds_chosen": 0.4872314929962158, "log_odds_ratio": -0.6163089871406555, "logits/chosen": -0.46947789192199707, "logits/rejected": -0.4593765139579773, "logps/chosen": -0.9408215284347534, "logps/rejected": -1.2545238733291626, "loss": 1.0243, "nll_loss": 0.9627069234848022, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09408216178417206, "rewards/margins": 0.03137023001909256, "rewards/rejected": -0.12545239925384521, "step": 3150 }, { "epoch": 0.57, "grad_norm": 2.0956509113311768, "learning_rate": 6.2180812345319555e-06, "log_odds_chosen": 0.9468412399291992, "log_odds_ratio": -0.47234565019607544, "logits/chosen": -0.3197837471961975, "logits/rejected": -0.3366406261920929, "logps/chosen": -0.8767255544662476, "logps/rejected": -1.4780380725860596, "loss": 0.9153, "nll_loss": 0.8680181503295898, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08767254650592804, "rewards/margins": 0.060131270438432693, "rewards/rejected": -0.14780382812023163, "step": 3160 }, { "epoch": 0.57, "grad_norm": 1.5426450967788696, "learning_rate": 6.212257970592516e-06, "log_odds_chosen": 0.5291934609413147, "log_odds_ratio": -0.5959833860397339, "logits/chosen": -0.45292121171951294, "logits/rejected": -0.449939489364624, "logps/chosen": -0.976009726524353, "logps/rejected": -1.3378291130065918, "loss": 1.0219, "nll_loss": 0.9623022079467773, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09760098159313202, "rewards/margins": 0.03618193417787552, "rewards/rejected": -0.13378292322158813, "step": 3170 }, { "epoch": 0.57, "grad_norm": 1.670472264289856, "learning_rate": 6.206434706653078e-06, "log_odds_chosen": 0.671677827835083, "log_odds_ratio": -0.6096156239509583, "logits/chosen": -0.4184805452823639, "logits/rejected": -0.44354119896888733, "logps/chosen": -1.0512487888336182, "logps/rejected": -1.49226975440979, "loss": 1.0419, "nll_loss": 0.980981171131134, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10512487590312958, "rewards/margins": 0.04410209506750107, "rewards/rejected": -0.14922699332237244, "step": 3180 }, { "epoch": 0.58, "grad_norm": 1.4701049327850342, "learning_rate": 6.200611442713641e-06, "log_odds_chosen": 0.6520159244537354, "log_odds_ratio": -0.5479967594146729, "logits/chosen": -0.34326881170272827, "logits/rejected": -0.40131497383117676, "logps/chosen": -0.9332335591316223, "logps/rejected": -1.3544549942016602, "loss": 1.0696, "nll_loss": 1.0148441791534424, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09332336485385895, "rewards/margins": 0.042122118175029755, "rewards/rejected": -0.1354454755783081, "step": 3190 }, { "epoch": 0.58, "grad_norm": 1.3066092729568481, "learning_rate": 6.194788178774202e-06, "log_odds_chosen": 0.6795617938041687, "log_odds_ratio": -0.5576266050338745, "logits/chosen": -0.3325703740119934, "logits/rejected": -0.3916395604610443, "logps/chosen": -1.0032011270523071, "logps/rejected": -1.4542853832244873, "loss": 1.0795, "nll_loss": 1.0237706899642944, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10032012313604355, "rewards/margins": 0.04510842263698578, "rewards/rejected": -0.14542852342128754, "step": 3200 }, { "epoch": 0.58, "grad_norm": 1.0712743997573853, "learning_rate": 6.1889649148347645e-06, "log_odds_chosen": 0.537954568862915, "log_odds_ratio": -0.5886083841323853, "logits/chosen": -0.4207335114479065, "logits/rejected": -0.39842933416366577, "logps/chosen": -1.0041154623031616, "logps/rejected": -1.3683401346206665, "loss": 1.035, "nll_loss": 0.9761091470718384, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1004115492105484, "rewards/margins": 0.03642246127128601, "rewards/rejected": -0.13683399558067322, "step": 3210 }, { "epoch": 0.58, "grad_norm": 1.3003288507461548, "learning_rate": 6.183141650895327e-06, "log_odds_chosen": 0.4215551018714905, "log_odds_ratio": -0.673181414604187, "logits/chosen": -0.38877248764038086, "logits/rejected": -0.40194326639175415, "logps/chosen": -1.0147056579589844, "logps/rejected": -1.3024789094924927, "loss": 1.0088, "nll_loss": 0.9414365887641907, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10147056728601456, "rewards/margins": 0.028777319937944412, "rewards/rejected": -0.13024787604808807, "step": 3220 }, { "epoch": 0.58, "grad_norm": 1.1259840726852417, "learning_rate": 6.177318386955889e-06, "log_odds_chosen": 0.65233314037323, "log_odds_ratio": -0.5480107069015503, "logits/chosen": -0.38454505801200867, "logits/rejected": -0.3793458938598633, "logps/chosen": -0.920840859413147, "logps/rejected": -1.2983678579330444, "loss": 0.9632, "nll_loss": 0.9083762168884277, "rewards/accuracies": 0.625, "rewards/chosen": -0.09208408743143082, "rewards/margins": 0.03775270655751228, "rewards/rejected": -0.1298367828130722, "step": 3230 }, { "epoch": 0.59, "grad_norm": 1.1241098642349243, "learning_rate": 6.1714951230164505e-06, "log_odds_chosen": 0.44269585609436035, "log_odds_ratio": -0.6357613801956177, "logits/chosen": -0.4421209394931793, "logits/rejected": -0.4722370207309723, "logps/chosen": -1.0277878046035767, "logps/rejected": -1.2992737293243408, "loss": 1.0363, "nll_loss": 0.9727180600166321, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10277877748012543, "rewards/margins": 0.02714858576655388, "rewards/rejected": -0.1299273669719696, "step": 3240 }, { "epoch": 0.59, "grad_norm": 1.237729549407959, "learning_rate": 6.165671859077013e-06, "log_odds_chosen": 1.0110903978347778, "log_odds_ratio": -0.46358394622802734, "logits/chosen": -0.28028032183647156, "logits/rejected": -0.3303956091403961, "logps/chosen": -0.9014646410942078, "logps/rejected": -1.5677226781845093, "loss": 0.9381, "nll_loss": 0.8917421102523804, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09014646708965302, "rewards/margins": 0.06662581115961075, "rewards/rejected": -0.15677228569984436, "step": 3250 }, { "epoch": 0.59, "grad_norm": 1.3306907415390015, "learning_rate": 6.159848595137574e-06, "log_odds_chosen": 0.5208662748336792, "log_odds_ratio": -0.6377922296524048, "logits/chosen": -0.4297618865966797, "logits/rejected": -0.42744913697242737, "logps/chosen": -1.0824130773544312, "logps/rejected": -1.4034180641174316, "loss": 1.0254, "nll_loss": 0.9615737795829773, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10824130475521088, "rewards/margins": 0.03210049122571945, "rewards/rejected": -0.14034178853034973, "step": 3260 }, { "epoch": 0.59, "grad_norm": 1.011088490486145, "learning_rate": 6.154025331198136e-06, "log_odds_chosen": 0.3441120982170105, "log_odds_ratio": -0.660956621170044, "logits/chosen": -0.4124126434326172, "logits/rejected": -0.3904462456703186, "logps/chosen": -1.0279223918914795, "logps/rejected": -1.287450909614563, "loss": 1.0092, "nll_loss": 0.9431363940238953, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10279224067926407, "rewards/margins": 0.025952842086553574, "rewards/rejected": -0.12874507904052734, "step": 3270 }, { "epoch": 0.59, "grad_norm": 1.1600735187530518, "learning_rate": 6.148202067258698e-06, "log_odds_chosen": 0.9012781381607056, "log_odds_ratio": -0.4865642189979553, "logits/chosen": -0.33863896131515503, "logits/rejected": -0.36632639169692993, "logps/chosen": -0.8989202380180359, "logps/rejected": -1.4705233573913574, "loss": 0.8402, "nll_loss": 0.7914935946464539, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.08989204466342926, "rewards/margins": 0.05716029554605484, "rewards/rejected": -0.1470523327589035, "step": 3280 }, { "epoch": 0.59, "grad_norm": 0.9526046514511108, "learning_rate": 6.14237880331926e-06, "log_odds_chosen": 0.625182032585144, "log_odds_ratio": -0.5688682794570923, "logits/chosen": -0.38069894909858704, "logits/rejected": -0.4275147318840027, "logps/chosen": -0.8982593417167664, "logps/rejected": -1.2962671518325806, "loss": 0.9541, "nll_loss": 0.8972567319869995, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08982594311237335, "rewards/margins": 0.039800770580768585, "rewards/rejected": -0.12962672114372253, "step": 3290 }, { "epoch": 0.6, "grad_norm": 0.8836661577224731, "learning_rate": 6.136555539379822e-06, "log_odds_chosen": 0.6172818541526794, "log_odds_ratio": -0.5925203561782837, "logits/chosen": -0.4373806416988373, "logits/rejected": -0.5029473304748535, "logps/chosen": -0.9505300521850586, "logps/rejected": -1.3144811391830444, "loss": 1.0003, "nll_loss": 0.9410818219184875, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09505301713943481, "rewards/margins": 0.0363951250910759, "rewards/rejected": -0.13144811987876892, "step": 3300 }, { "epoch": 0.6, "grad_norm": 0.7096745371818542, "learning_rate": 6.130732275440384e-06, "log_odds_chosen": 0.6025352478027344, "log_odds_ratio": -0.598220944404602, "logits/chosen": -0.3227522373199463, "logits/rejected": -0.4034956097602844, "logps/chosen": -0.9186944961547852, "logps/rejected": -1.2923507690429688, "loss": 0.9583, "nll_loss": 0.8985183835029602, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09186945855617523, "rewards/margins": 0.03736562281847, "rewards/rejected": -0.12923508882522583, "step": 3310 }, { "epoch": 0.6, "grad_norm": 0.6237562894821167, "learning_rate": 6.1249090115009465e-06, "log_odds_chosen": 0.8236312866210938, "log_odds_ratio": -0.5209259986877441, "logits/chosen": -0.3754587769508362, "logits/rejected": -0.423706591129303, "logps/chosen": -0.8962365984916687, "logps/rejected": -1.4527724981307983, "loss": 0.9374, "nll_loss": 0.8853539228439331, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08962366729974747, "rewards/margins": 0.055653590708971024, "rewards/rejected": -0.1452772468328476, "step": 3320 }, { "epoch": 0.6, "grad_norm": 1.383882999420166, "learning_rate": 6.119085747561508e-06, "log_odds_chosen": 0.49736565351486206, "log_odds_ratio": -0.6017710566520691, "logits/chosen": -0.47567468881607056, "logits/rejected": -0.4611749053001404, "logps/chosen": -1.004540205001831, "logps/rejected": -1.3043259382247925, "loss": 1.0461, "nll_loss": 0.985937237739563, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10045403242111206, "rewards/margins": 0.0299785528331995, "rewards/rejected": -0.13043257594108582, "step": 3330 }, { "epoch": 0.6, "grad_norm": 2.3500630855560303, "learning_rate": 6.11326248362207e-06, "log_odds_chosen": 0.7103801965713501, "log_odds_ratio": -0.5463491678237915, "logits/chosen": -0.3539440631866455, "logits/rejected": -0.39365509152412415, "logps/chosen": -0.9359360933303833, "logps/rejected": -1.3999426364898682, "loss": 0.9283, "nll_loss": 0.87371426820755, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09359361231327057, "rewards/margins": 0.046400636434555054, "rewards/rejected": -0.13999423384666443, "step": 3340 }, { "epoch": 0.61, "grad_norm": 1.4304038286209106, "learning_rate": 6.107439219682632e-06, "log_odds_chosen": 0.9102018475532532, "log_odds_ratio": -0.5017456412315369, "logits/chosen": -0.37354880571365356, "logits/rejected": -0.39932340383529663, "logps/chosen": -0.8635073900222778, "logps/rejected": -1.4160997867584229, "loss": 0.9113, "nll_loss": 0.8611549139022827, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08635074645280838, "rewards/margins": 0.05525922775268555, "rewards/rejected": -0.14160996675491333, "step": 3350 }, { "epoch": 0.61, "grad_norm": 0.8932999968528748, "learning_rate": 6.101615955743193e-06, "log_odds_chosen": 0.4051954746246338, "log_odds_ratio": -0.6483933329582214, "logits/chosen": -0.4127295911312103, "logits/rejected": -0.44972100853919983, "logps/chosen": -0.9479262232780457, "logps/rejected": -1.258101224899292, "loss": 0.9926, "nll_loss": 0.9278033375740051, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09479261934757233, "rewards/margins": 0.03101750835776329, "rewards/rejected": -0.12581013143062592, "step": 3360 }, { "epoch": 0.61, "grad_norm": 0.8245841860771179, "learning_rate": 6.0957926918037555e-06, "log_odds_chosen": 0.7083054184913635, "log_odds_ratio": -0.513762891292572, "logits/chosen": -0.40341416001319885, "logits/rejected": -0.42578190565109253, "logps/chosen": -0.9963234066963196, "logps/rejected": -1.4809606075286865, "loss": 1.0241, "nll_loss": 0.9727651476860046, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09963233768939972, "rewards/margins": 0.04846369847655296, "rewards/rejected": -0.14809605479240417, "step": 3370 }, { "epoch": 0.61, "grad_norm": 0.7346036434173584, "learning_rate": 6.089969427864318e-06, "log_odds_chosen": 0.7608426213264465, "log_odds_ratio": -0.5603666305541992, "logits/chosen": -0.4570868909358978, "logits/rejected": -0.4746372699737549, "logps/chosen": -1.0287775993347168, "logps/rejected": -1.521809697151184, "loss": 1.044, "nll_loss": 0.9879173040390015, "rewards/accuracies": 0.625, "rewards/chosen": -0.10287775844335556, "rewards/margins": 0.04930321127176285, "rewards/rejected": -0.1521809697151184, "step": 3380 }, { "epoch": 0.61, "grad_norm": 0.9534040689468384, "learning_rate": 6.084146163924879e-06, "log_odds_chosen": 0.8834999203681946, "log_odds_ratio": -0.5292294025421143, "logits/chosen": -0.4181883931159973, "logits/rejected": -0.44676756858825684, "logps/chosen": -0.941826343536377, "logps/rejected": -1.5739504098892212, "loss": 0.9869, "nll_loss": 0.9340030550956726, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09418264031410217, "rewards/margins": 0.06321238726377487, "rewards/rejected": -0.15739500522613525, "step": 3390 }, { "epoch": 0.61, "grad_norm": 1.0045500993728638, "learning_rate": 6.0783228999854416e-06, "log_odds_chosen": 0.6764132380485535, "log_odds_ratio": -0.5628331303596497, "logits/chosen": -0.42462119460105896, "logits/rejected": -0.4508097767829895, "logps/chosen": -0.9737855792045593, "logps/rejected": -1.395506501197815, "loss": 1.003, "nll_loss": 0.9467493891716003, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09737856686115265, "rewards/margins": 0.04217210039496422, "rewards/rejected": -0.13955065608024597, "step": 3400 }, { "epoch": 0.62, "grad_norm": 0.7688149809837341, "learning_rate": 6.072499636046004e-06, "log_odds_chosen": 0.7314145565032959, "log_odds_ratio": -0.5189186930656433, "logits/chosen": -0.3948749005794525, "logits/rejected": -0.41057324409484863, "logps/chosen": -1.0129984617233276, "logps/rejected": -1.5188207626342773, "loss": 0.9809, "nll_loss": 0.9290172457695007, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10129984468221664, "rewards/margins": 0.05058223009109497, "rewards/rejected": -0.15188206732273102, "step": 3410 }, { "epoch": 0.62, "grad_norm": 0.5660412311553955, "learning_rate": 6.066676372106565e-06, "log_odds_chosen": 0.40381866693496704, "log_odds_ratio": -0.6203995943069458, "logits/chosen": -0.3358531892299652, "logits/rejected": -0.3932770788669586, "logps/chosen": -0.9776952862739563, "logps/rejected": -1.2311264276504517, "loss": 1.0244, "nll_loss": 0.9623534083366394, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09776954352855682, "rewards/margins": 0.025343095883727074, "rewards/rejected": -0.12311263382434845, "step": 3420 }, { "epoch": 0.62, "grad_norm": 0.8201726675033569, "learning_rate": 6.060853108167128e-06, "log_odds_chosen": 0.8580142259597778, "log_odds_ratio": -0.5500550866127014, "logits/chosen": -0.38643431663513184, "logits/rejected": -0.42080745100975037, "logps/chosen": -0.9666447639465332, "logps/rejected": -1.4860246181488037, "loss": 1.0203, "nll_loss": 0.9652493596076965, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09666448831558228, "rewards/margins": 0.05193798616528511, "rewards/rejected": -0.1486024558544159, "step": 3430 }, { "epoch": 0.62, "grad_norm": 1.111434817314148, "learning_rate": 6.055029844227689e-06, "log_odds_chosen": 0.8360646963119507, "log_odds_ratio": -0.5022442936897278, "logits/chosen": -0.3898642361164093, "logits/rejected": -0.3858799636363983, "logps/chosen": -0.9356891512870789, "logps/rejected": -1.484752893447876, "loss": 0.969, "nll_loss": 0.9187499284744263, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09356891363859177, "rewards/margins": 0.054906368255615234, "rewards/rejected": -0.1484752744436264, "step": 3440 }, { "epoch": 0.62, "grad_norm": 0.8832417726516724, "learning_rate": 6.0492065802882514e-06, "log_odds_chosen": 0.5634386539459229, "log_odds_ratio": -0.6130571365356445, "logits/chosen": -0.4038141369819641, "logits/rejected": -0.44958561658859253, "logps/chosen": -1.0071508884429932, "logps/rejected": -1.4017693996429443, "loss": 1.0727, "nll_loss": 1.0114319324493408, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10071510076522827, "rewards/margins": 0.03946184366941452, "rewards/rejected": -0.140176922082901, "step": 3450 }, { "epoch": 0.63, "grad_norm": 1.0038444995880127, "learning_rate": 6.043383316348813e-06, "log_odds_chosen": 0.7370051741600037, "log_odds_ratio": -0.5406359434127808, "logits/chosen": -0.43003183603286743, "logits/rejected": -0.4604741036891937, "logps/chosen": -1.009166955947876, "logps/rejected": -1.5070292949676514, "loss": 0.9398, "nll_loss": 0.885736346244812, "rewards/accuracies": 0.625, "rewards/chosen": -0.10091669857501984, "rewards/margins": 0.04978623613715172, "rewards/rejected": -0.15070292353630066, "step": 3460 }, { "epoch": 0.63, "grad_norm": 0.7314472794532776, "learning_rate": 6.037560052409375e-06, "log_odds_chosen": 0.9042521715164185, "log_odds_ratio": -0.5049646496772766, "logits/chosen": -0.3623233139514923, "logits/rejected": -0.4032473564147949, "logps/chosen": -0.8811138272285461, "logps/rejected": -1.4935479164123535, "loss": 0.9903, "nll_loss": 0.9398002624511719, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08811138570308685, "rewards/margins": 0.061243414878845215, "rewards/rejected": -0.14935480058193207, "step": 3470 }, { "epoch": 0.63, "grad_norm": 0.7558674216270447, "learning_rate": 6.0317367884699375e-06, "log_odds_chosen": 0.6601871252059937, "log_odds_ratio": -0.5303488373756409, "logits/chosen": -0.41661277413368225, "logits/rejected": -0.4547964036464691, "logps/chosen": -1.0405640602111816, "logps/rejected": -1.4991106986999512, "loss": 1.0412, "nll_loss": 0.988142192363739, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10405639559030533, "rewards/margins": 0.04585467278957367, "rewards/rejected": -0.1499110758304596, "step": 3480 }, { "epoch": 0.63, "grad_norm": 1.1181609630584717, "learning_rate": 6.025913524530499e-06, "log_odds_chosen": 0.6442530751228333, "log_odds_ratio": -0.5743609666824341, "logits/chosen": -0.4180160462856293, "logits/rejected": -0.4605022370815277, "logps/chosen": -1.0506532192230225, "logps/rejected": -1.4884201288223267, "loss": 1.0156, "nll_loss": 0.9582085609436035, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10506530851125717, "rewards/margins": 0.04377670958638191, "rewards/rejected": -0.14884202182292938, "step": 3490 }, { "epoch": 0.63, "grad_norm": 0.6729230880737305, "learning_rate": 6.020090260591061e-06, "log_odds_chosen": 0.6248651742935181, "log_odds_ratio": -0.5695139169692993, "logits/chosen": -0.4435412883758545, "logits/rejected": -0.49575695395469666, "logps/chosen": -1.0582239627838135, "logps/rejected": -1.4656044244766235, "loss": 1.0504, "nll_loss": 0.9934715032577515, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10582239925861359, "rewards/margins": 0.04073803871870041, "rewards/rejected": -0.1465604454278946, "step": 3500 }, { "epoch": 0.63, "grad_norm": 1.7165751457214355, "learning_rate": 6.014266996651624e-06, "log_odds_chosen": 0.6814987063407898, "log_odds_ratio": -0.5507006049156189, "logits/chosen": -0.3809913396835327, "logits/rejected": -0.44108277559280396, "logps/chosen": -0.9436809420585632, "logps/rejected": -1.4071236848831177, "loss": 0.9835, "nll_loss": 0.9284241795539856, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0943681001663208, "rewards/margins": 0.046344272792339325, "rewards/rejected": -0.14071236550807953, "step": 3510 }, { "epoch": 0.64, "grad_norm": 0.9776991605758667, "learning_rate": 6.008443732712184e-06, "log_odds_chosen": 0.8048272132873535, "log_odds_ratio": -0.5047035217285156, "logits/chosen": -0.3753579556941986, "logits/rejected": -0.4107332229614258, "logps/chosen": -0.8500427007675171, "logps/rejected": -1.3697882890701294, "loss": 0.9852, "nll_loss": 0.9346874952316284, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0850042775273323, "rewards/margins": 0.05197455734014511, "rewards/rejected": -0.13697883486747742, "step": 3520 }, { "epoch": 0.64, "grad_norm": 0.9801293611526489, "learning_rate": 6.0026204687727465e-06, "log_odds_chosen": 0.6912647485733032, "log_odds_ratio": -0.5525920987129211, "logits/chosen": -0.3607081174850464, "logits/rejected": -0.35922548174858093, "logps/chosen": -0.9259079098701477, "logps/rejected": -1.367457389831543, "loss": 0.955, "nll_loss": 0.8997598886489868, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09259079396724701, "rewards/margins": 0.04415493831038475, "rewards/rejected": -0.13674573600292206, "step": 3530 }, { "epoch": 0.64, "grad_norm": 2.1034672260284424, "learning_rate": 5.996797204833309e-06, "log_odds_chosen": 0.5489142537117004, "log_odds_ratio": -0.6321261525154114, "logits/chosen": -0.3296765387058258, "logits/rejected": -0.3923155665397644, "logps/chosen": -1.0097711086273193, "logps/rejected": -1.359426498413086, "loss": 1.0141, "nll_loss": 0.9508882761001587, "rewards/accuracies": 0.625, "rewards/chosen": -0.10097712278366089, "rewards/margins": 0.03496553748846054, "rewards/rejected": -0.13594265282154083, "step": 3540 }, { "epoch": 0.64, "grad_norm": 0.7424829006195068, "learning_rate": 5.99097394089387e-06, "log_odds_chosen": 0.7225674390792847, "log_odds_ratio": -0.5436952114105225, "logits/chosen": -0.4128730893135071, "logits/rejected": -0.46673256158828735, "logps/chosen": -0.9159235954284668, "logps/rejected": -1.370883822441101, "loss": 1.0188, "nll_loss": 0.9644242525100708, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09159235656261444, "rewards/margins": 0.04549603909254074, "rewards/rejected": -0.13708840310573578, "step": 3550 }, { "epoch": 0.64, "grad_norm": 1.374440312385559, "learning_rate": 5.985150676954433e-06, "log_odds_chosen": 0.8487750887870789, "log_odds_ratio": -0.5091025829315186, "logits/chosen": -0.3625090718269348, "logits/rejected": -0.3949008882045746, "logps/chosen": -0.8889452219009399, "logps/rejected": -1.4368683099746704, "loss": 0.9307, "nll_loss": 0.8798302412033081, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08889452368021011, "rewards/margins": 0.05479230731725693, "rewards/rejected": -0.14368684589862823, "step": 3560 }, { "epoch": 0.64, "grad_norm": 1.3159680366516113, "learning_rate": 5.979327413014995e-06, "log_odds_chosen": 0.43507567048072815, "log_odds_ratio": -0.6821829676628113, "logits/chosen": -0.40561336278915405, "logits/rejected": -0.42101773619651794, "logps/chosen": -1.0213924646377563, "logps/rejected": -1.3662772178649902, "loss": 1.0356, "nll_loss": 0.9673385620117188, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.10213924944400787, "rewards/margins": 0.034488484263420105, "rewards/rejected": -0.13662774860858917, "step": 3570 }, { "epoch": 0.65, "grad_norm": 0.8232075572013855, "learning_rate": 5.973504149075556e-06, "log_odds_chosen": 0.45091715455055237, "log_odds_ratio": -0.6581467390060425, "logits/chosen": -0.40304097533226013, "logits/rejected": -0.41084232926368713, "logps/chosen": -0.9728569984436035, "logps/rejected": -1.2779741287231445, "loss": 1.0213, "nll_loss": 0.9555120468139648, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09728571027517319, "rewards/margins": 0.030511703342199326, "rewards/rejected": -0.12779740989208221, "step": 3580 }, { "epoch": 0.65, "grad_norm": 1.1752350330352783, "learning_rate": 5.967680885136119e-06, "log_odds_chosen": 0.5622051358222961, "log_odds_ratio": -0.621745228767395, "logits/chosen": -0.3896362781524658, "logits/rejected": -0.4081563949584961, "logps/chosen": -0.8804425001144409, "logps/rejected": -1.2338899374008179, "loss": 0.9236, "nll_loss": 0.8613778948783875, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08804424852132797, "rewards/margins": 0.03534474968910217, "rewards/rejected": -0.12338900566101074, "step": 3590 }, { "epoch": 0.65, "grad_norm": 0.8000437021255493, "learning_rate": 5.961857621196681e-06, "log_odds_chosen": 0.643998920917511, "log_odds_ratio": -0.5432684421539307, "logits/chosen": -0.36141398549079895, "logits/rejected": -0.39153724908828735, "logps/chosen": -0.8999090194702148, "logps/rejected": -1.299071192741394, "loss": 0.9452, "nll_loss": 0.8909200429916382, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08999090641736984, "rewards/margins": 0.03991622105240822, "rewards/rejected": -0.12990711629390717, "step": 3600 }, { "epoch": 0.65, "grad_norm": 1.351096510887146, "learning_rate": 5.956034357257242e-06, "log_odds_chosen": 0.6249284744262695, "log_odds_ratio": -0.5861242413520813, "logits/chosen": -0.3442951738834381, "logits/rejected": -0.37348872423171997, "logps/chosen": -1.0251295566558838, "logps/rejected": -1.4468566179275513, "loss": 1.0134, "nll_loss": 0.9547685384750366, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10251295566558838, "rewards/margins": 0.04217272251844406, "rewards/rejected": -0.14468567073345184, "step": 3610 }, { "epoch": 0.65, "grad_norm": 0.854928731918335, "learning_rate": 5.950211093317804e-06, "log_odds_chosen": 0.36324846744537354, "log_odds_ratio": -0.6676633358001709, "logits/chosen": -0.3442520201206207, "logits/rejected": -0.3605124354362488, "logps/chosen": -0.9377188682556152, "logps/rejected": -1.135480523109436, "loss": 0.9911, "nll_loss": 0.9242986440658569, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09377188980579376, "rewards/margins": 0.01977616176009178, "rewards/rejected": -0.11354805529117584, "step": 3620 }, { "epoch": 0.66, "grad_norm": 0.8976321816444397, "learning_rate": 5.944387829378366e-06, "log_odds_chosen": 0.7812395095825195, "log_odds_ratio": -0.5367813110351562, "logits/chosen": -0.341721773147583, "logits/rejected": -0.37966251373291016, "logps/chosen": -0.9868513345718384, "logps/rejected": -1.5086472034454346, "loss": 0.962, "nll_loss": 0.908337414264679, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0986851304769516, "rewards/margins": 0.05217960476875305, "rewards/rejected": -0.15086473524570465, "step": 3630 }, { "epoch": 0.66, "grad_norm": 1.1400549411773682, "learning_rate": 5.938564565438928e-06, "log_odds_chosen": 0.7185646295547485, "log_odds_ratio": -0.5341478586196899, "logits/chosen": -0.31118088960647583, "logits/rejected": -0.35552436113357544, "logps/chosen": -0.9433846473693848, "logps/rejected": -1.4317991733551025, "loss": 1.0236, "nll_loss": 0.9701422452926636, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09433845430612564, "rewards/margins": 0.048841461539268494, "rewards/rejected": -0.14317990839481354, "step": 3640 }, { "epoch": 0.66, "grad_norm": 1.4526828527450562, "learning_rate": 5.93274130149949e-06, "log_odds_chosen": 0.673244297504425, "log_odds_ratio": -0.5420977473258972, "logits/chosen": -0.3347756266593933, "logits/rejected": -0.3824438452720642, "logps/chosen": -0.8603219985961914, "logps/rejected": -1.2901854515075684, "loss": 0.9376, "nll_loss": 0.8833721280097961, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0860322043299675, "rewards/margins": 0.04298635199666023, "rewards/rejected": -0.12901854515075684, "step": 3650 }, { "epoch": 0.66, "grad_norm": 1.4973233938217163, "learning_rate": 5.926918037560052e-06, "log_odds_chosen": 0.8371866345405579, "log_odds_ratio": -0.5633835792541504, "logits/chosen": -0.3284203112125397, "logits/rejected": -0.38096609711647034, "logps/chosen": -0.8801159858703613, "logps/rejected": -1.420422077178955, "loss": 0.9905, "nll_loss": 0.9342048764228821, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08801160752773285, "rewards/margins": 0.05403059720993042, "rewards/rejected": -0.14204218983650208, "step": 3660 }, { "epoch": 0.66, "grad_norm": 1.4614819288253784, "learning_rate": 5.921094773620615e-06, "log_odds_chosen": 0.9499984979629517, "log_odds_ratio": -0.475381076335907, "logits/chosen": -0.2903318405151367, "logits/rejected": -0.35688871145248413, "logps/chosen": -0.8208259344100952, "logps/rejected": -1.4491088390350342, "loss": 0.8832, "nll_loss": 0.8356998562812805, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0820825919508934, "rewards/margins": 0.06282828003168106, "rewards/rejected": -0.14491088688373566, "step": 3670 }, { "epoch": 0.66, "grad_norm": 1.2535169124603271, "learning_rate": 5.915271509681176e-06, "log_odds_chosen": 0.7504245638847351, "log_odds_ratio": -0.5328863859176636, "logits/chosen": -0.3805959224700928, "logits/rejected": -0.3868086338043213, "logps/chosen": -0.9830179214477539, "logps/rejected": -1.489959478378296, "loss": 0.9957, "nll_loss": 0.9423898458480835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09830178320407867, "rewards/margins": 0.05069415643811226, "rewards/rejected": -0.14899595081806183, "step": 3680 }, { "epoch": 0.67, "grad_norm": 1.5583924055099487, "learning_rate": 5.909448245741738e-06, "log_odds_chosen": 0.6056820750236511, "log_odds_ratio": -0.5717626810073853, "logits/chosen": -0.38489967584609985, "logits/rejected": -0.4191763401031494, "logps/chosen": -1.0282983779907227, "logps/rejected": -1.4288691282272339, "loss": 0.9785, "nll_loss": 0.9213641285896301, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10282983630895615, "rewards/margins": 0.04005708545446396, "rewards/rejected": -0.1428869366645813, "step": 3690 }, { "epoch": 0.67, "grad_norm": 2.3320605754852295, "learning_rate": 5.903624981802301e-06, "log_odds_chosen": 1.079827904701233, "log_odds_ratio": -0.4331550598144531, "logits/chosen": -0.3357813060283661, "logits/rejected": -0.3873363733291626, "logps/chosen": -0.8198086023330688, "logps/rejected": -1.5241756439208984, "loss": 0.8674, "nll_loss": 0.8240398168563843, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.081980861723423, "rewards/margins": 0.07043670862913132, "rewards/rejected": -0.15241757035255432, "step": 3700 }, { "epoch": 0.67, "grad_norm": 1.4478205442428589, "learning_rate": 5.897801717862861e-06, "log_odds_chosen": 0.7121813893318176, "log_odds_ratio": -0.5246703028678894, "logits/chosen": -0.372341126203537, "logits/rejected": -0.40585875511169434, "logps/chosen": -0.9683243036270142, "logps/rejected": -1.4463528394699097, "loss": 0.9554, "nll_loss": 0.9029725193977356, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09683243185281754, "rewards/margins": 0.04780285805463791, "rewards/rejected": -0.14463528990745544, "step": 3710 }, { "epoch": 0.67, "grad_norm": 0.7791962623596191, "learning_rate": 5.891978453923424e-06, "log_odds_chosen": 0.6235748529434204, "log_odds_ratio": -0.5931678414344788, "logits/chosen": -0.37661951780319214, "logits/rejected": -0.38163405656814575, "logps/chosen": -0.9610217809677124, "logps/rejected": -1.3512619733810425, "loss": 0.948, "nll_loss": 0.8886823654174805, "rewards/accuracies": 0.625, "rewards/chosen": -0.09610219299793243, "rewards/margins": 0.03902401775121689, "rewards/rejected": -0.13512620329856873, "step": 3720 }, { "epoch": 0.67, "grad_norm": 1.0583852529525757, "learning_rate": 5.886155189983986e-06, "log_odds_chosen": 0.7444362640380859, "log_odds_ratio": -0.5444598197937012, "logits/chosen": -0.36561426520347595, "logits/rejected": -0.42521706223487854, "logps/chosen": -0.9446412324905396, "logps/rejected": -1.4132436513900757, "loss": 0.9795, "nll_loss": 0.9250993728637695, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09446412324905396, "rewards/margins": 0.04686024412512779, "rewards/rejected": -0.14132437109947205, "step": 3730 }, { "epoch": 0.68, "grad_norm": 1.1684253215789795, "learning_rate": 5.880331926044547e-06, "log_odds_chosen": 0.7757261395454407, "log_odds_ratio": -0.5709885358810425, "logits/chosen": -0.29772838950157166, "logits/rejected": -0.292438268661499, "logps/chosen": -0.9121503829956055, "logps/rejected": -1.3928760290145874, "loss": 0.9107, "nll_loss": 0.8535849452018738, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09121503680944443, "rewards/margins": 0.048072561621665955, "rewards/rejected": -0.13928762078285217, "step": 3740 }, { "epoch": 0.68, "grad_norm": 1.7918241024017334, "learning_rate": 5.87450866210511e-06, "log_odds_chosen": 0.6071350574493408, "log_odds_ratio": -0.5676754117012024, "logits/chosen": -0.37720435857772827, "logits/rejected": -0.4086214601993561, "logps/chosen": -0.9288979768753052, "logps/rejected": -1.316955327987671, "loss": 1.0195, "nll_loss": 0.9627493023872375, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09288980066776276, "rewards/margins": 0.03880572319030762, "rewards/rejected": -0.13169552385807037, "step": 3750 }, { "epoch": 0.68, "grad_norm": 0.7484256029129028, "learning_rate": 5.868685398165672e-06, "log_odds_chosen": 0.5376003384590149, "log_odds_ratio": -0.6325076818466187, "logits/chosen": -0.399244487285614, "logits/rejected": -0.4125980734825134, "logps/chosen": -0.9814577102661133, "logps/rejected": -1.357806921005249, "loss": 1.0426, "nll_loss": 0.9793224334716797, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.09814576804637909, "rewards/margins": 0.03763493150472641, "rewards/rejected": -0.1357807070016861, "step": 3760 }, { "epoch": 0.68, "grad_norm": 1.0059581995010376, "learning_rate": 5.8628621342262335e-06, "log_odds_chosen": 0.6811798810958862, "log_odds_ratio": -0.5329106450080872, "logits/chosen": -0.39979439973831177, "logits/rejected": -0.429983913898468, "logps/chosen": -0.9924589991569519, "logps/rejected": -1.4579194784164429, "loss": 0.953, "nll_loss": 0.8996642231941223, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09924589097499847, "rewards/margins": 0.046546053141355515, "rewards/rejected": -0.1457919478416443, "step": 3770 }, { "epoch": 0.68, "grad_norm": 0.6539117097854614, "learning_rate": 5.857038870286796e-06, "log_odds_chosen": 0.8818238973617554, "log_odds_ratio": -0.5463408827781677, "logits/chosen": -0.33649712800979614, "logits/rejected": -0.3999442160129547, "logps/chosen": -0.9039346575737, "logps/rejected": -1.4749833345413208, "loss": 1.0149, "nll_loss": 0.9602839350700378, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09039346128702164, "rewards/margins": 0.05710485577583313, "rewards/rejected": -0.14749832451343536, "step": 3780 }, { "epoch": 0.68, "grad_norm": 1.743882179260254, "learning_rate": 5.851215606347357e-06, "log_odds_chosen": 0.7480627298355103, "log_odds_ratio": -0.5538538098335266, "logits/chosen": -0.4183572232723236, "logits/rejected": -0.4714382290840149, "logps/chosen": -0.9981712102890015, "logps/rejected": -1.461587905883789, "loss": 0.9667, "nll_loss": 0.9112657308578491, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09981712698936462, "rewards/margins": 0.046341657638549805, "rewards/rejected": -0.14615878462791443, "step": 3790 }, { "epoch": 0.69, "grad_norm": 1.5963619947433472, "learning_rate": 5.845392342407919e-06, "log_odds_chosen": 0.4980127215385437, "log_odds_ratio": -0.6052228808403015, "logits/chosen": -0.4400199353694916, "logits/rejected": -0.4431309103965759, "logps/chosen": -0.9494178891181946, "logps/rejected": -1.2823858261108398, "loss": 0.9827, "nll_loss": 0.9221373796463013, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09494178742170334, "rewards/margins": 0.03329680114984512, "rewards/rejected": -0.12823858857154846, "step": 3800 }, { "epoch": 0.69, "grad_norm": 1.0663025379180908, "learning_rate": 5.839569078468481e-06, "log_odds_chosen": 0.6610610485076904, "log_odds_ratio": -0.5737951993942261, "logits/chosen": -0.40231895446777344, "logits/rejected": -0.44721460342407227, "logps/chosen": -0.9405794143676758, "logps/rejected": -1.3562818765640259, "loss": 0.9729, "nll_loss": 0.9154725074768066, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09405793994665146, "rewards/margins": 0.04157023876905441, "rewards/rejected": -0.13562817871570587, "step": 3810 }, { "epoch": 0.69, "grad_norm": 0.6941992044448853, "learning_rate": 5.833745814529043e-06, "log_odds_chosen": 0.3320769965648651, "log_odds_ratio": -0.6687268018722534, "logits/chosen": -0.4455398619174957, "logits/rejected": -0.46363648772239685, "logps/chosen": -1.063694715499878, "logps/rejected": -1.2793588638305664, "loss": 1.1192, "nll_loss": 1.0523333549499512, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10636948049068451, "rewards/margins": 0.02156640589237213, "rewards/rejected": -0.12793588638305664, "step": 3820 }, { "epoch": 0.69, "grad_norm": 0.7818502187728882, "learning_rate": 5.827922550589605e-06, "log_odds_chosen": 0.7564540505409241, "log_odds_ratio": -0.5458071231842041, "logits/chosen": -0.39144763350486755, "logits/rejected": -0.43861907720565796, "logps/chosen": -0.9263273477554321, "logps/rejected": -1.4547199010849, "loss": 0.9623, "nll_loss": 0.9077495336532593, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0926327258348465, "rewards/margins": 0.05283927172422409, "rewards/rejected": -0.14547200500965118, "step": 3830 }, { "epoch": 0.69, "grad_norm": 1.542531132698059, "learning_rate": 5.822099286650167e-06, "log_odds_chosen": 0.609904408454895, "log_odds_ratio": -0.570465087890625, "logits/chosen": -0.38601264357566833, "logits/rejected": -0.45281514525413513, "logps/chosen": -0.917966365814209, "logps/rejected": -1.28914213180542, "loss": 0.9971, "nll_loss": 0.9401028752326965, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0917966365814209, "rewards/margins": 0.03711757808923721, "rewards/rejected": -0.12891420722007751, "step": 3840 }, { "epoch": 0.7, "grad_norm": 0.7471659779548645, "learning_rate": 5.816276022710729e-06, "log_odds_chosen": 0.36103135347366333, "log_odds_ratio": -0.6382339596748352, "logits/chosen": -0.4501968026161194, "logits/rejected": -0.46252304315567017, "logps/chosen": -1.0752949714660645, "logps/rejected": -1.3563129901885986, "loss": 0.9825, "nll_loss": 0.9186823964118958, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10752949863672256, "rewards/margins": 0.028101811185479164, "rewards/rejected": -0.13563130795955658, "step": 3850 }, { "epoch": 0.7, "grad_norm": 0.9536011815071106, "learning_rate": 5.810452758771292e-06, "log_odds_chosen": 0.7206624746322632, "log_odds_ratio": -0.5553077459335327, "logits/chosen": -0.4182675778865814, "logits/rejected": -0.45456624031066895, "logps/chosen": -0.9347223043441772, "logps/rejected": -1.4037220478057861, "loss": 1.0241, "nll_loss": 0.9685796499252319, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09347222745418549, "rewards/margins": 0.04689997434616089, "rewards/rejected": -0.14037221670150757, "step": 3860 }, { "epoch": 0.7, "grad_norm": 1.2044615745544434, "learning_rate": 5.804629494831853e-06, "log_odds_chosen": 0.7299908399581909, "log_odds_ratio": -0.5608175992965698, "logits/chosen": -0.3640984892845154, "logits/rejected": -0.4097954332828522, "logps/chosen": -0.9941253662109375, "logps/rejected": -1.464939832687378, "loss": 0.9944, "nll_loss": 0.9383255243301392, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09941253811120987, "rewards/margins": 0.04708145186305046, "rewards/rejected": -0.14649398624897003, "step": 3870 }, { "epoch": 0.7, "grad_norm": 1.311955451965332, "learning_rate": 5.798806230892415e-06, "log_odds_chosen": 0.7198322415351868, "log_odds_ratio": -0.5975719690322876, "logits/chosen": -0.38058674335479736, "logits/rejected": -0.42003631591796875, "logps/chosen": -0.8995689153671265, "logps/rejected": -1.3722004890441895, "loss": 0.9498, "nll_loss": 0.8900270462036133, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08995689451694489, "rewards/margins": 0.047263167798519135, "rewards/rejected": -0.13722005486488342, "step": 3880 }, { "epoch": 0.7, "grad_norm": 1.3792704343795776, "learning_rate": 5.792982966952977e-06, "log_odds_chosen": 0.5147228837013245, "log_odds_ratio": -0.6110295057296753, "logits/chosen": -0.45408734679222107, "logits/rejected": -0.4423491358757019, "logps/chosen": -1.0131367444992065, "logps/rejected": -1.3439712524414062, "loss": 0.9864, "nll_loss": 0.92534339427948, "rewards/accuracies": 0.625, "rewards/chosen": -0.10131368786096573, "rewards/margins": 0.03308345377445221, "rewards/rejected": -0.13439713418483734, "step": 3890 }, { "epoch": 0.7, "grad_norm": 1.2134101390838623, "learning_rate": 5.7871597030135384e-06, "log_odds_chosen": 0.8219815492630005, "log_odds_ratio": -0.5065579414367676, "logits/chosen": -0.4033416211605072, "logits/rejected": -0.4501563608646393, "logps/chosen": -0.9055941700935364, "logps/rejected": -1.4462974071502686, "loss": 0.9623, "nll_loss": 0.9116464853286743, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09055942296981812, "rewards/margins": 0.054070331156253815, "rewards/rejected": -0.14462974667549133, "step": 3900 }, { "epoch": 0.71, "grad_norm": 1.2515047788619995, "learning_rate": 5.781336439074101e-06, "log_odds_chosen": 0.46673351526260376, "log_odds_ratio": -0.627618134021759, "logits/chosen": -0.3921314775943756, "logits/rejected": -0.4186267852783203, "logps/chosen": -1.0056527853012085, "logps/rejected": -1.3170154094696045, "loss": 1.0003, "nll_loss": 0.9375259280204773, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.10056529194116592, "rewards/margins": 0.031136253848671913, "rewards/rejected": -0.1317015439271927, "step": 3910 }, { "epoch": 0.71, "grad_norm": 2.3137080669403076, "learning_rate": 5.775513175134663e-06, "log_odds_chosen": 0.7130810618400574, "log_odds_ratio": -0.5645673871040344, "logits/chosen": -0.42148932814598083, "logits/rejected": -0.44052475690841675, "logps/chosen": -1.0017608404159546, "logps/rejected": -1.5046265125274658, "loss": 0.9761, "nll_loss": 0.9195976257324219, "rewards/accuracies": 0.625, "rewards/chosen": -0.10017608106136322, "rewards/margins": 0.05028656870126724, "rewards/rejected": -0.15046265721321106, "step": 3920 }, { "epoch": 0.71, "grad_norm": 1.1239081621170044, "learning_rate": 5.7696899111952245e-06, "log_odds_chosen": 0.6651091575622559, "log_odds_ratio": -0.5374134182929993, "logits/chosen": -0.3734278082847595, "logits/rejected": -0.4400072693824768, "logps/chosen": -0.8720897436141968, "logps/rejected": -1.2871179580688477, "loss": 0.9574, "nll_loss": 0.9036323428153992, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08720897883176804, "rewards/margins": 0.04150282219052315, "rewards/rejected": -0.12871180474758148, "step": 3930 }, { "epoch": 0.71, "grad_norm": 0.7715298533439636, "learning_rate": 5.763866647255787e-06, "log_odds_chosen": 0.5933562517166138, "log_odds_ratio": -0.5951903462409973, "logits/chosen": -0.38817816972732544, "logits/rejected": -0.4285859167575836, "logps/chosen": -0.9582163691520691, "logps/rejected": -1.3706133365631104, "loss": 0.9348, "nll_loss": 0.8752536773681641, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09582163393497467, "rewards/margins": 0.041239701211452484, "rewards/rejected": -0.13706132769584656, "step": 3940 }, { "epoch": 0.71, "grad_norm": 0.7734090685844421, "learning_rate": 5.758043383316349e-06, "log_odds_chosen": 0.8612390756607056, "log_odds_ratio": -0.49259430170059204, "logits/chosen": -0.41785088181495667, "logits/rejected": -0.4185684323310852, "logps/chosen": -0.8485938906669617, "logps/rejected": -1.3895423412322998, "loss": 0.9617, "nll_loss": 0.9124394655227661, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08485939353704453, "rewards/margins": 0.054094843566417694, "rewards/rejected": -0.13895423710346222, "step": 3950 }, { "epoch": 0.72, "grad_norm": 1.0429623126983643, "learning_rate": 5.752220119376911e-06, "log_odds_chosen": 0.9815561175346375, "log_odds_ratio": -0.5164690017700195, "logits/chosen": -0.3655704855918884, "logits/rejected": -0.41669750213623047, "logps/chosen": -0.8296697735786438, "logps/rejected": -1.476696491241455, "loss": 0.9442, "nll_loss": 0.8925908803939819, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08296697586774826, "rewards/margins": 0.06470267474651337, "rewards/rejected": -0.14766964316368103, "step": 3960 }, { "epoch": 0.72, "grad_norm": 1.7995119094848633, "learning_rate": 5.746396855437472e-06, "log_odds_chosen": 0.717779278755188, "log_odds_ratio": -0.6176232695579529, "logits/chosen": -0.4269459843635559, "logits/rejected": -0.4877711832523346, "logps/chosen": -1.0023086071014404, "logps/rejected": -1.502467155456543, "loss": 1.0947, "nll_loss": 1.0329031944274902, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1002308577299118, "rewards/margins": 0.05001585930585861, "rewards/rejected": -0.15024670958518982, "step": 3970 }, { "epoch": 0.72, "grad_norm": 0.7775267362594604, "learning_rate": 5.740573591498034e-06, "log_odds_chosen": 0.66571444272995, "log_odds_ratio": -0.605377733707428, "logits/chosen": -0.33649665117263794, "logits/rejected": -0.4037664532661438, "logps/chosen": -0.9523887634277344, "logps/rejected": -1.3614118099212646, "loss": 0.9883, "nll_loss": 0.9277440905570984, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09523888677358627, "rewards/margins": 0.04090230539441109, "rewards/rejected": -0.13614118099212646, "step": 3980 }, { "epoch": 0.72, "grad_norm": 1.1453152894973755, "learning_rate": 5.734750327558596e-06, "log_odds_chosen": 0.6597369909286499, "log_odds_ratio": -0.5555736422538757, "logits/chosen": -0.38733386993408203, "logits/rejected": -0.4213401675224304, "logps/chosen": -0.920432448387146, "logps/rejected": -1.325626015663147, "loss": 0.9479, "nll_loss": 0.8923455476760864, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09204325079917908, "rewards/margins": 0.0405193530023098, "rewards/rejected": -0.13256260752677917, "step": 3990 }, { "epoch": 0.72, "grad_norm": 1.2393028736114502, "learning_rate": 5.728927063619158e-06, "log_odds_chosen": 0.7080034613609314, "log_odds_ratio": -0.5932170748710632, "logits/chosen": -0.32581627368927, "logits/rejected": -0.392342209815979, "logps/chosen": -0.9800984263420105, "logps/rejected": -1.3934428691864014, "loss": 0.9699, "nll_loss": 0.9105931520462036, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09800984710454941, "rewards/margins": 0.04133444279432297, "rewards/rejected": -0.13934428989887238, "step": 4000 }, { "epoch": 0.72, "grad_norm": 1.5815924406051636, "learning_rate": 5.7231037996797204e-06, "log_odds_chosen": 0.9281368255615234, "log_odds_ratio": -0.4948226809501648, "logits/chosen": -0.3796248733997345, "logits/rejected": -0.42877936363220215, "logps/chosen": -0.8506223559379578, "logps/rejected": -1.4400184154510498, "loss": 0.9755, "nll_loss": 0.926009476184845, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08506224304437637, "rewards/margins": 0.05893959850072861, "rewards/rejected": -0.14400185644626617, "step": 4010 }, { "epoch": 0.73, "grad_norm": 1.324011206626892, "learning_rate": 5.717280535740282e-06, "log_odds_chosen": 0.8670433759689331, "log_odds_ratio": -0.5069810152053833, "logits/chosen": -0.41438978910446167, "logits/rejected": -0.4259106516838074, "logps/chosen": -0.9058378338813782, "logps/rejected": -1.4797426462173462, "loss": 0.9619, "nll_loss": 0.9111762046813965, "rewards/accuracies": 0.75, "rewards/chosen": -0.09058378636837006, "rewards/margins": 0.0573904812335968, "rewards/rejected": -0.14797425270080566, "step": 4020 }, { "epoch": 0.73, "grad_norm": 1.286294937133789, "learning_rate": 5.711457271800844e-06, "log_odds_chosen": 0.7373972535133362, "log_odds_ratio": -0.5619674921035767, "logits/chosen": -0.3601471185684204, "logits/rejected": -0.3947383463382721, "logps/chosen": -0.9275285601615906, "logps/rejected": -1.4038407802581787, "loss": 0.9351, "nll_loss": 0.8788579702377319, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0927528589963913, "rewards/margins": 0.04763120785355568, "rewards/rejected": -0.14038407802581787, "step": 4030 }, { "epoch": 0.73, "grad_norm": 2.57619571685791, "learning_rate": 5.7056340078614065e-06, "log_odds_chosen": 0.5859150886535645, "log_odds_ratio": -0.5528852343559265, "logits/chosen": -0.3482866883277893, "logits/rejected": -0.3774065375328064, "logps/chosen": -1.0320050716400146, "logps/rejected": -1.4059724807739258, "loss": 0.9701, "nll_loss": 0.9148612022399902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10320049524307251, "rewards/margins": 0.03739675134420395, "rewards/rejected": -0.14059725403785706, "step": 4040 }, { "epoch": 0.73, "grad_norm": 1.194467544555664, "learning_rate": 5.699810743921968e-06, "log_odds_chosen": 0.7288400530815125, "log_odds_ratio": -0.5820309519767761, "logits/chosen": -0.3455452024936676, "logits/rejected": -0.38425126671791077, "logps/chosen": -0.9070445895195007, "logps/rejected": -1.4033013582229614, "loss": 0.9193, "nll_loss": 0.8611399531364441, "rewards/accuracies": 0.625, "rewards/chosen": -0.09070447087287903, "rewards/margins": 0.04962567239999771, "rewards/rejected": -0.14033015072345734, "step": 4050 }, { "epoch": 0.73, "grad_norm": 1.142704725265503, "learning_rate": 5.6939874799825295e-06, "log_odds_chosen": 0.4837094843387604, "log_odds_ratio": -0.6172317266464233, "logits/chosen": -0.44991883635520935, "logits/rejected": -0.4560603201389313, "logps/chosen": -1.0034350156784058, "logps/rejected": -1.2976691722869873, "loss": 1.1288, "nll_loss": 1.0670511722564697, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1003435030579567, "rewards/margins": 0.0294234249740839, "rewards/rejected": -0.12976691126823425, "step": 4060 }, { "epoch": 0.74, "grad_norm": 1.461199402809143, "learning_rate": 5.688164216043092e-06, "log_odds_chosen": 0.6147937178611755, "log_odds_ratio": -0.5649920701980591, "logits/chosen": -0.3980977535247803, "logits/rejected": -0.4219932556152344, "logps/chosen": -0.9132669568061829, "logps/rejected": -1.2988579273223877, "loss": 0.9656, "nll_loss": 0.9091387987136841, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09132669121026993, "rewards/margins": 0.03855909779667854, "rewards/rejected": -0.12988579273223877, "step": 4070 }, { "epoch": 0.74, "grad_norm": 1.2351607084274292, "learning_rate": 5.682340952103654e-06, "log_odds_chosen": 0.5310704112052917, "log_odds_ratio": -0.6117419004440308, "logits/chosen": -0.3770817518234253, "logits/rejected": -0.4008653163909912, "logps/chosen": -0.9646285772323608, "logps/rejected": -1.3340489864349365, "loss": 0.9654, "nll_loss": 0.9041979908943176, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09646286070346832, "rewards/margins": 0.03694205358624458, "rewards/rejected": -0.1334048956632614, "step": 4080 }, { "epoch": 0.74, "grad_norm": 1.237679362297058, "learning_rate": 5.6765176881642155e-06, "log_odds_chosen": 0.8533763885498047, "log_odds_ratio": -0.486391544342041, "logits/chosen": -0.40168827772140503, "logits/rejected": -0.4379729628562927, "logps/chosen": -0.8621419072151184, "logps/rejected": -1.3524459600448608, "loss": 1.0422, "nll_loss": 0.9935612678527832, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08621419221162796, "rewards/margins": 0.04903041943907738, "rewards/rejected": -0.13524460792541504, "step": 4090 }, { "epoch": 0.74, "grad_norm": 0.5430986285209656, "learning_rate": 5.670694424224778e-06, "log_odds_chosen": 0.46620744466781616, "log_odds_ratio": -0.6109627485275269, "logits/chosen": -0.4185555577278137, "logits/rejected": -0.44289064407348633, "logps/chosen": -0.9953963160514832, "logps/rejected": -1.2747821807861328, "loss": 1.0714, "nll_loss": 1.0102908611297607, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09953964501619339, "rewards/margins": 0.02793857827782631, "rewards/rejected": -0.1274782121181488, "step": 4100 }, { "epoch": 0.74, "grad_norm": 0.8633012175559998, "learning_rate": 5.66487116028534e-06, "log_odds_chosen": 0.6167644262313843, "log_odds_ratio": -0.5935536623001099, "logits/chosen": -0.34687721729278564, "logits/rejected": -0.3849531412124634, "logps/chosen": -0.8814305067062378, "logps/rejected": -1.2702577114105225, "loss": 0.9524, "nll_loss": 0.892997145652771, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08814306557178497, "rewards/margins": 0.03888271749019623, "rewards/rejected": -0.12702576816082, "step": 4110 }, { "epoch": 0.74, "grad_norm": 1.4002399444580078, "learning_rate": 5.659047896345902e-06, "log_odds_chosen": 0.5371646285057068, "log_odds_ratio": -0.5932449698448181, "logits/chosen": -0.40844884514808655, "logits/rejected": -0.4111505150794983, "logps/chosen": -1.0008736848831177, "logps/rejected": -1.3484508991241455, "loss": 0.9959, "nll_loss": 0.9365564584732056, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10008736699819565, "rewards/margins": 0.03475772216916084, "rewards/rejected": -0.1348450928926468, "step": 4120 }, { "epoch": 0.75, "grad_norm": 1.1915359497070312, "learning_rate": 5.653224632406464e-06, "log_odds_chosen": 0.5471753478050232, "log_odds_ratio": -0.5521697998046875, "logits/chosen": -0.4373193681240082, "logits/rejected": -0.48634281754493713, "logps/chosen": -1.02994966506958, "logps/rejected": -1.420654058456421, "loss": 1.0112, "nll_loss": 0.9560235738754272, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10299496352672577, "rewards/margins": 0.03907042369246483, "rewards/rejected": -0.1420653760433197, "step": 4130 }, { "epoch": 0.75, "grad_norm": 0.7222558856010437, "learning_rate": 5.647401368467026e-06, "log_odds_chosen": 0.4993858337402344, "log_odds_ratio": -0.641208827495575, "logits/chosen": -0.37473541498184204, "logits/rejected": -0.4002726078033447, "logps/chosen": -1.0152702331542969, "logps/rejected": -1.339428186416626, "loss": 0.9919, "nll_loss": 0.9277341961860657, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10152701288461685, "rewards/margins": 0.032415807247161865, "rewards/rejected": -0.13394282758235931, "step": 4140 }, { "epoch": 0.75, "grad_norm": 1.2790604829788208, "learning_rate": 5.641578104527587e-06, "log_odds_chosen": 0.4182191789150238, "log_odds_ratio": -0.6287438273429871, "logits/chosen": -0.45408788323402405, "logits/rejected": -0.4832921624183655, "logps/chosen": -1.0568732023239136, "logps/rejected": -1.3412824869155884, "loss": 1.0578, "nll_loss": 0.9948795437812805, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10568732023239136, "rewards/margins": 0.028440916910767555, "rewards/rejected": -0.13412824273109436, "step": 4150 }, { "epoch": 0.75, "grad_norm": 1.3140400648117065, "learning_rate": 5.635754840588149e-06, "log_odds_chosen": 0.6435329914093018, "log_odds_ratio": -0.6096024513244629, "logits/chosen": -0.33399510383605957, "logits/rejected": -0.36445289850234985, "logps/chosen": -0.8361338376998901, "logps/rejected": -1.2639483213424683, "loss": 0.9501, "nll_loss": 0.8891298174858093, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08361340314149857, "rewards/margins": 0.042781438678503036, "rewards/rejected": -0.1263948380947113, "step": 4160 }, { "epoch": 0.75, "grad_norm": 1.5185847282409668, "learning_rate": 5.6299315766487115e-06, "log_odds_chosen": 0.6422057747840881, "log_odds_ratio": -0.5717750787734985, "logits/chosen": -0.4139169156551361, "logits/rejected": -0.49386876821517944, "logps/chosen": -0.9835192561149597, "logps/rejected": -1.4217901229858398, "loss": 1.0613, "nll_loss": 1.0041275024414062, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09835191816091537, "rewards/margins": 0.04382709413766861, "rewards/rejected": -0.14217904210090637, "step": 4170 }, { "epoch": 0.76, "grad_norm": 0.852863609790802, "learning_rate": 5.624108312709273e-06, "log_odds_chosen": 0.5467121005058289, "log_odds_ratio": -0.5848273038864136, "logits/chosen": -0.4137410521507263, "logits/rejected": -0.4252719283103943, "logps/chosen": -0.9758207201957703, "logps/rejected": -1.328446626663208, "loss": 0.9739, "nll_loss": 0.9153772592544556, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09758206456899643, "rewards/margins": 0.035262592136859894, "rewards/rejected": -0.13284465670585632, "step": 4180 }, { "epoch": 0.76, "grad_norm": 1.452010154724121, "learning_rate": 5.618285048769835e-06, "log_odds_chosen": 0.8447461128234863, "log_odds_ratio": -0.5126752853393555, "logits/chosen": -0.3651435971260071, "logits/rejected": -0.44532880187034607, "logps/chosen": -0.940411388874054, "logps/rejected": -1.488644003868103, "loss": 0.9913, "nll_loss": 0.9400469064712524, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09404114633798599, "rewards/margins": 0.05482326075434685, "rewards/rejected": -0.14886441826820374, "step": 4190 }, { "epoch": 0.76, "grad_norm": 1.0430749654769897, "learning_rate": 5.6124617848303976e-06, "log_odds_chosen": 0.7600489854812622, "log_odds_ratio": -0.547897219657898, "logits/chosen": -0.40632161498069763, "logits/rejected": -0.4137774407863617, "logps/chosen": -0.9630746841430664, "logps/rejected": -1.4354467391967773, "loss": 1.0375, "nll_loss": 0.9827224612236023, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09630747139453888, "rewards/margins": 0.04723720625042915, "rewards/rejected": -0.14354465901851654, "step": 4200 }, { "epoch": 0.76, "grad_norm": 0.9618161916732788, "learning_rate": 5.606638520890959e-06, "log_odds_chosen": 0.821193516254425, "log_odds_ratio": -0.5814388990402222, "logits/chosen": -0.36870136857032776, "logits/rejected": -0.40048137307167053, "logps/chosen": -0.8505674600601196, "logps/rejected": -1.3683149814605713, "loss": 0.9269, "nll_loss": 0.8687930107116699, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08505673706531525, "rewards/margins": 0.051774751394987106, "rewards/rejected": -0.13683149218559265, "step": 4210 }, { "epoch": 0.76, "grad_norm": 1.4219672679901123, "learning_rate": 5.600815256951521e-06, "log_odds_chosen": 0.8768616914749146, "log_odds_ratio": -0.5199444890022278, "logits/chosen": -0.3847038149833679, "logits/rejected": -0.4094681739807129, "logps/chosen": -0.9236054420471191, "logps/rejected": -1.48633873462677, "loss": 0.9239, "nll_loss": 0.8718730211257935, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09236054867506027, "rewards/margins": 0.05627333000302315, "rewards/rejected": -0.14863386750221252, "step": 4220 }, { "epoch": 0.76, "grad_norm": 1.4613364934921265, "learning_rate": 5.594991993012084e-06, "log_odds_chosen": 0.692061185836792, "log_odds_ratio": -0.5662203431129456, "logits/chosen": -0.36791688203811646, "logits/rejected": -0.4184791147708893, "logps/chosen": -0.9626785516738892, "logps/rejected": -1.4185993671417236, "loss": 0.9929, "nll_loss": 0.9362456202507019, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09626786410808563, "rewards/margins": 0.04559207707643509, "rewards/rejected": -0.14185991883277893, "step": 4230 }, { "epoch": 0.77, "grad_norm": 1.1804014444351196, "learning_rate": 5.589168729072644e-06, "log_odds_chosen": 1.0438271760940552, "log_odds_ratio": -0.5010775327682495, "logits/chosen": -0.267377644777298, "logits/rejected": -0.3655751347541809, "logps/chosen": -0.9364291429519653, "logps/rejected": -1.6218111515045166, "loss": 0.9331, "nll_loss": 0.8829880952835083, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09364292025566101, "rewards/margins": 0.06853820383548737, "rewards/rejected": -0.16218113899230957, "step": 4240 }, { "epoch": 0.77, "grad_norm": 1.9332170486450195, "learning_rate": 5.5833454651332066e-06, "log_odds_chosen": 0.83074951171875, "log_odds_ratio": -0.5328481197357178, "logits/chosen": -0.4035532474517822, "logits/rejected": -0.44083279371261597, "logps/chosen": -0.8890374302864075, "logps/rejected": -1.4090745449066162, "loss": 0.969, "nll_loss": 0.9156768918037415, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08890374004840851, "rewards/margins": 0.052003733813762665, "rewards/rejected": -0.14090748131275177, "step": 4250 }, { "epoch": 0.77, "grad_norm": 1.3763980865478516, "learning_rate": 5.577522201193769e-06, "log_odds_chosen": 0.5008147954940796, "log_odds_ratio": -0.6271563768386841, "logits/chosen": -0.40320873260498047, "logits/rejected": -0.412767231464386, "logps/chosen": -0.9467431306838989, "logps/rejected": -1.296858310699463, "loss": 0.9401, "nll_loss": 0.8774242401123047, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09467431157827377, "rewards/margins": 0.03501152992248535, "rewards/rejected": -0.12968584895133972, "step": 4260 }, { "epoch": 0.77, "grad_norm": 0.9446521401405334, "learning_rate": 5.57169893725433e-06, "log_odds_chosen": 1.1107661724090576, "log_odds_ratio": -0.4330429136753082, "logits/chosen": -0.31322717666625977, "logits/rejected": -0.38749533891677856, "logps/chosen": -0.8513630628585815, "logps/rejected": -1.5474624633789062, "loss": 0.8809, "nll_loss": 0.8376407623291016, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0851363092660904, "rewards/margins": 0.06960994750261307, "rewards/rejected": -0.15474626421928406, "step": 4270 }, { "epoch": 0.77, "grad_norm": 1.3432685136795044, "learning_rate": 5.565875673314893e-06, "log_odds_chosen": 0.8585469126701355, "log_odds_ratio": -0.5709123015403748, "logits/chosen": -0.32336562871932983, "logits/rejected": -0.38996315002441406, "logps/chosen": -0.8784129023551941, "logps/rejected": -1.417546272277832, "loss": 0.967, "nll_loss": 0.9099494814872742, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08784128725528717, "rewards/margins": 0.053913332521915436, "rewards/rejected": -0.1417546272277832, "step": 4280 }, { "epoch": 0.77, "grad_norm": 1.2538154125213623, "learning_rate": 5.560052409375455e-06, "log_odds_chosen": 0.7539855241775513, "log_odds_ratio": -0.5314663052558899, "logits/chosen": -0.40349698066711426, "logits/rejected": -0.419721782207489, "logps/chosen": -0.9041644334793091, "logps/rejected": -1.377606987953186, "loss": 0.9484, "nll_loss": 0.8953009843826294, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09041643887758255, "rewards/margins": 0.04734424501657486, "rewards/rejected": -0.1377606987953186, "step": 4290 }, { "epoch": 0.78, "grad_norm": 2.1267170906066895, "learning_rate": 5.554229145436017e-06, "log_odds_chosen": 0.7543531656265259, "log_odds_ratio": -0.5795196294784546, "logits/chosen": -0.4250953793525696, "logits/rejected": -0.4518701434135437, "logps/chosen": -1.0713355541229248, "logps/rejected": -1.5738701820373535, "loss": 0.9899, "nll_loss": 0.931910514831543, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10713355243206024, "rewards/margins": 0.05025345832109451, "rewards/rejected": -0.15738701820373535, "step": 4300 }, { "epoch": 0.78, "grad_norm": 1.9224661588668823, "learning_rate": 5.548405881496579e-06, "log_odds_chosen": 0.6704081296920776, "log_odds_ratio": -0.5380033850669861, "logits/chosen": -0.44478529691696167, "logits/rejected": -0.4515528678894043, "logps/chosen": -0.9250621795654297, "logps/rejected": -1.3818514347076416, "loss": 0.9657, "nll_loss": 0.9119402170181274, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09250621497631073, "rewards/margins": 0.04567892104387283, "rewards/rejected": -0.13818514347076416, "step": 4310 }, { "epoch": 0.78, "grad_norm": 1.2206400632858276, "learning_rate": 5.542582617557141e-06, "log_odds_chosen": 0.5698705315589905, "log_odds_ratio": -0.5757175087928772, "logits/chosen": -0.4712826609611511, "logits/rejected": -0.48210567235946655, "logps/chosen": -0.9412860870361328, "logps/rejected": -1.3091002702713013, "loss": 0.9899, "nll_loss": 0.9323747754096985, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09412859380245209, "rewards/margins": 0.036781422793865204, "rewards/rejected": -0.1309100240468979, "step": 4320 }, { "epoch": 0.78, "grad_norm": 1.0580228567123413, "learning_rate": 5.5367593536177025e-06, "log_odds_chosen": 0.6712430119514465, "log_odds_ratio": -0.5941036939620972, "logits/chosen": -0.44383248686790466, "logits/rejected": -0.4895065426826477, "logps/chosen": -0.9147260785102844, "logps/rejected": -1.3670275211334229, "loss": 0.9959, "nll_loss": 0.9365051984786987, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09147261083126068, "rewards/margins": 0.04523014649748802, "rewards/rejected": -0.136702761054039, "step": 4330 }, { "epoch": 0.78, "grad_norm": 0.9036986827850342, "learning_rate": 5.530936089678264e-06, "log_odds_chosen": 0.5545127987861633, "log_odds_ratio": -0.59429532289505, "logits/chosen": -0.42196816205978394, "logits/rejected": -0.4064735472202301, "logps/chosen": -0.999508261680603, "logps/rejected": -1.366876482963562, "loss": 0.9843, "nll_loss": 0.9248775243759155, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09995082765817642, "rewards/margins": 0.03673681244254112, "rewards/rejected": -0.13668763637542725, "step": 4340 }, { "epoch": 0.79, "grad_norm": 0.7505858540534973, "learning_rate": 5.525112825738826e-06, "log_odds_chosen": 0.668211817741394, "log_odds_ratio": -0.557309091091156, "logits/chosen": -0.41097337007522583, "logits/rejected": -0.3994588553905487, "logps/chosen": -0.9319581985473633, "logps/rejected": -1.3750684261322021, "loss": 1.0205, "nll_loss": 0.9647325277328491, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09319581091403961, "rewards/margins": 0.0443110391497612, "rewards/rejected": -0.1375068575143814, "step": 4350 }, { "epoch": 0.79, "grad_norm": 1.393135666847229, "learning_rate": 5.519289561799389e-06, "log_odds_chosen": 1.0365409851074219, "log_odds_ratio": -0.4862661361694336, "logits/chosen": -0.36097291111946106, "logits/rejected": -0.4341823160648346, "logps/chosen": -0.850311279296875, "logps/rejected": -1.543900966644287, "loss": 0.933, "nll_loss": 0.884366512298584, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08503112196922302, "rewards/margins": 0.06935896724462509, "rewards/rejected": -0.15439006686210632, "step": 4360 }, { "epoch": 0.79, "grad_norm": 1.2937564849853516, "learning_rate": 5.51346629785995e-06, "log_odds_chosen": 0.7271625399589539, "log_odds_ratio": -0.5418993830680847, "logits/chosen": -0.408609539270401, "logits/rejected": -0.4532496929168701, "logps/chosen": -0.9302921295166016, "logps/rejected": -1.426904320716858, "loss": 1.0401, "nll_loss": 0.985892117023468, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0930292084813118, "rewards/margins": 0.04966122657060623, "rewards/rejected": -0.14269044995307922, "step": 4370 }, { "epoch": 0.79, "grad_norm": 1.5124766826629639, "learning_rate": 5.507643033920512e-06, "log_odds_chosen": 0.8994364738464355, "log_odds_ratio": -0.5436853170394897, "logits/chosen": -0.3987227976322174, "logits/rejected": -0.426923930644989, "logps/chosen": -0.857147216796875, "logps/rejected": -1.457137107849121, "loss": 0.9784, "nll_loss": 0.9239856600761414, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08571472018957138, "rewards/margins": 0.059999000281095505, "rewards/rejected": -0.1457137167453766, "step": 4380 }, { "epoch": 0.79, "grad_norm": 0.9244928956031799, "learning_rate": 5.501819769981075e-06, "log_odds_chosen": 0.7180790305137634, "log_odds_ratio": -0.595329225063324, "logits/chosen": -0.3936954140663147, "logits/rejected": -0.421609103679657, "logps/chosen": -1.0266858339309692, "logps/rejected": -1.4452166557312012, "loss": 1.0865, "nll_loss": 1.0269358158111572, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10266858339309692, "rewards/margins": 0.041853077709674835, "rewards/rejected": -0.14452166855335236, "step": 4390 }, { "epoch": 0.79, "grad_norm": 1.1748363971710205, "learning_rate": 5.495996506041636e-06, "log_odds_chosen": 0.8399966955184937, "log_odds_ratio": -0.559502363204956, "logits/chosen": -0.3925052881240845, "logits/rejected": -0.40640386939048767, "logps/chosen": -0.8787053823471069, "logps/rejected": -1.4464792013168335, "loss": 0.9667, "nll_loss": 0.9107036590576172, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08787054568529129, "rewards/margins": 0.05677737668156624, "rewards/rejected": -0.14464792609214783, "step": 4400 }, { "epoch": 0.8, "grad_norm": 1.6740537881851196, "learning_rate": 5.490173242102198e-06, "log_odds_chosen": 0.583748459815979, "log_odds_ratio": -0.6321566700935364, "logits/chosen": -0.4121069014072418, "logits/rejected": -0.44561561942100525, "logps/chosen": -1.0274041891098022, "logps/rejected": -1.407701849937439, "loss": 1.047, "nll_loss": 0.9837868809700012, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10274042934179306, "rewards/margins": 0.03802977129817009, "rewards/rejected": -0.14077021181583405, "step": 4410 }, { "epoch": 0.8, "grad_norm": 1.3400599956512451, "learning_rate": 5.48434997816276e-06, "log_odds_chosen": 0.5425913333892822, "log_odds_ratio": -0.6277004480361938, "logits/chosen": -0.44407615065574646, "logits/rejected": -0.46227678656578064, "logps/chosen": -1.0416014194488525, "logps/rejected": -1.4123096466064453, "loss": 1.0788, "nll_loss": 1.0160634517669678, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10416014492511749, "rewards/margins": 0.03707083314657211, "rewards/rejected": -0.141230970621109, "step": 4420 }, { "epoch": 0.8, "grad_norm": 5.0677666664123535, "learning_rate": 5.478526714223321e-06, "log_odds_chosen": 0.8549364805221558, "log_odds_ratio": -0.47799697518348694, "logits/chosen": -0.3967309594154358, "logits/rejected": -0.4370170533657074, "logps/chosen": -0.9174606204032898, "logps/rejected": -1.5248515605926514, "loss": 1.0526, "nll_loss": 1.0048235654830933, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09174606949090958, "rewards/margins": 0.06073909252882004, "rewards/rejected": -0.15248516201972961, "step": 4430 }, { "epoch": 0.8, "grad_norm": 1.2501968145370483, "learning_rate": 5.472703450283884e-06, "log_odds_chosen": 0.5208200216293335, "log_odds_ratio": -0.6153031587600708, "logits/chosen": -0.36386433243751526, "logits/rejected": -0.3963198661804199, "logps/chosen": -0.9649707674980164, "logps/rejected": -1.306401252746582, "loss": 0.9706, "nll_loss": 0.9090933799743652, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09649708122015, "rewards/margins": 0.03414304554462433, "rewards/rejected": -0.13064011931419373, "step": 4440 }, { "epoch": 0.8, "grad_norm": 1.1581801176071167, "learning_rate": 5.466880186344446e-06, "log_odds_chosen": 0.9284378886222839, "log_odds_ratio": -0.49612656235694885, "logits/chosen": -0.3325726389884949, "logits/rejected": -0.4070536494255066, "logps/chosen": -0.8295499682426453, "logps/rejected": -1.4266316890716553, "loss": 0.9135, "nll_loss": 0.8638966679573059, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08295499533414841, "rewards/margins": 0.059708189219236374, "rewards/rejected": -0.14266319572925568, "step": 4450 }, { "epoch": 0.81, "grad_norm": 1.7593944072723389, "learning_rate": 5.4610569224050074e-06, "log_odds_chosen": 0.684478223323822, "log_odds_ratio": -0.5533784627914429, "logits/chosen": -0.4242308735847473, "logits/rejected": -0.44120240211486816, "logps/chosen": -0.8975669145584106, "logps/rejected": -1.3506085872650146, "loss": 1.0322, "nll_loss": 0.9768352508544922, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08975669741630554, "rewards/margins": 0.04530417546629906, "rewards/rejected": -0.1350608766078949, "step": 4460 }, { "epoch": 0.81, "grad_norm": 0.8792012333869934, "learning_rate": 5.45523365846557e-06, "log_odds_chosen": 0.9008004069328308, "log_odds_ratio": -0.5196114778518677, "logits/chosen": -0.35840699076652527, "logits/rejected": -0.42450451850891113, "logps/chosen": -0.9610995054244995, "logps/rejected": -1.565719485282898, "loss": 0.9387, "nll_loss": 0.8867459297180176, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09610994905233383, "rewards/margins": 0.06046200543642044, "rewards/rejected": -0.15657195448875427, "step": 4470 }, { "epoch": 0.81, "grad_norm": 1.587249755859375, "learning_rate": 5.449410394526132e-06, "log_odds_chosen": 0.661650538444519, "log_odds_ratio": -0.578790545463562, "logits/chosen": -0.4225030541419983, "logits/rejected": -0.4778100848197937, "logps/chosen": -0.9953657984733582, "logps/rejected": -1.4192094802856445, "loss": 1.0387, "nll_loss": 0.9808080792427063, "rewards/accuracies": 0.625, "rewards/chosen": -0.09953657537698746, "rewards/margins": 0.042384374886751175, "rewards/rejected": -0.14192095398902893, "step": 4480 }, { "epoch": 0.81, "grad_norm": 1.2669217586517334, "learning_rate": 5.4435871305866935e-06, "log_odds_chosen": 0.9236847758293152, "log_odds_ratio": -0.5020954012870789, "logits/chosen": -0.382222443819046, "logits/rejected": -0.4366540014743805, "logps/chosen": -0.8376493453979492, "logps/rejected": -1.4251618385314941, "loss": 0.9316, "nll_loss": 0.8813702464103699, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0837649330496788, "rewards/margins": 0.05875125527381897, "rewards/rejected": -0.14251618087291718, "step": 4490 }, { "epoch": 0.81, "grad_norm": 1.1096593141555786, "learning_rate": 5.437763866647255e-06, "log_odds_chosen": 0.9368969798088074, "log_odds_ratio": -0.49079108238220215, "logits/chosen": -0.3573613166809082, "logits/rejected": -0.39419031143188477, "logps/chosen": -0.8301242589950562, "logps/rejected": -1.4331512451171875, "loss": 0.9555, "nll_loss": 0.9064178466796875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08301243185997009, "rewards/margins": 0.06030268594622612, "rewards/rejected": -0.1433151364326477, "step": 4500 }, { "epoch": 0.81, "grad_norm": 1.9662997722625732, "learning_rate": 5.431940602707817e-06, "log_odds_chosen": 0.9530878067016602, "log_odds_ratio": -0.5022386312484741, "logits/chosen": -0.3724839687347412, "logits/rejected": -0.3958912193775177, "logps/chosen": -0.9541193842887878, "logps/rejected": -1.603494644165039, "loss": 0.9589, "nll_loss": 0.908686637878418, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09541194140911102, "rewards/margins": 0.0649375170469284, "rewards/rejected": -0.16034945845603943, "step": 4510 }, { "epoch": 0.82, "grad_norm": 0.777763307094574, "learning_rate": 5.42611733876838e-06, "log_odds_chosen": 0.724637508392334, "log_odds_ratio": -0.5301684141159058, "logits/chosen": -0.4204856753349304, "logits/rejected": -0.42652568221092224, "logps/chosen": -0.8927604556083679, "logps/rejected": -1.378150463104248, "loss": 0.9923, "nll_loss": 0.9392973780632019, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08927605301141739, "rewards/margins": 0.048538997769355774, "rewards/rejected": -0.13781504333019257, "step": 4520 }, { "epoch": 0.82, "grad_norm": 0.9448487162590027, "learning_rate": 5.420294074828941e-06, "log_odds_chosen": 0.650304913520813, "log_odds_ratio": -0.5700558423995972, "logits/chosen": -0.42849674820899963, "logits/rejected": -0.476339727640152, "logps/chosen": -0.9701536893844604, "logps/rejected": -1.434536337852478, "loss": 1.0054, "nll_loss": 0.9483936429023743, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.097015380859375, "rewards/margins": 0.046438273042440414, "rewards/rejected": -0.14345364272594452, "step": 4530 }, { "epoch": 0.82, "grad_norm": 0.9929624199867249, "learning_rate": 5.414470810889503e-06, "log_odds_chosen": 0.7134051322937012, "log_odds_ratio": -0.5507954359054565, "logits/chosen": -0.379250705242157, "logits/rejected": -0.3842385709285736, "logps/chosen": -0.9411032795906067, "logps/rejected": -1.4669625759124756, "loss": 0.8943, "nll_loss": 0.8392144441604614, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09411032497882843, "rewards/margins": 0.05258594825863838, "rewards/rejected": -0.1466962844133377, "step": 4540 }, { "epoch": 0.82, "grad_norm": 1.1224552392959595, "learning_rate": 5.408647546950066e-06, "log_odds_chosen": 0.8553325533866882, "log_odds_ratio": -0.5414713621139526, "logits/chosen": -0.3530065715312958, "logits/rejected": -0.38633108139038086, "logps/chosen": -0.829110324382782, "logps/rejected": -1.3547379970550537, "loss": 0.936, "nll_loss": 0.8818100690841675, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08291102945804596, "rewards/margins": 0.052562762051820755, "rewards/rejected": -0.1354738026857376, "step": 4550 }, { "epoch": 0.82, "grad_norm": 0.7922306060791016, "learning_rate": 5.402824283010627e-06, "log_odds_chosen": 0.8722979426383972, "log_odds_ratio": -0.5566241145133972, "logits/chosen": -0.43379703164100647, "logits/rejected": -0.4889557957649231, "logps/chosen": -0.975425124168396, "logps/rejected": -1.6006053686141968, "loss": 1.0169, "nll_loss": 0.9612849354743958, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09754250943660736, "rewards/margins": 0.06251802295446396, "rewards/rejected": -0.1600605547428131, "step": 4560 }, { "epoch": 0.83, "grad_norm": 1.6481794118881226, "learning_rate": 5.3970010190711895e-06, "log_odds_chosen": 0.7812891602516174, "log_odds_ratio": -0.5631781816482544, "logits/chosen": -0.37542352080345154, "logits/rejected": -0.41719841957092285, "logps/chosen": -0.8898963928222656, "logps/rejected": -1.4438140392303467, "loss": 0.9596, "nll_loss": 0.9033276438713074, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08898963034152985, "rewards/margins": 0.05539176985621452, "rewards/rejected": -0.14438140392303467, "step": 4570 }, { "epoch": 0.83, "grad_norm": 1.1498469114303589, "learning_rate": 5.391177755131752e-06, "log_odds_chosen": 1.0272377729415894, "log_odds_ratio": -0.4711379408836365, "logits/chosen": -0.3855094313621521, "logits/rejected": -0.4190591871738434, "logps/chosen": -0.9228911399841309, "logps/rejected": -1.5633227825164795, "loss": 0.9365, "nll_loss": 0.8894192576408386, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.09228911995887756, "rewards/margins": 0.06404314935207367, "rewards/rejected": -0.15633228421211243, "step": 4580 }, { "epoch": 0.83, "grad_norm": 1.1245613098144531, "learning_rate": 5.385354491192312e-06, "log_odds_chosen": 0.9771651029586792, "log_odds_ratio": -0.4908887445926666, "logits/chosen": -0.42949455976486206, "logits/rejected": -0.5072841048240662, "logps/chosen": -0.8424657583236694, "logps/rejected": -1.5049989223480225, "loss": 0.9413, "nll_loss": 0.8921648263931274, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08424657583236694, "rewards/margins": 0.06625331938266754, "rewards/rejected": -0.15049989521503448, "step": 4590 }, { "epoch": 0.83, "grad_norm": 1.0176246166229248, "learning_rate": 5.379531227252875e-06, "log_odds_chosen": 0.9292768239974976, "log_odds_ratio": -0.48836570978164673, "logits/chosen": -0.46856871247291565, "logits/rejected": -0.49828463792800903, "logps/chosen": -1.0002449750900269, "logps/rejected": -1.6327403783798218, "loss": 0.9623, "nll_loss": 0.913472056388855, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1000244989991188, "rewards/margins": 0.06324952840805054, "rewards/rejected": -0.16327403485774994, "step": 4600 }, { "epoch": 0.83, "grad_norm": 1.0745056867599487, "learning_rate": 5.373707963313437e-06, "log_odds_chosen": 0.6398543119430542, "log_odds_ratio": -0.6533128619194031, "logits/chosen": -0.4060142934322357, "logits/rejected": -0.45436620712280273, "logps/chosen": -0.9138556718826294, "logps/rejected": -1.3905316591262817, "loss": 0.9979, "nll_loss": 0.9325958490371704, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09138555824756622, "rewards/margins": 0.04766761511564255, "rewards/rejected": -0.13905318081378937, "step": 4610 }, { "epoch": 0.83, "grad_norm": 1.0568596124649048, "learning_rate": 5.3678846993739985e-06, "log_odds_chosen": 0.8728116750717163, "log_odds_ratio": -0.48789605498313904, "logits/chosen": -0.4428630471229553, "logits/rejected": -0.493156760931015, "logps/chosen": -0.9080031514167786, "logps/rejected": -1.4789577722549438, "loss": 0.9531, "nll_loss": 0.9042849540710449, "rewards/accuracies": 0.75, "rewards/chosen": -0.09080030024051666, "rewards/margins": 0.057095468044281006, "rewards/rejected": -0.14789576828479767, "step": 4620 }, { "epoch": 0.84, "grad_norm": 0.9505348205566406, "learning_rate": 5.362061435434561e-06, "log_odds_chosen": 0.7823434472084045, "log_odds_ratio": -0.549795925617218, "logits/chosen": -0.4028090834617615, "logits/rejected": -0.42478498816490173, "logps/chosen": -0.8729179501533508, "logps/rejected": -1.3873783349990845, "loss": 0.9906, "nll_loss": 0.9356420636177063, "rewards/accuracies": 0.625, "rewards/chosen": -0.08729179203510284, "rewards/margins": 0.051446039229631424, "rewards/rejected": -0.13873784244060516, "step": 4630 }, { "epoch": 0.84, "grad_norm": 1.7304534912109375, "learning_rate": 5.356238171495123e-06, "log_odds_chosen": 0.6297367215156555, "log_odds_ratio": -0.5629245638847351, "logits/chosen": -0.444740355014801, "logits/rejected": -0.48241329193115234, "logps/chosen": -0.9954794049263, "logps/rejected": -1.4082391262054443, "loss": 0.985, "nll_loss": 0.9287381172180176, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09954794496297836, "rewards/margins": 0.04127597063779831, "rewards/rejected": -0.14082391560077667, "step": 4640 }, { "epoch": 0.84, "grad_norm": 0.9871335029602051, "learning_rate": 5.3504149075556846e-06, "log_odds_chosen": 0.8629505038261414, "log_odds_ratio": -0.4875113368034363, "logits/chosen": -0.36363014578819275, "logits/rejected": -0.4274236559867859, "logps/chosen": -0.9839668273925781, "logps/rejected": -1.553021788597107, "loss": 0.9214, "nll_loss": 0.8726351857185364, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0983966737985611, "rewards/margins": 0.05690549686551094, "rewards/rejected": -0.15530219674110413, "step": 4650 }, { "epoch": 0.84, "grad_norm": 1.7025538682937622, "learning_rate": 5.344591643616247e-06, "log_odds_chosen": 0.7422502636909485, "log_odds_ratio": -0.564644455909729, "logits/chosen": -0.3728085458278656, "logits/rejected": -0.4222579896450043, "logps/chosen": -0.9774104952812195, "logps/rejected": -1.4680044651031494, "loss": 0.9695, "nll_loss": 0.9130756258964539, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09774105250835419, "rewards/margins": 0.04905937984585762, "rewards/rejected": -0.1468004435300827, "step": 4660 }, { "epoch": 0.84, "grad_norm": 1.6551975011825562, "learning_rate": 5.338768379676809e-06, "log_odds_chosen": 0.8004302978515625, "log_odds_ratio": -0.5492068529129028, "logits/chosen": -0.4279244840145111, "logits/rejected": -0.44948825240135193, "logps/chosen": -0.9123814702033997, "logps/rejected": -1.4314771890640259, "loss": 0.931, "nll_loss": 0.8761026263237, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09123813360929489, "rewards/margins": 0.05190957337617874, "rewards/rejected": -0.14314770698547363, "step": 4670 }, { "epoch": 0.85, "grad_norm": 1.4915745258331299, "learning_rate": 5.33294511573737e-06, "log_odds_chosen": 0.8939487338066101, "log_odds_ratio": -0.5288721323013306, "logits/chosen": -0.4202663004398346, "logits/rejected": -0.44463270902633667, "logps/chosen": -0.905720591545105, "logps/rejected": -1.4950666427612305, "loss": 0.9727, "nll_loss": 0.9198009371757507, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0905720591545105, "rewards/margins": 0.058934617787599564, "rewards/rejected": -0.14950667321681976, "step": 4680 }, { "epoch": 0.85, "grad_norm": 1.170964241027832, "learning_rate": 5.327121851797932e-06, "log_odds_chosen": 0.7633172869682312, "log_odds_ratio": -0.578994870185852, "logits/chosen": -0.4211356043815613, "logits/rejected": -0.4459422528743744, "logps/chosen": -0.9920495748519897, "logps/rejected": -1.4625046253204346, "loss": 0.9584, "nll_loss": 0.9004647135734558, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09920495003461838, "rewards/margins": 0.0470455177128315, "rewards/rejected": -0.14625045657157898, "step": 4690 }, { "epoch": 0.85, "grad_norm": 1.534651756286621, "learning_rate": 5.321298587858494e-06, "log_odds_chosen": 0.7604216933250427, "log_odds_ratio": -0.6054350733757019, "logits/chosen": -0.39500564336776733, "logits/rejected": -0.43382158875465393, "logps/chosen": -0.9889154434204102, "logps/rejected": -1.4807031154632568, "loss": 0.9964, "nll_loss": 0.9358501434326172, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09889154881238937, "rewards/margins": 0.049178753048181534, "rewards/rejected": -0.1480703055858612, "step": 4700 }, { "epoch": 0.85, "grad_norm": 1.5456305742263794, "learning_rate": 5.315475323919056e-06, "log_odds_chosen": 0.44413527846336365, "log_odds_ratio": -0.6286059617996216, "logits/chosen": -0.4555909037590027, "logits/rejected": -0.48065558075904846, "logps/chosen": -1.0448988676071167, "logps/rejected": -1.3062857389450073, "loss": 1.0519, "nll_loss": 0.9890422821044922, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10448990762233734, "rewards/margins": 0.026138659566640854, "rewards/rejected": -0.1306285560131073, "step": 4710 }, { "epoch": 0.85, "grad_norm": 1.510901689529419, "learning_rate": 5.309652059979618e-06, "log_odds_chosen": 1.0771484375, "log_odds_ratio": -0.49190235137939453, "logits/chosen": -0.38036924600601196, "logits/rejected": -0.4628377854824066, "logps/chosen": -0.8989608883857727, "logps/rejected": -1.5778999328613281, "loss": 0.8991, "nll_loss": 0.8498629331588745, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08989609032869339, "rewards/margins": 0.06789391487836838, "rewards/rejected": -0.15779002010822296, "step": 4720 }, { "epoch": 0.85, "grad_norm": 1.0319769382476807, "learning_rate": 5.3038287960401805e-06, "log_odds_chosen": 0.6651164889335632, "log_odds_ratio": -0.5910425782203674, "logits/chosen": -0.42693108320236206, "logits/rejected": -0.470429003238678, "logps/chosen": -0.8990481495857239, "logps/rejected": -1.3087271451950073, "loss": 1.007, "nll_loss": 0.9479179382324219, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08990481495857239, "rewards/margins": 0.0409679040312767, "rewards/rejected": -0.1308727115392685, "step": 4730 }, { "epoch": 0.86, "grad_norm": 0.9942054748535156, "learning_rate": 5.298005532100743e-06, "log_odds_chosen": 0.5316375494003296, "log_odds_ratio": -0.6433253288269043, "logits/chosen": -0.4612106382846832, "logits/rejected": -0.45017772912979126, "logps/chosen": -1.0055973529815674, "logps/rejected": -1.338026762008667, "loss": 1.0462, "nll_loss": 0.981835663318634, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10055973380804062, "rewards/margins": 0.03324293717741966, "rewards/rejected": -0.13380268216133118, "step": 4740 }, { "epoch": 0.86, "grad_norm": 1.2705566883087158, "learning_rate": 5.292182268161304e-06, "log_odds_chosen": 1.0579116344451904, "log_odds_ratio": -0.4778444170951843, "logits/chosen": -0.3648456037044525, "logits/rejected": -0.42490434646606445, "logps/chosen": -0.8988125920295715, "logps/rejected": -1.6541303396224976, "loss": 0.9395, "nll_loss": 0.8917468786239624, "rewards/accuracies": 0.75, "rewards/chosen": -0.08988126367330551, "rewards/margins": 0.07553178071975708, "rewards/rejected": -0.1654130518436432, "step": 4750 }, { "epoch": 0.86, "grad_norm": 1.1533735990524292, "learning_rate": 5.2863590042218666e-06, "log_odds_chosen": 0.6809478998184204, "log_odds_ratio": -0.5645762085914612, "logits/chosen": -0.4856683611869812, "logits/rejected": -0.46850576996803284, "logps/chosen": -1.0219817161560059, "logps/rejected": -1.4996047019958496, "loss": 1.02, "nll_loss": 0.9635196924209595, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.10219816863536835, "rewards/margins": 0.04776228964328766, "rewards/rejected": -0.149960458278656, "step": 4760 }, { "epoch": 0.86, "grad_norm": 1.5749801397323608, "learning_rate": 5.280535740282428e-06, "log_odds_chosen": 0.7470858693122864, "log_odds_ratio": -0.5543025732040405, "logits/chosen": -0.4672362804412842, "logits/rejected": -0.46102237701416016, "logps/chosen": -0.8804488182067871, "logps/rejected": -1.3649325370788574, "loss": 0.9422, "nll_loss": 0.8867942690849304, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08804487437009811, "rewards/margins": 0.048448383808135986, "rewards/rejected": -0.1364932507276535, "step": 4770 }, { "epoch": 0.86, "grad_norm": 1.3381456136703491, "learning_rate": 5.2747124763429895e-06, "log_odds_chosen": 0.8641374707221985, "log_odds_ratio": -0.5256321430206299, "logits/chosen": -0.41251951456069946, "logits/rejected": -0.44938522577285767, "logps/chosen": -0.9954856038093567, "logps/rejected": -1.5450937747955322, "loss": 1.0149, "nll_loss": 0.9623681902885437, "rewards/accuracies": 0.75, "rewards/chosen": -0.09954856336116791, "rewards/margins": 0.05496079847216606, "rewards/rejected": -0.15450936555862427, "step": 4780 }, { "epoch": 0.87, "grad_norm": 1.0874555110931396, "learning_rate": 5.268889212403552e-06, "log_odds_chosen": 0.7685006260871887, "log_odds_ratio": -0.5665751695632935, "logits/chosen": -0.4145042300224304, "logits/rejected": -0.471210241317749, "logps/chosen": -1.0652424097061157, "logps/rejected": -1.5898144245147705, "loss": 1.087, "nll_loss": 1.0303508043289185, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10652424395084381, "rewards/margins": 0.052457213401794434, "rewards/rejected": -0.15898147225379944, "step": 4790 }, { "epoch": 0.87, "grad_norm": 1.6360236406326294, "learning_rate": 5.263065948464114e-06, "log_odds_chosen": 0.9018100500106812, "log_odds_ratio": -0.4937785267829895, "logits/chosen": -0.39242830872535706, "logits/rejected": -0.43380218744277954, "logps/chosen": -0.9768654108047485, "logps/rejected": -1.5783965587615967, "loss": 0.9547, "nll_loss": 0.905341625213623, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09768654406070709, "rewards/margins": 0.060153115540742874, "rewards/rejected": -0.15783965587615967, "step": 4800 }, { "epoch": 0.87, "grad_norm": 1.1695739030838013, "learning_rate": 5.257242684524676e-06, "log_odds_chosen": 0.7925978899002075, "log_odds_ratio": -0.5527957677841187, "logits/chosen": -0.40756258368492126, "logits/rejected": -0.40037378668785095, "logps/chosen": -0.9216617345809937, "logps/rejected": -1.4269583225250244, "loss": 0.9425, "nll_loss": 0.8872434496879578, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09216617792844772, "rewards/margins": 0.050529658794403076, "rewards/rejected": -0.1426958292722702, "step": 4810 }, { "epoch": 0.87, "grad_norm": 1.5395045280456543, "learning_rate": 5.251419420585238e-06, "log_odds_chosen": 0.7387697100639343, "log_odds_ratio": -0.6026879549026489, "logits/chosen": -0.505529522895813, "logits/rejected": -0.5170288681983948, "logps/chosen": -1.066630482673645, "logps/rejected": -1.5880917310714722, "loss": 1.0232, "nll_loss": 0.9629791378974915, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1066630631685257, "rewards/margins": 0.052146125584840775, "rewards/rejected": -0.15880918502807617, "step": 4820 }, { "epoch": 0.87, "grad_norm": 1.0072517395019531, "learning_rate": 5.2455961566458e-06, "log_odds_chosen": 0.978954017162323, "log_odds_ratio": -0.4921804368495941, "logits/chosen": -0.4218106269836426, "logits/rejected": -0.4727579951286316, "logps/chosen": -0.8569656610488892, "logps/rejected": -1.5145273208618164, "loss": 0.9357, "nll_loss": 0.886450469493866, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08569655567407608, "rewards/margins": 0.0657561793923378, "rewards/rejected": -0.15145273506641388, "step": 4830 }, { "epoch": 0.87, "grad_norm": 1.023287057876587, "learning_rate": 5.239772892706362e-06, "log_odds_chosen": 0.9301230311393738, "log_odds_ratio": -0.5898328423500061, "logits/chosen": -0.4207271933555603, "logits/rejected": -0.439255952835083, "logps/chosen": -0.932141125202179, "logps/rejected": -1.6405792236328125, "loss": 0.9573, "nll_loss": 0.898327648639679, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09321411699056625, "rewards/margins": 0.07084380090236664, "rewards/rejected": -0.1640579104423523, "step": 4840 }, { "epoch": 0.88, "grad_norm": 0.7330833077430725, "learning_rate": 5.233949628766924e-06, "log_odds_chosen": 0.7396227717399597, "log_odds_ratio": -0.5504485368728638, "logits/chosen": -0.3369649052619934, "logits/rejected": -0.42328447103500366, "logps/chosen": -0.8521198034286499, "logps/rejected": -1.316091775894165, "loss": 0.9385, "nll_loss": 0.8834612965583801, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08521198481321335, "rewards/margins": 0.046397190541028976, "rewards/rejected": -0.13160917162895203, "step": 4850 }, { "epoch": 0.88, "grad_norm": 1.677101969718933, "learning_rate": 5.2281263648274854e-06, "log_odds_chosen": 1.0122201442718506, "log_odds_ratio": -0.5119179487228394, "logits/chosen": -0.43187013268470764, "logits/rejected": -0.4635971188545227, "logps/chosen": -0.9174407124519348, "logps/rejected": -1.548681616783142, "loss": 1.0155, "nll_loss": 0.9643124341964722, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0917440727353096, "rewards/margins": 0.06312408298254013, "rewards/rejected": -0.15486815571784973, "step": 4860 }, { "epoch": 0.88, "grad_norm": 1.1978373527526855, "learning_rate": 5.222303100888047e-06, "log_odds_chosen": 0.5320814251899719, "log_odds_ratio": -0.6548636555671692, "logits/chosen": -0.38169607520103455, "logits/rejected": -0.3945736885070801, "logps/chosen": -0.9942175149917603, "logps/rejected": -1.326147198677063, "loss": 1.0518, "nll_loss": 0.9862810969352722, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09942174702882767, "rewards/margins": 0.03319296985864639, "rewards/rejected": -0.13261471688747406, "step": 4870 }, { "epoch": 0.88, "grad_norm": 0.9683055281639099, "learning_rate": 5.216479836948609e-06, "log_odds_chosen": 0.6372218728065491, "log_odds_ratio": -0.559242844581604, "logits/chosen": -0.4606989324092865, "logits/rejected": -0.4654863476753235, "logps/chosen": -0.9736550450325012, "logps/rejected": -1.3879189491271973, "loss": 1.0846, "nll_loss": 1.0286335945129395, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09736550599336624, "rewards/margins": 0.0414263978600502, "rewards/rejected": -0.13879190385341644, "step": 4880 }, { "epoch": 0.88, "grad_norm": 0.8117696046829224, "learning_rate": 5.2106565730091715e-06, "log_odds_chosen": 0.6601318120956421, "log_odds_ratio": -0.5823066234588623, "logits/chosen": -0.41589921712875366, "logits/rejected": -0.45476651191711426, "logps/chosen": -0.9463253021240234, "logps/rejected": -1.3839843273162842, "loss": 0.991, "nll_loss": 0.9328001737594604, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09463252872228622, "rewards/margins": 0.043765921145677567, "rewards/rejected": -0.1383984386920929, "step": 4890 }, { "epoch": 0.89, "grad_norm": 0.7774304747581482, "learning_rate": 5.204833309069733e-06, "log_odds_chosen": 0.5918189287185669, "log_odds_ratio": -0.5761577486991882, "logits/chosen": -0.4116500914096832, "logits/rejected": -0.44521966576576233, "logps/chosen": -0.9466740489006042, "logps/rejected": -1.3614647388458252, "loss": 0.9578, "nll_loss": 0.90022212266922, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09466739743947983, "rewards/margins": 0.041479066014289856, "rewards/rejected": -0.13614647090435028, "step": 4900 }, { "epoch": 0.89, "grad_norm": 1.037978172302246, "learning_rate": 5.199010045130295e-06, "log_odds_chosen": 0.686073899269104, "log_odds_ratio": -0.5590696930885315, "logits/chosen": -0.42658406496047974, "logits/rejected": -0.4590669572353363, "logps/chosen": -0.9395986795425415, "logps/rejected": -1.4080203771591187, "loss": 0.986, "nll_loss": 0.9300875663757324, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09395986050367355, "rewards/margins": 0.04684218019247055, "rewards/rejected": -0.1408020555973053, "step": 4910 }, { "epoch": 0.89, "grad_norm": 1.1430654525756836, "learning_rate": 5.193186781190858e-06, "log_odds_chosen": 0.8383340835571289, "log_odds_ratio": -0.5327781438827515, "logits/chosen": -0.4290473461151123, "logits/rejected": -0.4340090751647949, "logps/chosen": -0.8470977544784546, "logps/rejected": -1.4011995792388916, "loss": 0.9516, "nll_loss": 0.8983678817749023, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0847097784280777, "rewards/margins": 0.055410176515579224, "rewards/rejected": -0.14011994004249573, "step": 4920 }, { "epoch": 0.89, "grad_norm": 1.4240922927856445, "learning_rate": 5.18736351725142e-06, "log_odds_chosen": 0.6269794702529907, "log_odds_ratio": -0.5663076639175415, "logits/chosen": -0.44721508026123047, "logits/rejected": -0.49740925431251526, "logps/chosen": -0.9577927589416504, "logps/rejected": -1.383522629737854, "loss": 0.9916, "nll_loss": 0.9349943399429321, "rewards/accuracies": 0.625, "rewards/chosen": -0.09577927738428116, "rewards/margins": 0.0425729937851429, "rewards/rejected": -0.13835227489471436, "step": 4930 }, { "epoch": 0.89, "grad_norm": 2.0192978382110596, "learning_rate": 5.181540253311981e-06, "log_odds_chosen": 0.7749100923538208, "log_odds_ratio": -0.5209966897964478, "logits/chosen": -0.43163347244262695, "logits/rejected": -0.4931555688381195, "logps/chosen": -0.9455229043960571, "logps/rejected": -1.4639647006988525, "loss": 1.0147, "nll_loss": 0.962630569934845, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09455229341983795, "rewards/margins": 0.051844190806150436, "rewards/rejected": -0.1463964879512787, "step": 4940 }, { "epoch": 0.89, "grad_norm": 2.5260753631591797, "learning_rate": 5.175716989372543e-06, "log_odds_chosen": 0.7062619924545288, "log_odds_ratio": -0.5886339545249939, "logits/chosen": -0.3936625123023987, "logits/rejected": -0.4544674754142761, "logps/chosen": -1.0079313516616821, "logps/rejected": -1.518587350845337, "loss": 0.9657, "nll_loss": 0.9068438410758972, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10079314559698105, "rewards/margins": 0.051065593957901, "rewards/rejected": -0.15185873210430145, "step": 4950 }, { "epoch": 0.9, "grad_norm": 1.9594887495040894, "learning_rate": 5.169893725433105e-06, "log_odds_chosen": 0.9933183789253235, "log_odds_ratio": -0.5047029256820679, "logits/chosen": -0.42144671082496643, "logits/rejected": -0.4574872851371765, "logps/chosen": -0.9184238314628601, "logps/rejected": -1.5921399593353271, "loss": 1.0062, "nll_loss": 0.9557191133499146, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09184238314628601, "rewards/margins": 0.06737162172794342, "rewards/rejected": -0.15921400487422943, "step": 4960 }, { "epoch": 0.9, "grad_norm": 2.419018268585205, "learning_rate": 5.164070461493667e-06, "log_odds_chosen": 0.42638593912124634, "log_odds_ratio": -0.5967411994934082, "logits/chosen": -0.4372042119503021, "logits/rejected": -0.4627193510532379, "logps/chosen": -1.0391814708709717, "logps/rejected": -1.3309781551361084, "loss": 0.9981, "nll_loss": 0.938385009765625, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10391815751791, "rewards/margins": 0.02917967364192009, "rewards/rejected": -0.1330978125333786, "step": 4970 }, { "epoch": 0.9, "grad_norm": 0.9298260807991028, "learning_rate": 5.158247197554229e-06, "log_odds_chosen": 0.641968846321106, "log_odds_ratio": -0.5857947468757629, "logits/chosen": -0.3784220218658447, "logits/rejected": -0.45632854104042053, "logps/chosen": -0.8558870553970337, "logps/rejected": -1.2764778137207031, "loss": 1.0052, "nll_loss": 0.9466080665588379, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08558870851993561, "rewards/margins": 0.04205907881259918, "rewards/rejected": -0.1276477873325348, "step": 4980 }, { "epoch": 0.9, "grad_norm": 1.8054282665252686, "learning_rate": 5.152423933614791e-06, "log_odds_chosen": 0.6764596700668335, "log_odds_ratio": -0.5754591822624207, "logits/chosen": -0.4195118546485901, "logits/rejected": -0.46894583106040955, "logps/chosen": -0.9912070035934448, "logps/rejected": -1.4474741220474243, "loss": 0.9957, "nll_loss": 0.9381793141365051, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09912069886922836, "rewards/margins": 0.04562670364975929, "rewards/rejected": -0.14474740624427795, "step": 4990 }, { "epoch": 0.9, "grad_norm": 0.8131672739982605, "learning_rate": 5.146600669675353e-06, "log_odds_chosen": 0.765887439250946, "log_odds_ratio": -0.5449743866920471, "logits/chosen": -0.383352130651474, "logits/rejected": -0.4293951392173767, "logps/chosen": -0.825584888458252, "logps/rejected": -1.3375442028045654, "loss": 0.9055, "nll_loss": 0.8509842753410339, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08255849033594131, "rewards/margins": 0.05119592696428299, "rewards/rejected": -0.1337544023990631, "step": 5000 }, { "epoch": 0.91, "grad_norm": 1.0602699518203735, "learning_rate": 5.140777405735915e-06, "log_odds_chosen": 0.7626697421073914, "log_odds_ratio": -0.5582002401351929, "logits/chosen": -0.4061199724674225, "logits/rejected": -0.4781287610530853, "logps/chosen": -0.942223072052002, "logps/rejected": -1.4417130947113037, "loss": 0.9715, "nll_loss": 0.9156550168991089, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.094222292304039, "rewards/margins": 0.04994902387261391, "rewards/rejected": -0.1441713124513626, "step": 5010 }, { "epoch": 0.91, "grad_norm": 0.9965958595275879, "learning_rate": 5.134954141796477e-06, "log_odds_chosen": 0.5501303672790527, "log_odds_ratio": -0.5543341636657715, "logits/chosen": -0.3945973217487335, "logits/rejected": -0.4275182783603668, "logps/chosen": -0.8684164881706238, "logps/rejected": -1.2159730195999146, "loss": 0.9075, "nll_loss": 0.8520703315734863, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0868416577577591, "rewards/margins": 0.0347556471824646, "rewards/rejected": -0.1215972900390625, "step": 5020 }, { "epoch": 0.91, "grad_norm": 1.8370288610458374, "learning_rate": 5.129130877857039e-06, "log_odds_chosen": 0.9271780848503113, "log_odds_ratio": -0.49825724959373474, "logits/chosen": -0.383684366941452, "logits/rejected": -0.44511160254478455, "logps/chosen": -0.8650614619255066, "logps/rejected": -1.4556103944778442, "loss": 1.0062, "nll_loss": 0.9563736915588379, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08650614321231842, "rewards/margins": 0.059054892510175705, "rewards/rejected": -0.14556102454662323, "step": 5030 }, { "epoch": 0.91, "grad_norm": 1.0370079278945923, "learning_rate": 5.1233076139176e-06, "log_odds_chosen": 0.5564397573471069, "log_odds_ratio": -0.6237277388572693, "logits/chosen": -0.46575039625167847, "logits/rejected": -0.4472903609275818, "logps/chosen": -0.9088039398193359, "logps/rejected": -1.2560622692108154, "loss": 1.0217, "nll_loss": 0.9593534469604492, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0908803939819336, "rewards/margins": 0.03472583740949631, "rewards/rejected": -0.1256062239408493, "step": 5040 }, { "epoch": 0.91, "grad_norm": 1.349543571472168, "learning_rate": 5.1174843499781625e-06, "log_odds_chosen": 0.8041173219680786, "log_odds_ratio": -0.5334728956222534, "logits/chosen": -0.5147528052330017, "logits/rejected": -0.528853714466095, "logps/chosen": -1.0080214738845825, "logps/rejected": -1.5837531089782715, "loss": 1.0237, "nll_loss": 0.9703797101974487, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10080213844776154, "rewards/margins": 0.05757317692041397, "rewards/rejected": -0.1583753228187561, "step": 5050 }, { "epoch": 0.91, "grad_norm": 0.9374119639396667, "learning_rate": 5.111661086038724e-06, "log_odds_chosen": 0.588254988193512, "log_odds_ratio": -0.6173927187919617, "logits/chosen": -0.3634551167488098, "logits/rejected": -0.4189843237400055, "logps/chosen": -0.9690176248550415, "logps/rejected": -1.4033445119857788, "loss": 0.9763, "nll_loss": 0.9145170450210571, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09690175950527191, "rewards/margins": 0.04343269020318985, "rewards/rejected": -0.14033445715904236, "step": 5060 }, { "epoch": 0.92, "grad_norm": 1.4300905466079712, "learning_rate": 5.105837822099286e-06, "log_odds_chosen": 0.9700382351875305, "log_odds_ratio": -0.5175814032554626, "logits/chosen": -0.417670875787735, "logits/rejected": -0.43058285117149353, "logps/chosen": -0.8614059686660767, "logps/rejected": -1.5537515878677368, "loss": 0.9059, "nll_loss": 0.8541040420532227, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08614059537649155, "rewards/margins": 0.06923457235097885, "rewards/rejected": -0.1553751528263092, "step": 5070 }, { "epoch": 0.92, "grad_norm": 1.2038459777832031, "learning_rate": 5.100014558159849e-06, "log_odds_chosen": 0.8242164850234985, "log_odds_ratio": -0.5438815355300903, "logits/chosen": -0.4557031989097595, "logits/rejected": -0.47849076986312866, "logps/chosen": -1.0131621360778809, "logps/rejected": -1.5966014862060547, "loss": 0.9387, "nll_loss": 0.8842656016349792, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10131619870662689, "rewards/margins": 0.05834393948316574, "rewards/rejected": -0.15966013073921204, "step": 5080 }, { "epoch": 0.92, "grad_norm": 1.4084062576293945, "learning_rate": 5.09419129422041e-06, "log_odds_chosen": 0.8046883344650269, "log_odds_ratio": -0.5245261192321777, "logits/chosen": -0.39046531915664673, "logits/rejected": -0.42180243134498596, "logps/chosen": -1.027181625366211, "logps/rejected": -1.5842394828796387, "loss": 1.0032, "nll_loss": 0.950783371925354, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10271817445755005, "rewards/margins": 0.055705778300762177, "rewards/rejected": -0.15842394530773163, "step": 5090 }, { "epoch": 0.92, "grad_norm": 1.1398626565933228, "learning_rate": 5.088368030280972e-06, "log_odds_chosen": 0.7871803045272827, "log_odds_ratio": -0.5711994767189026, "logits/chosen": -0.42440158128738403, "logits/rejected": -0.42357882857322693, "logps/chosen": -0.8840295076370239, "logps/rejected": -1.4321167469024658, "loss": 0.9717, "nll_loss": 0.9145844578742981, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08840294927358627, "rewards/margins": 0.05480872467160225, "rewards/rejected": -0.14321167767047882, "step": 5100 }, { "epoch": 0.92, "grad_norm": 1.2491414546966553, "learning_rate": 5.082544766341535e-06, "log_odds_chosen": 0.7577713131904602, "log_odds_ratio": -0.5364837646484375, "logits/chosen": -0.4503016471862793, "logits/rejected": -0.4818963408470154, "logps/chosen": -0.9299535751342773, "logps/rejected": -1.4110002517700195, "loss": 0.9741, "nll_loss": 0.9204285740852356, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09299536049365997, "rewards/margins": 0.0481046661734581, "rewards/rejected": -0.14110003411769867, "step": 5110 }, { "epoch": 0.92, "grad_norm": 1.3868515491485596, "learning_rate": 5.076721502402095e-06, "log_odds_chosen": 0.9328304529190063, "log_odds_ratio": -0.5016459226608276, "logits/chosen": -0.47459521889686584, "logits/rejected": -0.468188613653183, "logps/chosen": -0.9387983083724976, "logps/rejected": -1.6026138067245483, "loss": 1.0102, "nll_loss": 0.9600510597229004, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09387984126806259, "rewards/margins": 0.06638153642416, "rewards/rejected": -0.1602613925933838, "step": 5120 }, { "epoch": 0.93, "grad_norm": 1.8675638437271118, "learning_rate": 5.070898238462658e-06, "log_odds_chosen": 0.8587830662727356, "log_odds_ratio": -0.5065292119979858, "logits/chosen": -0.4888245165348053, "logits/rejected": -0.501750648021698, "logps/chosen": -0.9960910081863403, "logps/rejected": -1.5872774124145508, "loss": 1.0089, "nll_loss": 0.9582953453063965, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09960909932851791, "rewards/margins": 0.05911865830421448, "rewards/rejected": -0.158727765083313, "step": 5130 }, { "epoch": 0.93, "grad_norm": 1.1439307928085327, "learning_rate": 5.06507497452322e-06, "log_odds_chosen": 0.7081592679023743, "log_odds_ratio": -0.5490130186080933, "logits/chosen": -0.4350952208042145, "logits/rejected": -0.46510592103004456, "logps/chosen": -0.8627880215644836, "logps/rejected": -1.3226877450942993, "loss": 0.9911, "nll_loss": 0.936205267906189, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08627880364656448, "rewards/margins": 0.04598996788263321, "rewards/rejected": -0.1322687715291977, "step": 5140 }, { "epoch": 0.93, "grad_norm": 1.0725418329238892, "learning_rate": 5.059251710583782e-06, "log_odds_chosen": 0.5310506224632263, "log_odds_ratio": -0.6339167356491089, "logits/chosen": -0.5025271773338318, "logits/rejected": -0.525850236415863, "logps/chosen": -1.002502679824829, "logps/rejected": -1.3719778060913086, "loss": 1.0611, "nll_loss": 0.997689425945282, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10025026649236679, "rewards/margins": 0.03694751486182213, "rewards/rejected": -0.13719777762889862, "step": 5150 }, { "epoch": 0.93, "grad_norm": 2.5402228832244873, "learning_rate": 5.053428446644344e-06, "log_odds_chosen": 0.5080104470252991, "log_odds_ratio": -0.6327738165855408, "logits/chosen": -0.4841841757297516, "logits/rejected": -0.4749310612678528, "logps/chosen": -0.9421554803848267, "logps/rejected": -1.2996408939361572, "loss": 0.9475, "nll_loss": 0.8842074275016785, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09421554952859879, "rewards/margins": 0.03574854135513306, "rewards/rejected": -0.12996408343315125, "step": 5160 }, { "epoch": 0.93, "grad_norm": 1.4586751461029053, "learning_rate": 5.047605182704906e-06, "log_odds_chosen": 0.6196699738502502, "log_odds_ratio": -0.5867195129394531, "logits/chosen": -0.4862591624259949, "logits/rejected": -0.47595709562301636, "logps/chosen": -1.0899220705032349, "logps/rejected": -1.5220587253570557, "loss": 1.0038, "nll_loss": 0.9451197385787964, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10899219661951065, "rewards/margins": 0.043213676661252975, "rewards/rejected": -0.15220588445663452, "step": 5170 }, { "epoch": 0.94, "grad_norm": 1.0485233068466187, "learning_rate": 5.041781918765468e-06, "log_odds_chosen": 0.7615898847579956, "log_odds_ratio": -0.5628899335861206, "logits/chosen": -0.4379093647003174, "logits/rejected": -0.4398167133331299, "logps/chosen": -0.9211156964302063, "logps/rejected": -1.4053175449371338, "loss": 1.0219, "nll_loss": 0.9656468629837036, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09211156517267227, "rewards/margins": 0.04842020198702812, "rewards/rejected": -0.1405317485332489, "step": 5180 }, { "epoch": 0.94, "grad_norm": 1.5329257249832153, "learning_rate": 5.03595865482603e-06, "log_odds_chosen": 0.6447916030883789, "log_odds_ratio": -0.5756027698516846, "logits/chosen": -0.3828571140766144, "logits/rejected": -0.4486420750617981, "logps/chosen": -0.9009901881217957, "logps/rejected": -1.3342673778533936, "loss": 1.0371, "nll_loss": 0.9795511960983276, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0900990217924118, "rewards/margins": 0.04332772642374039, "rewards/rejected": -0.1334267556667328, "step": 5190 }, { "epoch": 0.94, "grad_norm": 1.021886944770813, "learning_rate": 5.030135390886592e-06, "log_odds_chosen": 1.0003609657287598, "log_odds_ratio": -0.5113279819488525, "logits/chosen": -0.38337117433547974, "logits/rejected": -0.4203677177429199, "logps/chosen": -0.812484860420227, "logps/rejected": -1.4831180572509766, "loss": 0.8823, "nll_loss": 0.8311184048652649, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08124849200248718, "rewards/margins": 0.06706332415342331, "rewards/rejected": -0.1483118236064911, "step": 5200 }, { "epoch": 0.94, "grad_norm": 1.1141126155853271, "learning_rate": 5.024312126947154e-06, "log_odds_chosen": 1.0493788719177246, "log_odds_ratio": -0.5095570683479309, "logits/chosen": -0.3903278708457947, "logits/rejected": -0.45845937728881836, "logps/chosen": -0.9482452273368835, "logps/rejected": -1.6919740438461304, "loss": 0.9605, "nll_loss": 0.9095357656478882, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09482452273368835, "rewards/margins": 0.07437288761138916, "rewards/rejected": -0.16919739544391632, "step": 5210 }, { "epoch": 0.94, "grad_norm": 1.2961907386779785, "learning_rate": 5.018488863007715e-06, "log_odds_chosen": 0.9118143320083618, "log_odds_ratio": -0.4949149489402771, "logits/chosen": -0.42776647210121155, "logits/rejected": -0.49795588850975037, "logps/chosen": -1.0686237812042236, "logps/rejected": -1.7282140254974365, "loss": 1.0082, "nll_loss": 0.9587093591690063, "rewards/accuracies": 0.75, "rewards/chosen": -0.1068623811006546, "rewards/margins": 0.06595902144908905, "rewards/rejected": -0.17282140254974365, "step": 5220 }, { "epoch": 0.94, "grad_norm": 1.5598081350326538, "learning_rate": 5.012665599068277e-06, "log_odds_chosen": 0.5592025518417358, "log_odds_ratio": -0.583517849445343, "logits/chosen": -0.4597102105617523, "logits/rejected": -0.49349212646484375, "logps/chosen": -0.9325317144393921, "logps/rejected": -1.2962085008621216, "loss": 1.0022, "nll_loss": 0.9438241720199585, "rewards/accuracies": 0.625, "rewards/chosen": -0.09325318038463593, "rewards/margins": 0.03636767342686653, "rewards/rejected": -0.12962085008621216, "step": 5230 }, { "epoch": 0.95, "grad_norm": 1.3146644830703735, "learning_rate": 5.00684233512884e-06, "log_odds_chosen": 0.7231449484825134, "log_odds_ratio": -0.5562411546707153, "logits/chosen": -0.47616177797317505, "logits/rejected": -0.5005732178688049, "logps/chosen": -0.9185335040092468, "logps/rejected": -1.388819932937622, "loss": 1.0186, "nll_loss": 0.9629395604133606, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09185335785150528, "rewards/margins": 0.047028638422489166, "rewards/rejected": -0.13888199627399445, "step": 5240 }, { "epoch": 0.95, "grad_norm": 1.8377481698989868, "learning_rate": 5.001019071189401e-06, "log_odds_chosen": 0.6257731318473816, "log_odds_ratio": -0.5724449157714844, "logits/chosen": -0.549246609210968, "logits/rejected": -0.5497399568557739, "logps/chosen": -0.963038444519043, "logps/rejected": -1.3778165578842163, "loss": 1.0246, "nll_loss": 0.9673110842704773, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09630385041236877, "rewards/margins": 0.04147782921791077, "rewards/rejected": -0.13778167963027954, "step": 5250 }, { "epoch": 0.95, "grad_norm": 1.3203247785568237, "learning_rate": 4.9951958072499634e-06, "log_odds_chosen": 0.8214631080627441, "log_odds_ratio": -0.5461077094078064, "logits/chosen": -0.4012463092803955, "logits/rejected": -0.4272507131099701, "logps/chosen": -0.8835545778274536, "logps/rejected": -1.4272209405899048, "loss": 0.9018, "nll_loss": 0.8472299575805664, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08835545927286148, "rewards/margins": 0.054366640746593475, "rewards/rejected": -0.14272208511829376, "step": 5260 }, { "epoch": 0.95, "grad_norm": 0.8510302305221558, "learning_rate": 4.989372543310526e-06, "log_odds_chosen": 0.9004091024398804, "log_odds_ratio": -0.5193175673484802, "logits/chosen": -0.40570640563964844, "logits/rejected": -0.44902676343917847, "logps/chosen": -0.9213584661483765, "logps/rejected": -1.5180364847183228, "loss": 0.9516, "nll_loss": 0.8996561169624329, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09213585406541824, "rewards/margins": 0.05966780334711075, "rewards/rejected": -0.151803657412529, "step": 5270 }, { "epoch": 0.95, "grad_norm": 4.7708940505981445, "learning_rate": 4.983549279371087e-06, "log_odds_chosen": 0.9598283767700195, "log_odds_ratio": -0.5097156763076782, "logits/chosen": -0.41570359468460083, "logits/rejected": -0.435255765914917, "logps/chosen": -0.8283422589302063, "logps/rejected": -1.51011061668396, "loss": 0.9208, "nll_loss": 0.869789719581604, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08283422142267227, "rewards/margins": 0.06817685067653656, "rewards/rejected": -0.15101107954978943, "step": 5280 }, { "epoch": 0.96, "grad_norm": 1.195023775100708, "learning_rate": 4.9777260154316495e-06, "log_odds_chosen": 0.7134519815444946, "log_odds_ratio": -0.5415999293327332, "logits/chosen": -0.4060707986354828, "logits/rejected": -0.4542246460914612, "logps/chosen": -0.9608157277107239, "logps/rejected": -1.4222064018249512, "loss": 0.9772, "nll_loss": 0.9230211973190308, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09608156979084015, "rewards/margins": 0.04613906145095825, "rewards/rejected": -0.1422206461429596, "step": 5290 }, { "epoch": 0.96, "grad_norm": 1.9315356016159058, "learning_rate": 4.971902751492211e-06, "log_odds_chosen": 0.6030072569847107, "log_odds_ratio": -0.6301401257514954, "logits/chosen": -0.42665156722068787, "logits/rejected": -0.45132675766944885, "logps/chosen": -0.9962190389633179, "logps/rejected": -1.4173381328582764, "loss": 1.0517, "nll_loss": 0.9886919260025024, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09962191432714462, "rewards/margins": 0.042111899703741074, "rewards/rejected": -0.1417338252067566, "step": 5300 }, { "epoch": 0.96, "grad_norm": 1.0606050491333008, "learning_rate": 4.9660794875527724e-06, "log_odds_chosen": 1.0494234561920166, "log_odds_ratio": -0.47545939683914185, "logits/chosen": -0.37000641226768494, "logits/rejected": -0.4474189877510071, "logps/chosen": -0.9080765843391418, "logps/rejected": -1.5959746837615967, "loss": 0.9406, "nll_loss": 0.8930392265319824, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09080766141414642, "rewards/margins": 0.06878980249166489, "rewards/rejected": -0.1595974713563919, "step": 5310 }, { "epoch": 0.96, "grad_norm": 1.306066870689392, "learning_rate": 4.960256223613335e-06, "log_odds_chosen": 0.724065899848938, "log_odds_ratio": -0.5389540195465088, "logits/chosen": -0.4582054018974304, "logits/rejected": -0.4962041974067688, "logps/chosen": -1.0584847927093506, "logps/rejected": -1.5543081760406494, "loss": 1.0256, "nll_loss": 0.9717254638671875, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10584849119186401, "rewards/margins": 0.04958232864737511, "rewards/rejected": -0.15543082356452942, "step": 5320 }, { "epoch": 0.96, "grad_norm": 1.0546212196350098, "learning_rate": 4.954432959673897e-06, "log_odds_chosen": 0.7997231483459473, "log_odds_ratio": -0.5457186698913574, "logits/chosen": -0.37917906045913696, "logits/rejected": -0.4547523856163025, "logps/chosen": -0.8591880798339844, "logps/rejected": -1.397567629814148, "loss": 0.9259, "nll_loss": 0.8712942004203796, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08591881394386292, "rewards/margins": 0.05383795499801636, "rewards/rejected": -0.13975676894187927, "step": 5330 }, { "epoch": 0.96, "grad_norm": 1.134579062461853, "learning_rate": 4.9486096957344585e-06, "log_odds_chosen": 0.8113735914230347, "log_odds_ratio": -0.49761638045310974, "logits/chosen": -0.4220595359802246, "logits/rejected": -0.4521370530128479, "logps/chosen": -0.9218913912773132, "logps/rejected": -1.467212200164795, "loss": 0.9593, "nll_loss": 0.9095503091812134, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09218914806842804, "rewards/margins": 0.05453207343816757, "rewards/rejected": -0.14672121405601501, "step": 5340 }, { "epoch": 0.97, "grad_norm": 1.6318745613098145, "learning_rate": 4.942786431795021e-06, "log_odds_chosen": 0.9257783889770508, "log_odds_ratio": -0.48035699129104614, "logits/chosen": -0.32946550846099854, "logits/rejected": -0.39153873920440674, "logps/chosen": -0.9110609889030457, "logps/rejected": -1.5010361671447754, "loss": 0.9243, "nll_loss": 0.8762644529342651, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.0911061018705368, "rewards/margins": 0.05899751931428909, "rewards/rejected": -0.1501035988330841, "step": 5350 }, { "epoch": 0.97, "grad_norm": 1.2999166250228882, "learning_rate": 4.936963167855583e-06, "log_odds_chosen": 0.6719887852668762, "log_odds_ratio": -0.5760074853897095, "logits/chosen": -0.5032616257667542, "logits/rejected": -0.5166983604431152, "logps/chosen": -0.9730969667434692, "logps/rejected": -1.427927851676941, "loss": 1.0772, "nll_loss": 1.0196009874343872, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09730970114469528, "rewards/margins": 0.045483093708753586, "rewards/rejected": -0.14279279112815857, "step": 5360 }, { "epoch": 0.97, "grad_norm": 1.9392198324203491, "learning_rate": 4.9311399039161454e-06, "log_odds_chosen": 0.6282123327255249, "log_odds_ratio": -0.5469298362731934, "logits/chosen": -0.45358020067214966, "logits/rejected": -0.5022454857826233, "logps/chosen": -1.0295902490615845, "logps/rejected": -1.4616683721542358, "loss": 1.0047, "nll_loss": 0.9499963521957397, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10295902192592621, "rewards/margins": 0.0432078093290329, "rewards/rejected": -0.1461668312549591, "step": 5370 }, { "epoch": 0.97, "grad_norm": 1.0072379112243652, "learning_rate": 4.925316639976707e-06, "log_odds_chosen": 0.7839166522026062, "log_odds_ratio": -0.5868215560913086, "logits/chosen": -0.5020217895507812, "logits/rejected": -0.5194701552391052, "logps/chosen": -0.9136531949043274, "logps/rejected": -1.440227746963501, "loss": 1.0671, "nll_loss": 1.0084375143051147, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09136532247066498, "rewards/margins": 0.05265744403004646, "rewards/rejected": -0.14402277767658234, "step": 5380 }, { "epoch": 0.97, "grad_norm": 0.7636964321136475, "learning_rate": 4.919493376037268e-06, "log_odds_chosen": 0.9430241584777832, "log_odds_ratio": -0.48400864005088806, "logits/chosen": -0.4314158856868744, "logits/rejected": -0.47129616141319275, "logps/chosen": -0.925650954246521, "logps/rejected": -1.5474523305892944, "loss": 0.9774, "nll_loss": 0.9290070533752441, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09256509691476822, "rewards/margins": 0.06218013912439346, "rewards/rejected": -0.1547452211380005, "step": 5390 }, { "epoch": 0.98, "grad_norm": 2.348417282104492, "learning_rate": 4.913670112097831e-06, "log_odds_chosen": 0.972173810005188, "log_odds_ratio": -0.5155828595161438, "logits/chosen": -0.34915369749069214, "logits/rejected": -0.4212590157985687, "logps/chosen": -0.8123193979263306, "logps/rejected": -1.457058072090149, "loss": 0.8675, "nll_loss": 0.8159490823745728, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08123193681240082, "rewards/margins": 0.06447387486696243, "rewards/rejected": -0.14570581912994385, "step": 5400 }, { "epoch": 0.98, "grad_norm": 0.9304720759391785, "learning_rate": 4.907846848158392e-06, "log_odds_chosen": 0.7508946657180786, "log_odds_ratio": -0.5314705967903137, "logits/chosen": -0.41652408242225647, "logits/rejected": -0.47718554735183716, "logps/chosen": -0.9125279188156128, "logps/rejected": -1.4500106573104858, "loss": 0.9048, "nll_loss": 0.8516514897346497, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09125280380249023, "rewards/margins": 0.05374826118350029, "rewards/rejected": -0.14500106871128082, "step": 5410 }, { "epoch": 0.98, "grad_norm": 1.3840512037277222, "learning_rate": 4.9020235842189545e-06, "log_odds_chosen": 0.8840494155883789, "log_odds_ratio": -0.5257449150085449, "logits/chosen": -0.3711920380592346, "logits/rejected": -0.43855515122413635, "logps/chosen": -0.9542511105537415, "logps/rejected": -1.5532355308532715, "loss": 0.9891, "nll_loss": 0.9365428686141968, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0954250916838646, "rewards/margins": 0.05989844724535942, "rewards/rejected": -0.1553235501050949, "step": 5420 }, { "epoch": 0.98, "grad_norm": 1.0180045366287231, "learning_rate": 4.896200320279517e-06, "log_odds_chosen": 0.6119788885116577, "log_odds_ratio": -0.6346170902252197, "logits/chosen": -0.42415839433670044, "logits/rejected": -0.42158955335617065, "logps/chosen": -0.9403412938117981, "logps/rejected": -1.39726722240448, "loss": 0.9807, "nll_loss": 0.9172808527946472, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09403412789106369, "rewards/margins": 0.04569259658455849, "rewards/rejected": -0.13972671329975128, "step": 5430 }, { "epoch": 0.98, "grad_norm": 2.6387805938720703, "learning_rate": 4.890377056340078e-06, "log_odds_chosen": 0.8593441843986511, "log_odds_ratio": -0.5371569991111755, "logits/chosen": -0.4286310076713562, "logits/rejected": -0.44843417406082153, "logps/chosen": -0.9194551706314087, "logps/rejected": -1.4854052066802979, "loss": 1.0245, "nll_loss": 0.9708328247070312, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09194551408290863, "rewards/margins": 0.056595008820295334, "rewards/rejected": -0.14854054152965546, "step": 5440 }, { "epoch": 0.98, "grad_norm": 1.9560596942901611, "learning_rate": 4.8845537924006405e-06, "log_odds_chosen": 0.9763432741165161, "log_odds_ratio": -0.4967397153377533, "logits/chosen": -0.3855515420436859, "logits/rejected": -0.43095794320106506, "logps/chosen": -0.9564552307128906, "logps/rejected": -1.5772628784179688, "loss": 0.8998, "nll_loss": 0.8501434326171875, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09564553201198578, "rewards/margins": 0.06208076328039169, "rewards/rejected": -0.15772630274295807, "step": 5450 }, { "epoch": 0.99, "grad_norm": 1.3743293285369873, "learning_rate": 4.878730528461203e-06, "log_odds_chosen": 0.9867672920227051, "log_odds_ratio": -0.4925385117530823, "logits/chosen": -0.3673693537712097, "logits/rejected": -0.43347668647766113, "logps/chosen": -0.9285440444946289, "logps/rejected": -1.5850690603256226, "loss": 0.8967, "nll_loss": 0.8474740982055664, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09285441040992737, "rewards/margins": 0.0656525045633316, "rewards/rejected": -0.15850690007209778, "step": 5460 }, { "epoch": 0.99, "grad_norm": 1.4439855813980103, "learning_rate": 4.872907264521764e-06, "log_odds_chosen": 0.9440599679946899, "log_odds_ratio": -0.525600254535675, "logits/chosen": -0.4502839148044586, "logits/rejected": -0.48919907212257385, "logps/chosen": -0.8880621194839478, "logps/rejected": -1.5326378345489502, "loss": 1.003, "nll_loss": 0.9504783749580383, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08880620449781418, "rewards/margins": 0.06445760279893875, "rewards/rejected": -0.15326380729675293, "step": 5470 }, { "epoch": 0.99, "grad_norm": 1.1630498170852661, "learning_rate": 4.867084000582326e-06, "log_odds_chosen": 1.0208189487457275, "log_odds_ratio": -0.4648086130619049, "logits/chosen": -0.37898311018943787, "logits/rejected": -0.3820800185203552, "logps/chosen": -0.8005183339118958, "logps/rejected": -1.456621766090393, "loss": 0.8536, "nll_loss": 0.8070961236953735, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08005184680223465, "rewards/margins": 0.06561031937599182, "rewards/rejected": -0.14566215872764587, "step": 5480 }, { "epoch": 0.99, "grad_norm": 0.786601185798645, "learning_rate": 4.861260736642888e-06, "log_odds_chosen": 0.9441927075386047, "log_odds_ratio": -0.507569432258606, "logits/chosen": -0.4196494519710541, "logits/rejected": -0.47500672936439514, "logps/chosen": -0.8276809453964233, "logps/rejected": -1.4831466674804688, "loss": 0.9866, "nll_loss": 0.9358325004577637, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08276810497045517, "rewards/margins": 0.06554658710956573, "rewards/rejected": -0.1483146846294403, "step": 5490 }, { "epoch": 0.99, "grad_norm": 1.4178991317749023, "learning_rate": 4.8554374727034495e-06, "log_odds_chosen": 0.9024141430854797, "log_odds_ratio": -0.5218056440353394, "logits/chosen": -0.44788289070129395, "logits/rejected": -0.47160372138023376, "logps/chosen": -0.9077650904655457, "logps/rejected": -1.4974048137664795, "loss": 0.9775, "nll_loss": 0.9252709150314331, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09077651798725128, "rewards/margins": 0.05896396562457085, "rewards/rejected": -0.14974047243595123, "step": 5500 }, { "epoch": 1.0, "grad_norm": 1.9363542795181274, "learning_rate": 4.849614208764012e-06, "log_odds_chosen": 0.7485558390617371, "log_odds_ratio": -0.6058672666549683, "logits/chosen": -0.44489818811416626, "logits/rejected": -0.43072813749313354, "logps/chosen": -1.074777364730835, "logps/rejected": -1.5849609375, "loss": 0.9916, "nll_loss": 0.9309671521186829, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10747772455215454, "rewards/margins": 0.051018375903367996, "rewards/rejected": -0.15849611163139343, "step": 5510 }, { "epoch": 1.0, "grad_norm": 1.9006177186965942, "learning_rate": 4.843790944824574e-06, "log_odds_chosen": 0.9450963139533997, "log_odds_ratio": -0.5010809302330017, "logits/chosen": -0.458138644695282, "logits/rejected": -0.48872989416122437, "logps/chosen": -0.9480603337287903, "logps/rejected": -1.5880528688430786, "loss": 1.0535, "nll_loss": 1.0034300088882446, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09480603039264679, "rewards/margins": 0.0639992505311966, "rewards/rejected": -0.15880528092384338, "step": 5520 }, { "epoch": 1.0, "grad_norm": 1.5550851821899414, "learning_rate": 4.837967680885136e-06, "log_odds_chosen": 0.8837807774543762, "log_odds_ratio": -0.5516039729118347, "logits/chosen": -0.4584503173828125, "logits/rejected": -0.5218029022216797, "logps/chosen": -1.0116498470306396, "logps/rejected": -1.6194604635238647, "loss": 1.017, "nll_loss": 0.9618609547615051, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10116498172283173, "rewards/margins": 0.06078105419874191, "rewards/rejected": -0.16194602847099304, "step": 5530 }, { "epoch": 1.0, "eval_log_odds_chosen": 0.7954168319702148, "eval_log_odds_ratio": -0.5471761226654053, "eval_logits/chosen": -0.4407408535480499, "eval_logits/rejected": -0.4670475721359253, "eval_logps/chosen": -0.9404959082603455, "eval_logps/rejected": -1.4815776348114014, "eval_loss": 0.9784727096557617, "eval_nll_loss": 0.9237551093101501, "eval_rewards/accuracies": 0.6675246953964233, "eval_rewards/chosen": -0.09404958784580231, "eval_rewards/margins": 0.05410816892981529, "eval_rewards/rejected": -0.1481577455997467, "eval_runtime": 2286.2087, "eval_samples_per_second": 1.02, "eval_steps_per_second": 1.02, "step": 5536 }, { "epoch": 1.0, "grad_norm": 1.2207567691802979, "learning_rate": 4.832144416945698e-06, "log_odds_chosen": 0.892682671546936, "log_odds_ratio": -0.5117454528808594, "logits/chosen": -0.4071148931980133, "logits/rejected": -0.47196096181869507, "logps/chosen": -0.9103930592536926, "logps/rejected": -1.4970780611038208, "loss": 0.9617, "nll_loss": 0.9104766845703125, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09103929996490479, "rewards/margins": 0.05866849422454834, "rewards/rejected": -0.14970777928829193, "step": 5540 }, { "epoch": 1.0, "grad_norm": 1.3107197284698486, "learning_rate": 4.82632115300626e-06, "log_odds_chosen": 0.9778968095779419, "log_odds_ratio": -0.4795476794242859, "logits/chosen": -0.45730486512184143, "logits/rejected": -0.5228903889656067, "logps/chosen": -0.8578106164932251, "logps/rejected": -1.526828408241272, "loss": 0.938, "nll_loss": 0.8900574445724487, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08578106015920639, "rewards/margins": 0.0669017806649208, "rewards/rejected": -0.1526828408241272, "step": 5550 }, { "epoch": 1.0, "grad_norm": 1.3291869163513184, "learning_rate": 4.820497889066822e-06, "log_odds_chosen": 0.7961874604225159, "log_odds_ratio": -0.5262941718101501, "logits/chosen": -0.4247972071170807, "logits/rejected": -0.4710194170475006, "logps/chosen": -0.9164491891860962, "logps/rejected": -1.4848397970199585, "loss": 1.0881, "nll_loss": 1.0354729890823364, "rewards/accuracies": 0.625, "rewards/chosen": -0.09164492785930634, "rewards/margins": 0.05683906003832817, "rewards/rejected": -0.1484839916229248, "step": 5560 }, { "epoch": 1.01, "grad_norm": 1.3004399538040161, "learning_rate": 4.814674625127383e-06, "log_odds_chosen": 0.888166069984436, "log_odds_ratio": -0.4967440962791443, "logits/chosen": -0.452568918466568, "logits/rejected": -0.4748914837837219, "logps/chosen": -0.8483587503433228, "logps/rejected": -1.446743369102478, "loss": 0.8882, "nll_loss": 0.8385192155838013, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08483588695526123, "rewards/margins": 0.05983845517039299, "rewards/rejected": -0.14467434585094452, "step": 5570 }, { "epoch": 1.01, "grad_norm": 1.4460393190383911, "learning_rate": 4.8088513611879455e-06, "log_odds_chosen": 0.7630864977836609, "log_odds_ratio": -0.5620290040969849, "logits/chosen": -0.41607731580734253, "logits/rejected": -0.44624605774879456, "logps/chosen": -0.9713469743728638, "logps/rejected": -1.4890177249908447, "loss": 0.9427, "nll_loss": 0.8865121603012085, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09713469445705414, "rewards/margins": 0.05176708847284317, "rewards/rejected": -0.1489017903804779, "step": 5580 }, { "epoch": 1.01, "grad_norm": 1.551102638244629, "learning_rate": 4.803028097248508e-06, "log_odds_chosen": 0.5414020419120789, "log_odds_ratio": -0.5626022219657898, "logits/chosen": -0.4925295412540436, "logits/rejected": -0.48884886503219604, "logps/chosen": -0.9413201212882996, "logps/rejected": -1.270108938217163, "loss": 0.9689, "nll_loss": 0.9126895666122437, "rewards/accuracies": 0.625, "rewards/chosen": -0.09413202106952667, "rewards/margins": 0.03287887945771217, "rewards/rejected": -0.12701091170310974, "step": 5590 }, { "epoch": 1.01, "grad_norm": 1.6918314695358276, "learning_rate": 4.797204833309069e-06, "log_odds_chosen": 0.6707077026367188, "log_odds_ratio": -0.5309171080589294, "logits/chosen": -0.47139209508895874, "logits/rejected": -0.4601783752441406, "logps/chosen": -0.9363657832145691, "logps/rejected": -1.3649402856826782, "loss": 0.9239, "nll_loss": 0.8708307147026062, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09363657981157303, "rewards/margins": 0.042857442051172256, "rewards/rejected": -0.1364940106868744, "step": 5600 }, { "epoch": 1.01, "grad_norm": 1.3999618291854858, "learning_rate": 4.7913815693696316e-06, "log_odds_chosen": 0.8025909662246704, "log_odds_ratio": -0.533409595489502, "logits/chosen": -0.4704816937446594, "logits/rejected": -0.5034157633781433, "logps/chosen": -0.9593275785446167, "logps/rejected": -1.4829776287078857, "loss": 1.0076, "nll_loss": 0.9542564153671265, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09593275934457779, "rewards/margins": 0.052365005016326904, "rewards/rejected": -0.1482977569103241, "step": 5610 }, { "epoch": 1.02, "grad_norm": 1.3082317113876343, "learning_rate": 4.785558305430194e-06, "log_odds_chosen": 0.8759142160415649, "log_odds_ratio": -0.4906393587589264, "logits/chosen": -0.4527658522129059, "logits/rejected": -0.48744139075279236, "logps/chosen": -0.9517688751220703, "logps/rejected": -1.5468437671661377, "loss": 0.9451, "nll_loss": 0.8960543870925903, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.09517689049243927, "rewards/margins": 0.05950748175382614, "rewards/rejected": -0.154684379696846, "step": 5620 }, { "epoch": 1.02, "grad_norm": 0.9139883518218994, "learning_rate": 4.779735041490755e-06, "log_odds_chosen": 0.8130331039428711, "log_odds_ratio": -0.531697690486908, "logits/chosen": -0.4422592222690582, "logits/rejected": -0.48472967743873596, "logps/chosen": -0.9625687599182129, "logps/rejected": -1.5124341249465942, "loss": 0.9998, "nll_loss": 0.9466264843940735, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09625687450170517, "rewards/margins": 0.054986536502838135, "rewards/rejected": -0.1512434184551239, "step": 5630 }, { "epoch": 1.02, "grad_norm": 1.2138420343399048, "learning_rate": 4.773911777551318e-06, "log_odds_chosen": 0.6880245208740234, "log_odds_ratio": -0.5870713591575623, "logits/chosen": -0.45311981439590454, "logits/rejected": -0.4890304505825043, "logps/chosen": -0.9035439491271973, "logps/rejected": -1.3936666250228882, "loss": 0.977, "nll_loss": 0.9182752370834351, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09035440534353256, "rewards/margins": 0.04901226609945297, "rewards/rejected": -0.13936665654182434, "step": 5640 }, { "epoch": 1.02, "grad_norm": 3.006988286972046, "learning_rate": 4.76808851361188e-06, "log_odds_chosen": 0.6752545237541199, "log_odds_ratio": -0.6011164784431458, "logits/chosen": -0.5098429918289185, "logits/rejected": -0.5509897470474243, "logps/chosen": -1.0110231637954712, "logps/rejected": -1.5172195434570312, "loss": 1.0542, "nll_loss": 0.9940736889839172, "rewards/accuracies": 0.625, "rewards/chosen": -0.1011023297905922, "rewards/margins": 0.05061963200569153, "rewards/rejected": -0.15172193944454193, "step": 5650 }, { "epoch": 1.02, "grad_norm": 1.0073820352554321, "learning_rate": 4.7622652496724406e-06, "log_odds_chosen": 0.6800388693809509, "log_odds_ratio": -0.5709148645401001, "logits/chosen": -0.4339476227760315, "logits/rejected": -0.5066033601760864, "logps/chosen": -0.8620915412902832, "logps/rejected": -1.3292564153671265, "loss": 0.9278, "nll_loss": 0.870742917060852, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08620915561914444, "rewards/margins": 0.04671648517251015, "rewards/rejected": -0.13292565941810608, "step": 5660 }, { "epoch": 1.02, "grad_norm": 1.3023157119750977, "learning_rate": 4.756441985733003e-06, "log_odds_chosen": 0.7827984690666199, "log_odds_ratio": -0.49791890382766724, "logits/chosen": -0.46909022331237793, "logits/rejected": -0.499662309885025, "logps/chosen": -0.8504747152328491, "logps/rejected": -1.3593947887420654, "loss": 0.9253, "nll_loss": 0.8755376935005188, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08504746854305267, "rewards/margins": 0.05089200660586357, "rewards/rejected": -0.13593947887420654, "step": 5670 }, { "epoch": 1.03, "grad_norm": 1.9402965307235718, "learning_rate": 4.750618721793565e-06, "log_odds_chosen": 0.764835774898529, "log_odds_ratio": -0.5476073026657104, "logits/chosen": -0.43662959337234497, "logits/rejected": -0.48592323064804077, "logps/chosen": -0.8871974945068359, "logps/rejected": -1.366875171661377, "loss": 0.9221, "nll_loss": 0.8673022985458374, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08871974796056747, "rewards/margins": 0.04796776920557022, "rewards/rejected": -0.1366875320672989, "step": 5680 }, { "epoch": 1.03, "grad_norm": 0.885296106338501, "learning_rate": 4.744795457854127e-06, "log_odds_chosen": 0.7651258707046509, "log_odds_ratio": -0.5727325677871704, "logits/chosen": -0.49575671553611755, "logits/rejected": -0.534355103969574, "logps/chosen": -0.8569524884223938, "logps/rejected": -1.3553133010864258, "loss": 1.0281, "nll_loss": 0.9708574414253235, "rewards/accuracies": 0.625, "rewards/chosen": -0.08569525182247162, "rewards/margins": 0.04983608052134514, "rewards/rejected": -0.13553133606910706, "step": 5690 }, { "epoch": 1.03, "grad_norm": 3.4250690937042236, "learning_rate": 4.738972193914689e-06, "log_odds_chosen": 0.9850413203239441, "log_odds_ratio": -0.5156182050704956, "logits/chosen": -0.44237810373306274, "logits/rejected": -0.48232460021972656, "logps/chosen": -0.9154784083366394, "logps/rejected": -1.5720593929290771, "loss": 0.9803, "nll_loss": 0.9286953210830688, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09154783189296722, "rewards/margins": 0.06565810739994049, "rewards/rejected": -0.15720593929290771, "step": 5700 }, { "epoch": 1.03, "grad_norm": 1.2578308582305908, "learning_rate": 4.733148929975251e-06, "log_odds_chosen": 0.5149042010307312, "log_odds_ratio": -0.6784185171127319, "logits/chosen": -0.5314583778381348, "logits/rejected": -0.5327475666999817, "logps/chosen": -1.0456842184066772, "logps/rejected": -1.4066373109817505, "loss": 1.0308, "nll_loss": 0.9629226922988892, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.10456842184066772, "rewards/margins": 0.03609530255198479, "rewards/rejected": -0.14066371321678162, "step": 5710 }, { "epoch": 1.03, "grad_norm": 1.155043363571167, "learning_rate": 4.727325666035813e-06, "log_odds_chosen": 0.8577598333358765, "log_odds_ratio": -0.5551376342773438, "logits/chosen": -0.4258693754673004, "logits/rejected": -0.4448915421962738, "logps/chosen": -0.9433683156967163, "logps/rejected": -1.5140371322631836, "loss": 0.9678, "nll_loss": 0.9122626185417175, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0943368449807167, "rewards/margins": 0.05706688016653061, "rewards/rejected": -0.15140371024608612, "step": 5720 }, { "epoch": 1.04, "grad_norm": 0.8873026967048645, "learning_rate": 4.721502402096375e-06, "log_odds_chosen": 1.106774091720581, "log_odds_ratio": -0.495880663394928, "logits/chosen": -0.4094298779964447, "logits/rejected": -0.3826850354671478, "logps/chosen": -0.7966852784156799, "logps/rejected": -1.5608104467391968, "loss": 0.9192, "nll_loss": 0.8695797920227051, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07966851443052292, "rewards/margins": 0.07641252875328064, "rewards/rejected": -0.15608103573322296, "step": 5730 }, { "epoch": 1.04, "grad_norm": 0.6950234770774841, "learning_rate": 4.715679138156937e-06, "log_odds_chosen": 1.0668140649795532, "log_odds_ratio": -0.48193269968032837, "logits/chosen": -0.41230225563049316, "logits/rejected": -0.4267541766166687, "logps/chosen": -0.8979755640029907, "logps/rejected": -1.6309888362884521, "loss": 1.0101, "nll_loss": 0.9619138836860657, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08979754149913788, "rewards/margins": 0.07330133765935898, "rewards/rejected": -0.16309887170791626, "step": 5740 }, { "epoch": 1.04, "grad_norm": 1.4105861186981201, "learning_rate": 4.709855874217498e-06, "log_odds_chosen": 1.5627528429031372, "log_odds_ratio": -0.4116443693637848, "logits/chosen": -0.3588656783103943, "logits/rejected": -0.4155047535896301, "logps/chosen": -0.7683436870574951, "logps/rejected": -1.859259843826294, "loss": 0.944, "nll_loss": 0.9028097987174988, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07683436572551727, "rewards/margins": 0.1090916246175766, "rewards/rejected": -0.18592599034309387, "step": 5750 }, { "epoch": 1.04, "grad_norm": 1.2038817405700684, "learning_rate": 4.70403261027806e-06, "log_odds_chosen": 0.8248249888420105, "log_odds_ratio": -0.5202821493148804, "logits/chosen": -0.4802790582180023, "logits/rejected": -0.4898431897163391, "logps/chosen": -0.9521000981330872, "logps/rejected": -1.5202988386154175, "loss": 0.9786, "nll_loss": 0.9266166687011719, "rewards/accuracies": 0.6875, "rewards/chosen": -0.095210000872612, "rewards/margins": 0.05681987479329109, "rewards/rejected": -0.152029886841774, "step": 5760 }, { "epoch": 1.04, "grad_norm": 0.9356115460395813, "learning_rate": 4.698209346338623e-06, "log_odds_chosen": 0.9297721982002258, "log_odds_ratio": -0.5297530889511108, "logits/chosen": -0.38405826687812805, "logits/rejected": -0.4060952067375183, "logps/chosen": -0.8437892198562622, "logps/rejected": -1.4541056156158447, "loss": 0.897, "nll_loss": 0.8439979553222656, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0843789204955101, "rewards/margins": 0.06103163957595825, "rewards/rejected": -0.14541055262088776, "step": 5770 }, { "epoch": 1.04, "grad_norm": 1.438058614730835, "learning_rate": 4.692386082399184e-06, "log_odds_chosen": 0.8766366243362427, "log_odds_ratio": -0.5100477933883667, "logits/chosen": -0.4361240863800049, "logits/rejected": -0.463198184967041, "logps/chosen": -0.9211476445198059, "logps/rejected": -1.4904837608337402, "loss": 1.0009, "nll_loss": 0.9499245882034302, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09211476147174835, "rewards/margins": 0.056933604180812836, "rewards/rejected": -0.1490483582019806, "step": 5780 }, { "epoch": 1.05, "grad_norm": 1.1899020671844482, "learning_rate": 4.686562818459746e-06, "log_odds_chosen": 0.781607449054718, "log_odds_ratio": -0.5476793050765991, "logits/chosen": -0.46544790267944336, "logits/rejected": -0.4789610505104065, "logps/chosen": -0.9231128692626953, "logps/rejected": -1.429552674293518, "loss": 1.0049, "nll_loss": 0.9501617550849915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09231128543615341, "rewards/margins": 0.05064399167895317, "rewards/rejected": -0.14295528829097748, "step": 5790 }, { "epoch": 1.05, "grad_norm": 2.000607967376709, "learning_rate": 4.680739554520309e-06, "log_odds_chosen": 0.8846811056137085, "log_odds_ratio": -0.48624786734580994, "logits/chosen": -0.4445672631263733, "logits/rejected": -0.48973551392555237, "logps/chosen": -0.8961418271064758, "logps/rejected": -1.4623029232025146, "loss": 0.9977, "nll_loss": 0.9490774869918823, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08961419761180878, "rewards/margins": 0.05661610886454582, "rewards/rejected": -0.1462303102016449, "step": 5800 }, { "epoch": 1.05, "grad_norm": 1.3409258127212524, "learning_rate": 4.674916290580871e-06, "log_odds_chosen": 0.7539998292922974, "log_odds_ratio": -0.5352480411529541, "logits/chosen": -0.4524649977684021, "logits/rejected": -0.47686344385147095, "logps/chosen": -0.9493099451065063, "logps/rejected": -1.4658688306808472, "loss": 0.9753, "nll_loss": 0.9217742681503296, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.0949309915304184, "rewards/margins": 0.05165589973330498, "rewards/rejected": -0.14658688008785248, "step": 5810 }, { "epoch": 1.05, "grad_norm": 0.7173593640327454, "learning_rate": 4.6690930266414324e-06, "log_odds_chosen": 0.5790658593177795, "log_odds_ratio": -0.6249306797981262, "logits/chosen": -0.49599066376686096, "logits/rejected": -0.4908533990383148, "logps/chosen": -1.0195724964141846, "logps/rejected": -1.3964897394180298, "loss": 1.051, "nll_loss": 0.9885488748550415, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10195724666118622, "rewards/margins": 0.03769173100590706, "rewards/rejected": -0.13964898884296417, "step": 5820 }, { "epoch": 1.05, "grad_norm": 1.9051140546798706, "learning_rate": 4.663269762701995e-06, "log_odds_chosen": 0.8640015721321106, "log_odds_ratio": -0.514534592628479, "logits/chosen": -0.4196191430091858, "logits/rejected": -0.44794726371765137, "logps/chosen": -0.9575347900390625, "logps/rejected": -1.5376710891723633, "loss": 0.9067, "nll_loss": 0.8552610278129578, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09575347602367401, "rewards/margins": 0.058013636618852615, "rewards/rejected": -0.15376712381839752, "step": 5830 }, { "epoch": 1.05, "grad_norm": 1.4965343475341797, "learning_rate": 4.657446498762556e-06, "log_odds_chosen": 0.6723813414573669, "log_odds_ratio": -0.5684707760810852, "logits/chosen": -0.42264777421951294, "logits/rejected": -0.4406364858150482, "logps/chosen": -0.8763896822929382, "logps/rejected": -1.284791350364685, "loss": 0.9805, "nll_loss": 0.9236391186714172, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0876389741897583, "rewards/margins": 0.04084015637636185, "rewards/rejected": -0.12847913801670074, "step": 5840 }, { "epoch": 1.06, "grad_norm": 0.7926245927810669, "learning_rate": 4.651623234823118e-06, "log_odds_chosen": 1.1886515617370605, "log_odds_ratio": -0.4757661819458008, "logits/chosen": -0.4212180972099304, "logits/rejected": -0.43057411909103394, "logps/chosen": -0.8711498975753784, "logps/rejected": -1.6483008861541748, "loss": 0.9531, "nll_loss": 0.9055501222610474, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08711498230695724, "rewards/margins": 0.07771511375904083, "rewards/rejected": -0.16483010351657867, "step": 5850 }, { "epoch": 1.06, "grad_norm": 1.2240430116653442, "learning_rate": 4.64579997088368e-06, "log_odds_chosen": 0.7755094766616821, "log_odds_ratio": -0.5845614671707153, "logits/chosen": -0.43493086099624634, "logits/rejected": -0.45848387479782104, "logps/chosen": -0.9059002995491028, "logps/rejected": -1.4610520601272583, "loss": 0.9695, "nll_loss": 0.9110584259033203, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09059003740549088, "rewards/margins": 0.055515170097351074, "rewards/rejected": -0.14610520005226135, "step": 5860 }, { "epoch": 1.06, "grad_norm": 1.060349941253662, "learning_rate": 4.639976706944242e-06, "log_odds_chosen": 0.7335856556892395, "log_odds_ratio": -0.5503862500190735, "logits/chosen": -0.3889656364917755, "logits/rejected": -0.419551283121109, "logps/chosen": -1.0413269996643066, "logps/rejected": -1.5086840391159058, "loss": 1.1028, "nll_loss": 1.0477640628814697, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10413269698619843, "rewards/margins": 0.04673569276928902, "rewards/rejected": -0.15086840093135834, "step": 5870 }, { "epoch": 1.06, "grad_norm": 1.2340961694717407, "learning_rate": 4.634153443004804e-06, "log_odds_chosen": 0.7230426073074341, "log_odds_ratio": -0.5490708351135254, "logits/chosen": -0.4647773206233978, "logits/rejected": -0.5086523294448853, "logps/chosen": -0.9753144383430481, "logps/rejected": -1.4537895917892456, "loss": 1.0191, "nll_loss": 0.9642260670661926, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09753144532442093, "rewards/margins": 0.04784751683473587, "rewards/rejected": -0.1453789621591568, "step": 5880 }, { "epoch": 1.06, "grad_norm": 0.7249181270599365, "learning_rate": 4.628330179065366e-06, "log_odds_chosen": 0.7544859647750854, "log_odds_ratio": -0.5356615781784058, "logits/chosen": -0.43114471435546875, "logits/rejected": -0.4747946858406067, "logps/chosen": -0.8531882166862488, "logps/rejected": -1.3764461278915405, "loss": 0.9115, "nll_loss": 0.8579704165458679, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08531881868839264, "rewards/margins": 0.05232580378651619, "rewards/rejected": -0.13764461874961853, "step": 5890 }, { "epoch": 1.07, "grad_norm": 0.8657764196395874, "learning_rate": 4.622506915125928e-06, "log_odds_chosen": 0.9151697158813477, "log_odds_ratio": -0.5579023361206055, "logits/chosen": -0.446617990732193, "logits/rejected": -0.4951063096523285, "logps/chosen": -0.9576125144958496, "logps/rejected": -1.5999959707260132, "loss": 0.9359, "nll_loss": 0.8801458477973938, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0957612469792366, "rewards/margins": 0.06423834711313248, "rewards/rejected": -0.15999959409236908, "step": 5900 }, { "epoch": 1.07, "grad_norm": 1.1720466613769531, "learning_rate": 4.61668365118649e-06, "log_odds_chosen": 0.6918897032737732, "log_odds_ratio": -0.5745338201522827, "logits/chosen": -0.425567090511322, "logits/rejected": -0.445716917514801, "logps/chosen": -0.9016758799552917, "logps/rejected": -1.3733348846435547, "loss": 0.9548, "nll_loss": 0.8973618745803833, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09016759693622589, "rewards/margins": 0.04716590791940689, "rewards/rejected": -0.13733351230621338, "step": 5910 }, { "epoch": 1.07, "grad_norm": 2.0577094554901123, "learning_rate": 4.610860387247052e-06, "log_odds_chosen": 0.8531296849250793, "log_odds_ratio": -0.5350161194801331, "logits/chosen": -0.4382871985435486, "logits/rejected": -0.4813820421695709, "logps/chosen": -0.8610553741455078, "logps/rejected": -1.437717080116272, "loss": 0.9136, "nll_loss": 0.8601323962211609, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08610554039478302, "rewards/margins": 0.05766616389155388, "rewards/rejected": -0.1437717080116272, "step": 5920 }, { "epoch": 1.07, "grad_norm": 1.0709292888641357, "learning_rate": 4.605037123307614e-06, "log_odds_chosen": 0.6848903894424438, "log_odds_ratio": -0.5732904672622681, "logits/chosen": -0.4110352098941803, "logits/rejected": -0.47563010454177856, "logps/chosen": -0.9090906977653503, "logps/rejected": -1.3542063236236572, "loss": 0.9591, "nll_loss": 0.9017614126205444, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09090907871723175, "rewards/margins": 0.044511578977108, "rewards/rejected": -0.13542065024375916, "step": 5930 }, { "epoch": 1.07, "grad_norm": 1.3522981405258179, "learning_rate": 4.599213859368175e-06, "log_odds_chosen": 0.7799988389015198, "log_odds_ratio": -0.5210399627685547, "logits/chosen": -0.4593687951564789, "logits/rejected": -0.4631672501564026, "logps/chosen": -0.8581777811050415, "logps/rejected": -1.3306677341461182, "loss": 0.9031, "nll_loss": 0.8510422706604004, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08581778407096863, "rewards/margins": 0.047249000519514084, "rewards/rejected": -0.133066788315773, "step": 5940 }, { "epoch": 1.07, "grad_norm": 0.6679449677467346, "learning_rate": 4.593390595428737e-06, "log_odds_chosen": 0.8621038198471069, "log_odds_ratio": -0.518430233001709, "logits/chosen": -0.44318872690200806, "logits/rejected": -0.45970430970191956, "logps/chosen": -0.8508337140083313, "logps/rejected": -1.4162659645080566, "loss": 0.886, "nll_loss": 0.8341652154922485, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08508336544036865, "rewards/margins": 0.056543223559856415, "rewards/rejected": -0.14162659645080566, "step": 5950 }, { "epoch": 1.08, "grad_norm": 0.9131520390510559, "learning_rate": 4.5875673314893e-06, "log_odds_chosen": 0.5632303953170776, "log_odds_ratio": -0.5651002526283264, "logits/chosen": -0.5047626495361328, "logits/rejected": -0.5000123381614685, "logps/chosen": -0.9939088821411133, "logps/rejected": -1.3372070789337158, "loss": 0.9807, "nll_loss": 0.9242087602615356, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09939088672399521, "rewards/margins": 0.034329816699028015, "rewards/rejected": -0.13372069597244263, "step": 5960 }, { "epoch": 1.08, "grad_norm": 0.9747700691223145, "learning_rate": 4.581744067549861e-06, "log_odds_chosen": 0.7390889525413513, "log_odds_ratio": -0.5641605257987976, "logits/chosen": -0.43641337752342224, "logits/rejected": -0.4768204689025879, "logps/chosen": -0.9668332934379578, "logps/rejected": -1.476610541343689, "loss": 0.9901, "nll_loss": 0.9336675405502319, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0966833308339119, "rewards/margins": 0.05097772926092148, "rewards/rejected": -0.14766106009483337, "step": 5970 }, { "epoch": 1.08, "grad_norm": 1.3658349514007568, "learning_rate": 4.5759208036104235e-06, "log_odds_chosen": 0.803102970123291, "log_odds_ratio": -0.5294958353042603, "logits/chosen": -0.44828686118125916, "logits/rejected": -0.4648512899875641, "logps/chosen": -0.9527362585067749, "logps/rejected": -1.4989818334579468, "loss": 0.9945, "nll_loss": 0.9415693283081055, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09527363628149033, "rewards/margins": 0.05462455749511719, "rewards/rejected": -0.14989818632602692, "step": 5980 }, { "epoch": 1.08, "grad_norm": 1.7984719276428223, "learning_rate": 4.570097539670986e-06, "log_odds_chosen": 0.5657428503036499, "log_odds_ratio": -0.6062055826187134, "logits/chosen": -0.44131985306739807, "logits/rejected": -0.45149001479148865, "logps/chosen": -1.01093327999115, "logps/rejected": -1.351680040359497, "loss": 0.9973, "nll_loss": 0.9366718530654907, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10109331458806992, "rewards/margins": 0.03407468646764755, "rewards/rejected": -0.13516801595687866, "step": 5990 }, { "epoch": 1.08, "grad_norm": 2.813422441482544, "learning_rate": 4.564274275731547e-06, "log_odds_chosen": 0.7859033346176147, "log_odds_ratio": -0.5689486265182495, "logits/chosen": -0.37758010625839233, "logits/rejected": -0.4466930031776428, "logps/chosen": -0.8639854192733765, "logps/rejected": -1.4098567962646484, "loss": 0.8868, "nll_loss": 0.8299161791801453, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08639854192733765, "rewards/margins": 0.05458713322877884, "rewards/rejected": -0.14098568260669708, "step": 6000 }, { "epoch": 1.09, "grad_norm": 0.9352004528045654, "learning_rate": 4.558451011792109e-06, "log_odds_chosen": 0.7700805068016052, "log_odds_ratio": -0.5558144450187683, "logits/chosen": -0.3771985173225403, "logits/rejected": -0.44899502396583557, "logps/chosen": -0.9024406671524048, "logps/rejected": -1.4145829677581787, "loss": 0.9885, "nll_loss": 0.9329215884208679, "rewards/accuracies": 0.625, "rewards/chosen": -0.09024406969547272, "rewards/margins": 0.05121422931551933, "rewards/rejected": -0.14145830273628235, "step": 6010 }, { "epoch": 1.09, "grad_norm": 1.1320077180862427, "learning_rate": 4.552627747852671e-06, "log_odds_chosen": 0.6384034752845764, "log_odds_ratio": -0.5681486129760742, "logits/chosen": -0.4968019127845764, "logits/rejected": -0.5048823356628418, "logps/chosen": -0.8819563984870911, "logps/rejected": -1.2801580429077148, "loss": 0.9733, "nll_loss": 0.9164499044418335, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08819563686847687, "rewards/margins": 0.039820168167352676, "rewards/rejected": -0.12801580131053925, "step": 6020 }, { "epoch": 1.09, "grad_norm": 1.4417674541473389, "learning_rate": 4.546804483913233e-06, "log_odds_chosen": 0.845604419708252, "log_odds_ratio": -0.5084959268569946, "logits/chosen": -0.4493161141872406, "logits/rejected": -0.4819253981113434, "logps/chosen": -0.8610888719558716, "logps/rejected": -1.4244296550750732, "loss": 0.9825, "nll_loss": 0.9316762685775757, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08610888570547104, "rewards/margins": 0.05633409693837166, "rewards/rejected": -0.142442986369133, "step": 6030 }, { "epoch": 1.09, "grad_norm": 0.7863137722015381, "learning_rate": 4.540981219973795e-06, "log_odds_chosen": 0.6155441403388977, "log_odds_ratio": -0.6100500226020813, "logits/chosen": -0.4968458116054535, "logits/rejected": -0.4958006739616394, "logps/chosen": -0.9442992210388184, "logps/rejected": -1.3343608379364014, "loss": 0.9896, "nll_loss": 0.9286164045333862, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09442992508411407, "rewards/margins": 0.039006151258945465, "rewards/rejected": -0.13343606889247894, "step": 6040 }, { "epoch": 1.09, "grad_norm": 1.2931441068649292, "learning_rate": 4.535157956034357e-06, "log_odds_chosen": 0.8838874697685242, "log_odds_ratio": -0.5274507999420166, "logits/chosen": -0.4653933644294739, "logits/rejected": -0.5074115991592407, "logps/chosen": -0.9719902276992798, "logps/rejected": -1.5905590057373047, "loss": 0.9916, "nll_loss": 0.9388928413391113, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09719902276992798, "rewards/margins": 0.06185689568519592, "rewards/rejected": -0.1590559035539627, "step": 6050 }, { "epoch": 1.09, "grad_norm": 1.2712411880493164, "learning_rate": 4.529334692094919e-06, "log_odds_chosen": 0.7494404315948486, "log_odds_ratio": -0.5353826284408569, "logits/chosen": -0.3938870429992676, "logits/rejected": -0.45134368538856506, "logps/chosen": -0.9210939407348633, "logps/rejected": -1.441649317741394, "loss": 0.9476, "nll_loss": 0.8940416574478149, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09210939705371857, "rewards/margins": 0.05205554515123367, "rewards/rejected": -0.14416493475437164, "step": 6060 }, { "epoch": 1.1, "grad_norm": 1.6904181241989136, "learning_rate": 4.523511428155481e-06, "log_odds_chosen": 1.1151350736618042, "log_odds_ratio": -0.4561994671821594, "logits/chosen": -0.43357786536216736, "logits/rejected": -0.4941074848175049, "logps/chosen": -0.9114344716072083, "logps/rejected": -1.6274280548095703, "loss": 0.9271, "nll_loss": 0.881447434425354, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09114344418048859, "rewards/margins": 0.07159935683012009, "rewards/rejected": -0.16274279356002808, "step": 6070 }, { "epoch": 1.1, "grad_norm": 1.0081928968429565, "learning_rate": 4.517688164216043e-06, "log_odds_chosen": 0.88865727186203, "log_odds_ratio": -0.5304638147354126, "logits/chosen": -0.4526711404323578, "logits/rejected": -0.46278172731399536, "logps/chosen": -0.9050025939941406, "logps/rejected": -1.5034633874893188, "loss": 0.9395, "nll_loss": 0.8864428400993347, "rewards/accuracies": 0.625, "rewards/chosen": -0.09050027281045914, "rewards/margins": 0.05984606221318245, "rewards/rejected": -0.15034635365009308, "step": 6080 }, { "epoch": 1.1, "grad_norm": 1.6177700757980347, "learning_rate": 4.5118649002766055e-06, "log_odds_chosen": 1.024202585220337, "log_odds_ratio": -0.4576869606971741, "logits/chosen": -0.4027012288570404, "logits/rejected": -0.4568304121494293, "logps/chosen": -0.8995013236999512, "logps/rejected": -1.5450494289398193, "loss": 0.9174, "nll_loss": 0.8716768026351929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08995013684034348, "rewards/margins": 0.06455481052398682, "rewards/rejected": -0.1545049250125885, "step": 6090 }, { "epoch": 1.1, "grad_norm": 0.9513935446739197, "learning_rate": 4.506041636337166e-06, "log_odds_chosen": 1.1876469850540161, "log_odds_ratio": -0.48842042684555054, "logits/chosen": -0.46368637681007385, "logits/rejected": -0.4937184453010559, "logps/chosen": -0.9262706637382507, "logps/rejected": -1.740277886390686, "loss": 0.9675, "nll_loss": 0.918613612651825, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09262706339359283, "rewards/margins": 0.08140072971582413, "rewards/rejected": -0.17402780055999756, "step": 6100 }, { "epoch": 1.1, "grad_norm": 1.707017183303833, "learning_rate": 4.500218372397728e-06, "log_odds_chosen": 0.9699726104736328, "log_odds_ratio": -0.5156748294830322, "logits/chosen": -0.4302978515625, "logits/rejected": -0.44206157326698303, "logps/chosen": -0.9146237373352051, "logps/rejected": -1.5761725902557373, "loss": 0.9557, "nll_loss": 0.9041454195976257, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09146237373352051, "rewards/margins": 0.06615491211414337, "rewards/rejected": -0.15761728584766388, "step": 6110 }, { "epoch": 1.11, "grad_norm": 1.1624763011932373, "learning_rate": 4.494395108458291e-06, "log_odds_chosen": 1.2337392568588257, "log_odds_ratio": -0.4633565843105316, "logits/chosen": -0.45803871750831604, "logits/rejected": -0.5049036741256714, "logps/chosen": -0.8313882946968079, "logps/rejected": -1.6546128988265991, "loss": 0.974, "nll_loss": 0.9276957511901855, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08313882350921631, "rewards/margins": 0.08232248574495316, "rewards/rejected": -0.16546130180358887, "step": 6120 }, { "epoch": 1.11, "grad_norm": 1.7360762357711792, "learning_rate": 4.488571844518852e-06, "log_odds_chosen": 0.7800405621528625, "log_odds_ratio": -0.5640308856964111, "logits/chosen": -0.48888611793518066, "logits/rejected": -0.526140034198761, "logps/chosen": -0.8599061965942383, "logps/rejected": -1.3839830160140991, "loss": 0.9853, "nll_loss": 0.9288476705551147, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08599063009023666, "rewards/margins": 0.05240767449140549, "rewards/rejected": -0.13839831948280334, "step": 6130 }, { "epoch": 1.11, "grad_norm": 0.8478294014930725, "learning_rate": 4.4827485805794145e-06, "log_odds_chosen": 0.8905304074287415, "log_odds_ratio": -0.5386573672294617, "logits/chosen": -0.4045413136482239, "logits/rejected": -0.4013892710208893, "logps/chosen": -0.9712546467781067, "logps/rejected": -1.5939693450927734, "loss": 0.9416, "nll_loss": 0.887769877910614, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09712545573711395, "rewards/margins": 0.062271494418382645, "rewards/rejected": -0.1593969464302063, "step": 6140 }, { "epoch": 1.11, "grad_norm": 1.7147661447525024, "learning_rate": 4.476925316639977e-06, "log_odds_chosen": 0.8515718579292297, "log_odds_ratio": -0.5489001274108887, "logits/chosen": -0.4691835343837738, "logits/rejected": -0.5160794258117676, "logps/chosen": -1.0208523273468018, "logps/rejected": -1.6025129556655884, "loss": 1.0615, "nll_loss": 1.0066022872924805, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10208523273468018, "rewards/margins": 0.05816606804728508, "rewards/rejected": -0.16025128960609436, "step": 6150 }, { "epoch": 1.11, "grad_norm": 1.0859298706054688, "learning_rate": 4.471102052700538e-06, "log_odds_chosen": 0.8427707552909851, "log_odds_ratio": -0.5371182560920715, "logits/chosen": -0.4537859559059143, "logits/rejected": -0.4494473338127136, "logps/chosen": -0.9272929430007935, "logps/rejected": -1.486228108406067, "loss": 1.0053, "nll_loss": 0.9515641927719116, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09272929280996323, "rewards/margins": 0.05589351803064346, "rewards/rejected": -0.1486227959394455, "step": 6160 }, { "epoch": 1.11, "grad_norm": 1.7710280418395996, "learning_rate": 4.465278788761101e-06, "log_odds_chosen": 0.7671918869018555, "log_odds_ratio": -0.5367628335952759, "logits/chosen": -0.4476463794708252, "logits/rejected": -0.45224714279174805, "logps/chosen": -1.0947606563568115, "logps/rejected": -1.627105951309204, "loss": 1.0101, "nll_loss": 0.956439197063446, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10947606712579727, "rewards/margins": 0.053234536200761795, "rewards/rejected": -0.16271059215068817, "step": 6170 }, { "epoch": 1.12, "grad_norm": 1.216435432434082, "learning_rate": 4.459455524821663e-06, "log_odds_chosen": 0.6147326827049255, "log_odds_ratio": -0.5704823732376099, "logits/chosen": -0.4661136269569397, "logits/rejected": -0.46458595991134644, "logps/chosen": -0.9244877099990845, "logps/rejected": -1.3198649883270264, "loss": 0.9443, "nll_loss": 0.8872181177139282, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09244877099990845, "rewards/margins": 0.03953772783279419, "rewards/rejected": -0.13198649883270264, "step": 6180 }, { "epoch": 1.12, "grad_norm": 2.7578301429748535, "learning_rate": 4.4536322608822235e-06, "log_odds_chosen": 0.7873707413673401, "log_odds_ratio": -0.5360560417175293, "logits/chosen": -0.4558858871459961, "logits/rejected": -0.49239593744277954, "logps/chosen": -0.967001736164093, "logps/rejected": -1.5444661378860474, "loss": 0.9927, "nll_loss": 0.9390754699707031, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09670017659664154, "rewards/margins": 0.05774643272161484, "rewards/rejected": -0.15444661676883698, "step": 6190 }, { "epoch": 1.12, "grad_norm": 1.234567403793335, "learning_rate": 4.447808996942786e-06, "log_odds_chosen": 0.7407630681991577, "log_odds_ratio": -0.5109227895736694, "logits/chosen": -0.4580449163913727, "logits/rejected": -0.46283191442489624, "logps/chosen": -0.891004741191864, "logps/rejected": -1.3632593154907227, "loss": 0.9524, "nll_loss": 0.9013134837150574, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08910048007965088, "rewards/margins": 0.0472254678606987, "rewards/rejected": -0.13632594048976898, "step": 6200 }, { "epoch": 1.12, "grad_norm": 0.999319314956665, "learning_rate": 4.441985733003348e-06, "log_odds_chosen": 0.8994671106338501, "log_odds_ratio": -0.5333329439163208, "logits/chosen": -0.48216262459754944, "logits/rejected": -0.44915810227394104, "logps/chosen": -0.9370241165161133, "logps/rejected": -1.5314620733261108, "loss": 0.87, "nll_loss": 0.8166621327400208, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09370241314172745, "rewards/margins": 0.05944380164146423, "rewards/rejected": -0.15314622223377228, "step": 6210 }, { "epoch": 1.12, "grad_norm": 1.658113956451416, "learning_rate": 4.4361624690639104e-06, "log_odds_chosen": 0.9446815252304077, "log_odds_ratio": -0.4770669937133789, "logits/chosen": -0.44138193130493164, "logits/rejected": -0.4626283049583435, "logps/chosen": -0.8511655926704407, "logps/rejected": -1.460635781288147, "loss": 0.8938, "nll_loss": 0.8460577726364136, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08511656522750854, "rewards/margins": 0.06094701960682869, "rewards/rejected": -0.14606359601020813, "step": 6220 }, { "epoch": 1.13, "grad_norm": 1.3590821027755737, "learning_rate": 4.430339205124472e-06, "log_odds_chosen": 0.7557743787765503, "log_odds_ratio": -0.5099378824234009, "logits/chosen": -0.41205430030822754, "logits/rejected": -0.48370322585105896, "logps/chosen": -0.9024174809455872, "logps/rejected": -1.3916748762130737, "loss": 0.9609, "nll_loss": 0.9099496603012085, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09024176001548767, "rewards/margins": 0.0489257350564003, "rewards/rejected": -0.13916750252246857, "step": 6230 }, { "epoch": 1.13, "grad_norm": 1.4263062477111816, "learning_rate": 4.424515941185034e-06, "log_odds_chosen": 0.9851778745651245, "log_odds_ratio": -0.5378494262695312, "logits/chosen": -0.4365876615047455, "logits/rejected": -0.4904526174068451, "logps/chosen": -0.9848604202270508, "logps/rejected": -1.7028894424438477, "loss": 1.0141, "nll_loss": 0.9603080749511719, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0984860435128212, "rewards/margins": 0.07180289924144745, "rewards/rejected": -0.17028896510601044, "step": 6240 }, { "epoch": 1.13, "grad_norm": 1.0578852891921997, "learning_rate": 4.4186926772455965e-06, "log_odds_chosen": 0.8757988214492798, "log_odds_ratio": -0.5127619504928589, "logits/chosen": -0.41092929244041443, "logits/rejected": -0.4253155589103699, "logps/chosen": -0.8159357309341431, "logps/rejected": -1.3599027395248413, "loss": 0.915, "nll_loss": 0.8637593984603882, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0815935730934143, "rewards/margins": 0.054396700114011765, "rewards/rejected": -0.13599026203155518, "step": 6250 }, { "epoch": 1.13, "grad_norm": 1.2675119638442993, "learning_rate": 4.412869413306158e-06, "log_odds_chosen": 1.1324522495269775, "log_odds_ratio": -0.48209959268569946, "logits/chosen": -0.38643237948417664, "logits/rejected": -0.4416092336177826, "logps/chosen": -0.8294364213943481, "logps/rejected": -1.6051595211029053, "loss": 0.8791, "nll_loss": 0.8309270739555359, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08294364809989929, "rewards/margins": 0.07757231593132019, "rewards/rejected": -0.16051596403121948, "step": 6260 }, { "epoch": 1.13, "grad_norm": 1.1435970067977905, "learning_rate": 4.40704614936672e-06, "log_odds_chosen": 0.7152145504951477, "log_odds_ratio": -0.5820814371109009, "logits/chosen": -0.4874646067619324, "logits/rejected": -0.5094522833824158, "logps/chosen": -0.9671697616577148, "logps/rejected": -1.4383320808410645, "loss": 1.0701, "nll_loss": 1.011844277381897, "rewards/accuracies": 0.625, "rewards/chosen": -0.0967169851064682, "rewards/margins": 0.0471162274479866, "rewards/rejected": -0.1438332051038742, "step": 6270 }, { "epoch": 1.13, "grad_norm": 1.9420355558395386, "learning_rate": 4.401222885427282e-06, "log_odds_chosen": 0.9026612043380737, "log_odds_ratio": -0.5407354831695557, "logits/chosen": -0.42870211601257324, "logits/rejected": -0.45270299911499023, "logps/chosen": -0.9005454778671265, "logps/rejected": -1.4829877614974976, "loss": 0.9653, "nll_loss": 0.9112182855606079, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09005454927682877, "rewards/margins": 0.058244235813617706, "rewards/rejected": -0.14829877018928528, "step": 6280 }, { "epoch": 1.14, "grad_norm": 1.3078311681747437, "learning_rate": 4.395399621487843e-06, "log_odds_chosen": 0.9692419767379761, "log_odds_ratio": -0.5271007418632507, "logits/chosen": -0.49861112236976624, "logits/rejected": -0.4814334809780121, "logps/chosen": -0.8934978246688843, "logps/rejected": -1.5919220447540283, "loss": 0.9152, "nll_loss": 0.8624576330184937, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08934978395700455, "rewards/margins": 0.06984242051839828, "rewards/rejected": -0.15919220447540283, "step": 6290 }, { "epoch": 1.14, "grad_norm": 1.0360865592956543, "learning_rate": 4.3895763575484055e-06, "log_odds_chosen": 1.2350679636001587, "log_odds_ratio": -0.4851533770561218, "logits/chosen": -0.38407033681869507, "logits/rejected": -0.44652214646339417, "logps/chosen": -0.7998726963996887, "logps/rejected": -1.5704978704452515, "loss": 0.8281, "nll_loss": 0.7795952558517456, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07998727262020111, "rewards/margins": 0.07706251740455627, "rewards/rejected": -0.15704980492591858, "step": 6300 }, { "epoch": 1.14, "grad_norm": 1.2178974151611328, "learning_rate": 4.383753093608968e-06, "log_odds_chosen": 0.7196434736251831, "log_odds_ratio": -0.6056947708129883, "logits/chosen": -0.5137656331062317, "logits/rejected": -0.5180788636207581, "logps/chosen": -1.0398476123809814, "logps/rejected": -1.576139211654663, "loss": 1.023, "nll_loss": 0.9624187350273132, "rewards/accuracies": 0.625, "rewards/chosen": -0.10398473590612411, "rewards/margins": 0.05362917110323906, "rewards/rejected": -0.15761391818523407, "step": 6310 }, { "epoch": 1.14, "grad_norm": 1.1940643787384033, "learning_rate": 4.377929829669529e-06, "log_odds_chosen": 0.6526185274124146, "log_odds_ratio": -0.6317920088768005, "logits/chosen": -0.4650532603263855, "logits/rejected": -0.48263511061668396, "logps/chosen": -0.9457101821899414, "logps/rejected": -1.3629834651947021, "loss": 0.9974, "nll_loss": 0.9341708421707153, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09457103163003922, "rewards/margins": 0.04172731563448906, "rewards/rejected": -0.13629834353923798, "step": 6320 }, { "epoch": 1.14, "grad_norm": 2.10884165763855, "learning_rate": 4.372106565730092e-06, "log_odds_chosen": 0.7424247860908508, "log_odds_ratio": -0.5436392426490784, "logits/chosen": -0.4434364438056946, "logits/rejected": -0.47471094131469727, "logps/chosen": -0.9834851026535034, "logps/rejected": -1.4713070392608643, "loss": 0.9389, "nll_loss": 0.8845357894897461, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09834851324558258, "rewards/margins": 0.048782214522361755, "rewards/rejected": -0.14713071286678314, "step": 6330 }, { "epoch": 1.15, "grad_norm": 1.1214773654937744, "learning_rate": 4.366283301790654e-06, "log_odds_chosen": 0.8600967526435852, "log_odds_ratio": -0.5864149332046509, "logits/chosen": -0.488150030374527, "logits/rejected": -0.4834933280944824, "logps/chosen": -0.8966549038887024, "logps/rejected": -1.4617177248001099, "loss": 0.9967, "nll_loss": 0.938027024269104, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.089665487408638, "rewards/margins": 0.056506287306547165, "rewards/rejected": -0.14617177844047546, "step": 6340 }, { "epoch": 1.15, "grad_norm": 1.8667078018188477, "learning_rate": 4.360460037851215e-06, "log_odds_chosen": 0.8151102066040039, "log_odds_ratio": -0.5495513081550598, "logits/chosen": -0.4086511731147766, "logits/rejected": -0.4186836779117584, "logps/chosen": -0.9379664659500122, "logps/rejected": -1.51735520362854, "loss": 0.9226, "nll_loss": 0.8676198124885559, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09379664808511734, "rewards/margins": 0.05793887376785278, "rewards/rejected": -0.15173551440238953, "step": 6350 }, { "epoch": 1.15, "grad_norm": 1.1724246740341187, "learning_rate": 4.354636773911778e-06, "log_odds_chosen": 0.8040224313735962, "log_odds_ratio": -0.5135282278060913, "logits/chosen": -0.4123757779598236, "logits/rejected": -0.42883196473121643, "logps/chosen": -0.8877067565917969, "logps/rejected": -1.4244158267974854, "loss": 0.9228, "nll_loss": 0.871476948261261, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08877068012952805, "rewards/margins": 0.05367090553045273, "rewards/rejected": -0.14244157075881958, "step": 6360 }, { "epoch": 1.15, "grad_norm": 0.7838314771652222, "learning_rate": 4.348813509972339e-06, "log_odds_chosen": 1.0577952861785889, "log_odds_ratio": -0.5264900326728821, "logits/chosen": -0.41539543867111206, "logits/rejected": -0.44596344232559204, "logps/chosen": -0.8569475412368774, "logps/rejected": -1.5782248973846436, "loss": 0.9204, "nll_loss": 0.8677042126655579, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08569475263357162, "rewards/margins": 0.07212773710489273, "rewards/rejected": -0.15782250463962555, "step": 6370 }, { "epoch": 1.15, "grad_norm": 1.7537171840667725, "learning_rate": 4.342990246032901e-06, "log_odds_chosen": 0.8003638982772827, "log_odds_ratio": -0.5795815587043762, "logits/chosen": -0.4575144648551941, "logits/rejected": -0.4661421775817871, "logps/chosen": -0.9662486910820007, "logps/rejected": -1.540056824684143, "loss": 0.9709, "nll_loss": 0.9129317998886108, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09662487357854843, "rewards/margins": 0.05738081410527229, "rewards/rejected": -0.15400567650794983, "step": 6380 }, { "epoch": 1.15, "grad_norm": 1.3851758241653442, "learning_rate": 4.337166982093463e-06, "log_odds_chosen": 1.0264556407928467, "log_odds_ratio": -0.4926171898841858, "logits/chosen": -0.3985103964805603, "logits/rejected": -0.43339523673057556, "logps/chosen": -0.907995343208313, "logps/rejected": -1.6010644435882568, "loss": 0.9261, "nll_loss": 0.8768340945243835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09079953283071518, "rewards/margins": 0.06930691003799438, "rewards/rejected": -0.16010645031929016, "step": 6390 }, { "epoch": 1.16, "grad_norm": 1.298351764678955, "learning_rate": 4.331343718154025e-06, "log_odds_chosen": 0.7900893092155457, "log_odds_ratio": -0.5369637608528137, "logits/chosen": -0.4567854404449463, "logits/rejected": -0.43872490525245667, "logps/chosen": -0.942980170249939, "logps/rejected": -1.4829621315002441, "loss": 0.9717, "nll_loss": 0.9179746508598328, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09429802000522614, "rewards/margins": 0.0539981946349144, "rewards/rejected": -0.14829620718955994, "step": 6400 }, { "epoch": 1.16, "grad_norm": 1.8827316761016846, "learning_rate": 4.325520454214587e-06, "log_odds_chosen": 1.0105141401290894, "log_odds_ratio": -0.44694510102272034, "logits/chosen": -0.4560214877128601, "logits/rejected": -0.48660048842430115, "logps/chosen": -0.872963547706604, "logps/rejected": -1.478611946105957, "loss": 0.8971, "nll_loss": 0.8524263501167297, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08729635179042816, "rewards/margins": 0.06056482344865799, "rewards/rejected": -0.14786118268966675, "step": 6410 }, { "epoch": 1.16, "grad_norm": 1.138771891593933, "learning_rate": 4.319697190275149e-06, "log_odds_chosen": 0.9309137463569641, "log_odds_ratio": -0.5124486684799194, "logits/chosen": -0.4205206334590912, "logits/rejected": -0.46138858795166016, "logps/chosen": -0.9748755693435669, "logps/rejected": -1.6254541873931885, "loss": 0.9367, "nll_loss": 0.8854933977127075, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09748755395412445, "rewards/margins": 0.06505786627531052, "rewards/rejected": -0.16254541277885437, "step": 6420 }, { "epoch": 1.16, "grad_norm": 1.4133132696151733, "learning_rate": 4.313873926335711e-06, "log_odds_chosen": 1.0693262815475464, "log_odds_ratio": -0.5076473951339722, "logits/chosen": -0.3518396317958832, "logits/rejected": -0.38446587324142456, "logps/chosen": -0.81288081407547, "logps/rejected": -1.5248903036117554, "loss": 0.8726, "nll_loss": 0.82184237241745, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08128808438777924, "rewards/margins": 0.07120096683502197, "rewards/rejected": -0.15248903632164001, "step": 6430 }, { "epoch": 1.16, "grad_norm": 2.3196606636047363, "learning_rate": 4.308050662396274e-06, "log_odds_chosen": 0.6822506189346313, "log_odds_ratio": -0.6364583969116211, "logits/chosen": -0.47918400168418884, "logits/rejected": -0.49134689569473267, "logps/chosen": -1.0488958358764648, "logps/rejected": -1.5445834398269653, "loss": 1.072, "nll_loss": 1.0083352327346802, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10488957166671753, "rewards/margins": 0.049568768590688705, "rewards/rejected": -0.15445835888385773, "step": 6440 }, { "epoch": 1.17, "grad_norm": 2.130206346511841, "learning_rate": 4.302227398456835e-06, "log_odds_chosen": 0.9953344464302063, "log_odds_ratio": -0.5044914484024048, "logits/chosen": -0.43052592873573303, "logits/rejected": -0.47344738245010376, "logps/chosen": -0.885372519493103, "logps/rejected": -1.5817621946334839, "loss": 0.9541, "nll_loss": 0.9036803245544434, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08853726089000702, "rewards/margins": 0.06963896006345749, "rewards/rejected": -0.1581762135028839, "step": 6450 }, { "epoch": 1.17, "grad_norm": 1.7047734260559082, "learning_rate": 4.2964041345173965e-06, "log_odds_chosen": 0.40977612137794495, "log_odds_ratio": -0.6772114038467407, "logits/chosen": -0.46431097388267517, "logits/rejected": -0.4783390462398529, "logps/chosen": -0.9744992256164551, "logps/rejected": -1.2869319915771484, "loss": 1.0318, "nll_loss": 0.964094340801239, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09744994342327118, "rewards/margins": 0.031243273988366127, "rewards/rejected": -0.12869320809841156, "step": 6460 }, { "epoch": 1.17, "grad_norm": 1.5343431234359741, "learning_rate": 4.290580870577959e-06, "log_odds_chosen": 1.0222392082214355, "log_odds_ratio": -0.5148922204971313, "logits/chosen": -0.44463086128234863, "logits/rejected": -0.47433581948280334, "logps/chosen": -0.8703458905220032, "logps/rejected": -1.5388939380645752, "loss": 0.9594, "nll_loss": 0.9079081416130066, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08703459799289703, "rewards/margins": 0.0668548122048378, "rewards/rejected": -0.15388940274715424, "step": 6470 }, { "epoch": 1.17, "grad_norm": 0.9783145189285278, "learning_rate": 4.28475760663852e-06, "log_odds_chosen": 1.05240797996521, "log_odds_ratio": -0.47812169790267944, "logits/chosen": -0.4410451054573059, "logits/rejected": -0.46557217836380005, "logps/chosen": -0.836539089679718, "logps/rejected": -1.5610973834991455, "loss": 0.8759, "nll_loss": 0.8281365633010864, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08365390449762344, "rewards/margins": 0.07245583832263947, "rewards/rejected": -0.1561097502708435, "step": 6480 }, { "epoch": 1.17, "grad_norm": 3.253295660018921, "learning_rate": 4.278934342699083e-06, "log_odds_chosen": 0.8776898384094238, "log_odds_ratio": -0.4985920488834381, "logits/chosen": -0.4755215048789978, "logits/rejected": -0.48895248770713806, "logps/chosen": -0.9190085530281067, "logps/rejected": -1.5400559902191162, "loss": 0.9548, "nll_loss": 0.9049298167228699, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09190084040164948, "rewards/margins": 0.062104739248752594, "rewards/rejected": -0.15400558710098267, "step": 6490 }, { "epoch": 1.17, "grad_norm": 0.9343768954277039, "learning_rate": 4.273111078759645e-06, "log_odds_chosen": 0.6879677176475525, "log_odds_ratio": -0.5824822187423706, "logits/chosen": -0.47967013716697693, "logits/rejected": -0.4678632616996765, "logps/chosen": -0.9688738584518433, "logps/rejected": -1.4536980390548706, "loss": 1.0141, "nll_loss": 0.9558396339416504, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09688737988471985, "rewards/margins": 0.04848243668675423, "rewards/rejected": -0.14536981284618378, "step": 6500 }, { "epoch": 1.18, "grad_norm": 1.6471149921417236, "learning_rate": 4.267287814820206e-06, "log_odds_chosen": 1.1193538904190063, "log_odds_ratio": -0.47517600655555725, "logits/chosen": -0.45033377408981323, "logits/rejected": -0.47791576385498047, "logps/chosen": -0.9069080352783203, "logps/rejected": -1.702310562133789, "loss": 0.9214, "nll_loss": 0.8739102482795715, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09069080650806427, "rewards/margins": 0.07954025268554688, "rewards/rejected": -0.17023104429244995, "step": 6510 }, { "epoch": 1.18, "grad_norm": 2.0032148361206055, "learning_rate": 4.261464550880769e-06, "log_odds_chosen": 0.950029194355011, "log_odds_ratio": -0.5132050514221191, "logits/chosen": -0.4736092984676361, "logits/rejected": -0.4746457040309906, "logps/chosen": -0.9412399530410767, "logps/rejected": -1.6105903387069702, "loss": 0.9592, "nll_loss": 0.9078750610351562, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09412400424480438, "rewards/margins": 0.06693503260612488, "rewards/rejected": -0.16105900704860687, "step": 6520 }, { "epoch": 1.18, "grad_norm": 2.7363672256469727, "learning_rate": 4.255641286941331e-06, "log_odds_chosen": 0.6849262118339539, "log_odds_ratio": -0.5214493870735168, "logits/chosen": -0.4941480755805969, "logits/rejected": -0.49812883138656616, "logps/chosen": -0.9730516672134399, "logps/rejected": -1.425528883934021, "loss": 1.0632, "nll_loss": 1.0110845565795898, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09730516374111176, "rewards/margins": 0.045247726142406464, "rewards/rejected": -0.14255289733409882, "step": 6530 }, { "epoch": 1.18, "grad_norm": 0.7267311215400696, "learning_rate": 4.2498180230018925e-06, "log_odds_chosen": 0.8294305801391602, "log_odds_ratio": -0.5074676275253296, "logits/chosen": -0.4479546546936035, "logits/rejected": -0.469372034072876, "logps/chosen": -0.9526723623275757, "logps/rejected": -1.5356619358062744, "loss": 0.9706, "nll_loss": 0.9198722839355469, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09526724368333817, "rewards/margins": 0.05829895660281181, "rewards/rejected": -0.15356619656085968, "step": 6540 }, { "epoch": 1.18, "grad_norm": 0.9771788120269775, "learning_rate": 4.243994759062454e-06, "log_odds_chosen": 0.8805567026138306, "log_odds_ratio": -0.5365056395530701, "logits/chosen": -0.3927402198314667, "logits/rejected": -0.45152372121810913, "logps/chosen": -0.8411895036697388, "logps/rejected": -1.3840970993041992, "loss": 0.9869, "nll_loss": 0.9332119226455688, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08411894738674164, "rewards/margins": 0.054290771484375, "rewards/rejected": -0.13840971887111664, "step": 6550 }, { "epoch": 1.18, "grad_norm": 2.6478631496429443, "learning_rate": 4.238171495123016e-06, "log_odds_chosen": 0.5081368684768677, "log_odds_ratio": -0.5900481343269348, "logits/chosen": -0.4167002737522125, "logits/rejected": -0.4424230456352234, "logps/chosen": -0.9873155355453491, "logps/rejected": -1.3592678308486938, "loss": 0.9531, "nll_loss": 0.8941276669502258, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09873154759407043, "rewards/margins": 0.03719523549079895, "rewards/rejected": -0.13592679798603058, "step": 6560 }, { "epoch": 1.19, "grad_norm": 1.5292366743087769, "learning_rate": 4.232348231183578e-06, "log_odds_chosen": 1.2604739665985107, "log_odds_ratio": -0.4504130482673645, "logits/chosen": -0.41908130049705505, "logits/rejected": -0.4867452085018158, "logps/chosen": -0.9487060308456421, "logps/rejected": -1.8027664422988892, "loss": 0.9882, "nll_loss": 0.9431636929512024, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09487061202526093, "rewards/margins": 0.08540603518486023, "rewards/rejected": -0.18027664721012115, "step": 6570 }, { "epoch": 1.19, "grad_norm": 1.120796799659729, "learning_rate": 4.22652496724414e-06, "log_odds_chosen": 0.9827529788017273, "log_odds_ratio": -0.5386685132980347, "logits/chosen": -0.4483468532562256, "logits/rejected": -0.46654707193374634, "logps/chosen": -0.8629282712936401, "logps/rejected": -1.5766785144805908, "loss": 0.9345, "nll_loss": 0.8805925250053406, "rewards/accuracies": 0.625, "rewards/chosen": -0.08629283308982849, "rewards/margins": 0.07137502729892731, "rewards/rejected": -0.1576678454875946, "step": 6580 }, { "epoch": 1.19, "grad_norm": 1.3530491590499878, "learning_rate": 4.220701703304702e-06, "log_odds_chosen": 0.8309918642044067, "log_odds_ratio": -0.539421558380127, "logits/chosen": -0.47228020429611206, "logits/rejected": -0.4613127112388611, "logps/chosen": -0.9116449356079102, "logps/rejected": -1.4743530750274658, "loss": 0.8868, "nll_loss": 0.832877516746521, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.0911644995212555, "rewards/margins": 0.05627080798149109, "rewards/rejected": -0.14743532240390778, "step": 6590 }, { "epoch": 1.19, "grad_norm": 1.0767103433609009, "learning_rate": 4.214878439365264e-06, "log_odds_chosen": 0.8759848475456238, "log_odds_ratio": -0.49737709760665894, "logits/chosen": -0.43695569038391113, "logits/rejected": -0.47424954175949097, "logps/chosen": -0.9387688636779785, "logps/rejected": -1.5360345840454102, "loss": 0.9592, "nll_loss": 0.9094923734664917, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09387689083814621, "rewards/margins": 0.059726566076278687, "rewards/rejected": -0.1536034494638443, "step": 6600 }, { "epoch": 1.19, "grad_norm": 1.734474778175354, "learning_rate": 4.209055175425826e-06, "log_odds_chosen": 1.0728756189346313, "log_odds_ratio": -0.45263487100601196, "logits/chosen": -0.43640464544296265, "logits/rejected": -0.46173277497291565, "logps/chosen": -0.8198699951171875, "logps/rejected": -1.4813389778137207, "loss": 0.9697, "nll_loss": 0.9244702458381653, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08198700100183487, "rewards/margins": 0.06614689528942108, "rewards/rejected": -0.14813390374183655, "step": 6610 }, { "epoch": 1.2, "grad_norm": 1.4955604076385498, "learning_rate": 4.203231911486388e-06, "log_odds_chosen": 0.6764736771583557, "log_odds_ratio": -0.5236061811447144, "logits/chosen": -0.5202940702438354, "logits/rejected": -0.4843006134033203, "logps/chosen": -0.9377977252006531, "logps/rejected": -1.3767682313919067, "loss": 0.983, "nll_loss": 0.9306391477584839, "rewards/accuracies": 0.75, "rewards/chosen": -0.09377976506948471, "rewards/margins": 0.043897055089473724, "rewards/rejected": -0.13767683506011963, "step": 6620 }, { "epoch": 1.2, "grad_norm": 1.9507471323013306, "learning_rate": 4.197408647546949e-06, "log_odds_chosen": 0.7487798929214478, "log_odds_ratio": -0.5690258741378784, "logits/chosen": -0.4605821669101715, "logits/rejected": -0.45187997817993164, "logps/chosen": -0.9927385449409485, "logps/rejected": -1.4855639934539795, "loss": 0.9683, "nll_loss": 0.9114102125167847, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09927386045455933, "rewards/margins": 0.04928254336118698, "rewards/rejected": -0.1485564112663269, "step": 6630 }, { "epoch": 1.2, "grad_norm": 1.5568227767944336, "learning_rate": 4.191585383607511e-06, "log_odds_chosen": 0.9163190722465515, "log_odds_ratio": -0.49040189385414124, "logits/chosen": -0.42363911867141724, "logits/rejected": -0.4508935511112213, "logps/chosen": -0.9086519479751587, "logps/rejected": -1.5187523365020752, "loss": 0.968, "nll_loss": 0.9189130067825317, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09086520224809647, "rewards/margins": 0.06101004406809807, "rewards/rejected": -0.15187524259090424, "step": 6640 }, { "epoch": 1.2, "grad_norm": 0.8964611291885376, "learning_rate": 4.185762119668074e-06, "log_odds_chosen": 0.7944979071617126, "log_odds_ratio": -0.5299047231674194, "logits/chosen": -0.3896244168281555, "logits/rejected": -0.4423930048942566, "logps/chosen": -0.9034556150436401, "logps/rejected": -1.4044759273529053, "loss": 0.8695, "nll_loss": 0.8164774775505066, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09034556150436401, "rewards/margins": 0.05010201781988144, "rewards/rejected": -0.14044758677482605, "step": 6650 }, { "epoch": 1.2, "grad_norm": 1.2127312421798706, "learning_rate": 4.179938855728636e-06, "log_odds_chosen": 0.7413554191589355, "log_odds_ratio": -0.5506377220153809, "logits/chosen": -0.4648379385471344, "logits/rejected": -0.4867902398109436, "logps/chosen": -0.9216586947441101, "logps/rejected": -1.426811933517456, "loss": 0.9425, "nll_loss": 0.8874050378799438, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09216587245464325, "rewards/margins": 0.0505153127014637, "rewards/rejected": -0.14268119633197784, "step": 6660 }, { "epoch": 1.2, "grad_norm": 1.0402570962905884, "learning_rate": 4.1741155917891974e-06, "log_odds_chosen": 0.9177875518798828, "log_odds_ratio": -0.5133272409439087, "logits/chosen": -0.3900728225708008, "logits/rejected": -0.4346277117729187, "logps/chosen": -0.9603956937789917, "logps/rejected": -1.5792925357818604, "loss": 0.9434, "nll_loss": 0.8920313715934753, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09603957086801529, "rewards/margins": 0.06188970059156418, "rewards/rejected": -0.15792927145957947, "step": 6670 }, { "epoch": 1.21, "grad_norm": 1.1733165979385376, "learning_rate": 4.16829232784976e-06, "log_odds_chosen": 0.5979320406913757, "log_odds_ratio": -0.6295372843742371, "logits/chosen": -0.46904540061950684, "logits/rejected": -0.47716307640075684, "logps/chosen": -0.9750441312789917, "logps/rejected": -1.390572428703308, "loss": 1.0124, "nll_loss": 0.9493964314460754, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09750442206859589, "rewards/margins": 0.0415528267621994, "rewards/rejected": -0.1390572488307953, "step": 6680 }, { "epoch": 1.21, "grad_norm": 1.071596622467041, "learning_rate": 4.162469063910322e-06, "log_odds_chosen": 0.8037103414535522, "log_odds_ratio": -0.5370471477508545, "logits/chosen": -0.4986805021762848, "logits/rejected": -0.4829865097999573, "logps/chosen": -0.9812761545181274, "logps/rejected": -1.547245979309082, "loss": 1.0159, "nll_loss": 0.9622408747673035, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09812761843204498, "rewards/margins": 0.05659698694944382, "rewards/rejected": -0.1547246128320694, "step": 6690 }, { "epoch": 1.21, "grad_norm": 0.7664772868156433, "learning_rate": 4.1566457999708835e-06, "log_odds_chosen": 0.9926007986068726, "log_odds_ratio": -0.5328630208969116, "logits/chosen": -0.4604420065879822, "logits/rejected": -0.4595687985420227, "logps/chosen": -1.0073707103729248, "logps/rejected": -1.7252197265625, "loss": 1.0122, "nll_loss": 0.9589058756828308, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1007370725274086, "rewards/margins": 0.07178490608930588, "rewards/rejected": -0.17252197861671448, "step": 6700 }, { "epoch": 1.21, "grad_norm": 1.2537119388580322, "learning_rate": 4.150822536031446e-06, "log_odds_chosen": 0.9483833312988281, "log_odds_ratio": -0.5262691378593445, "logits/chosen": -0.4464438557624817, "logits/rejected": -0.49215570092201233, "logps/chosen": -0.8856107592582703, "logps/rejected": -1.479950189590454, "loss": 0.9288, "nll_loss": 0.8761947751045227, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08856107294559479, "rewards/margins": 0.05943392589688301, "rewards/rejected": -0.1479950249195099, "step": 6710 }, { "epoch": 1.21, "grad_norm": 1.239740014076233, "learning_rate": 4.144999272092008e-06, "log_odds_chosen": 1.1764709949493408, "log_odds_ratio": -0.4934256970882416, "logits/chosen": -0.38612398505210876, "logits/rejected": -0.44745928049087524, "logps/chosen": -0.7867048382759094, "logps/rejected": -1.5437514781951904, "loss": 0.8604, "nll_loss": 0.811070442199707, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07867047935724258, "rewards/margins": 0.0757046714425087, "rewards/rejected": -0.15437515079975128, "step": 6720 }, { "epoch": 1.22, "grad_norm": 1.7960892915725708, "learning_rate": 4.139176008152569e-06, "log_odds_chosen": 0.7462882995605469, "log_odds_ratio": -0.6226873397827148, "logits/chosen": -0.49681800603866577, "logits/rejected": -0.510138988494873, "logps/chosen": -0.8982499837875366, "logps/rejected": -1.4710769653320312, "loss": 1.01, "nll_loss": 0.9477685689926147, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.08982499688863754, "rewards/margins": 0.0572826974093914, "rewards/rejected": -0.14710770547389984, "step": 6730 }, { "epoch": 1.22, "grad_norm": 1.6915690898895264, "learning_rate": 4.133352744213131e-06, "log_odds_chosen": 1.1176029443740845, "log_odds_ratio": -0.4465733468532562, "logits/chosen": -0.45315223932266235, "logits/rejected": -0.5397419333457947, "logps/chosen": -0.9248179197311401, "logps/rejected": -1.6554958820343018, "loss": 0.951, "nll_loss": 0.9063474535942078, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09248179197311401, "rewards/margins": 0.0730677992105484, "rewards/rejected": -0.1655496060848236, "step": 6740 }, { "epoch": 1.22, "grad_norm": 1.6438920497894287, "learning_rate": 4.127529480273693e-06, "log_odds_chosen": 0.9469677209854126, "log_odds_ratio": -0.5286572575569153, "logits/chosen": -0.4755041003227234, "logits/rejected": -0.4868949353694916, "logps/chosen": -0.8673686981201172, "logps/rejected": -1.5094770193099976, "loss": 0.9609, "nll_loss": 0.908068835735321, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08673687279224396, "rewards/margins": 0.06421081721782684, "rewards/rejected": -0.1509476900100708, "step": 6750 }, { "epoch": 1.22, "grad_norm": 1.2185676097869873, "learning_rate": 4.121706216334255e-06, "log_odds_chosen": 0.9891728162765503, "log_odds_ratio": -0.5200589299201965, "logits/chosen": -0.44195041060447693, "logits/rejected": -0.48202115297317505, "logps/chosen": -0.9076203107833862, "logps/rejected": -1.5685710906982422, "loss": 1.0175, "nll_loss": 0.965488076210022, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09076203405857086, "rewards/margins": 0.06609507650136948, "rewards/rejected": -0.15685710310935974, "step": 6760 }, { "epoch": 1.22, "grad_norm": 1.3335249423980713, "learning_rate": 4.115882952394817e-06, "log_odds_chosen": 0.9775427579879761, "log_odds_ratio": -0.48796018958091736, "logits/chosen": -0.484336793422699, "logits/rejected": -0.5003049373626709, "logps/chosen": -0.9130972027778625, "logps/rejected": -1.6077959537506104, "loss": 0.9418, "nll_loss": 0.8930259943008423, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09130971133708954, "rewards/margins": 0.0694698840379715, "rewards/rejected": -0.16077958047389984, "step": 6770 }, { "epoch": 1.22, "grad_norm": 1.6787357330322266, "learning_rate": 4.1100596884553794e-06, "log_odds_chosen": 0.9635372161865234, "log_odds_ratio": -0.5388367176055908, "logits/chosen": -0.4794275164604187, "logits/rejected": -0.5022672414779663, "logps/chosen": -0.9136490821838379, "logps/rejected": -1.597872018814087, "loss": 0.912, "nll_loss": 0.8581094741821289, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09136491268873215, "rewards/margins": 0.06842230260372162, "rewards/rejected": -0.15978720784187317, "step": 6780 }, { "epoch": 1.23, "grad_norm": 1.1251144409179688, "learning_rate": 4.104236424515941e-06, "log_odds_chosen": 0.8386086225509644, "log_odds_ratio": -0.5400186777114868, "logits/chosen": -0.4414609372615814, "logits/rejected": -0.46368885040283203, "logps/chosen": -0.9598379135131836, "logps/rejected": -1.497143268585205, "loss": 0.9998, "nll_loss": 0.9457993507385254, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09598378837108612, "rewards/margins": 0.05373052880167961, "rewards/rejected": -0.14971432089805603, "step": 6790 }, { "epoch": 1.23, "grad_norm": 1.0504904985427856, "learning_rate": 4.098413160576503e-06, "log_odds_chosen": 0.8111333847045898, "log_odds_ratio": -0.5414608120918274, "logits/chosen": -0.43067970871925354, "logits/rejected": -0.44196492433547974, "logps/chosen": -0.9440352320671082, "logps/rejected": -1.4980310201644897, "loss": 0.9046, "nll_loss": 0.8504649996757507, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09440352767705917, "rewards/margins": 0.0553995780646801, "rewards/rejected": -0.14980310201644897, "step": 6800 }, { "epoch": 1.23, "grad_norm": 1.0189682245254517, "learning_rate": 4.0925898966370655e-06, "log_odds_chosen": 1.1220974922180176, "log_odds_ratio": -0.48033151030540466, "logits/chosen": -0.44528502225875854, "logits/rejected": -0.512277364730835, "logps/chosen": -0.8466246724128723, "logps/rejected": -1.5640580654144287, "loss": 0.9424, "nll_loss": 0.8943877220153809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08466245979070663, "rewards/margins": 0.07174333184957504, "rewards/rejected": -0.15640580654144287, "step": 6810 }, { "epoch": 1.23, "grad_norm": 1.2099649906158447, "learning_rate": 4.086766632697626e-06, "log_odds_chosen": 0.9407919049263, "log_odds_ratio": -0.49303555488586426, "logits/chosen": -0.44150418043136597, "logits/rejected": -0.4331357479095459, "logps/chosen": -0.8563871383666992, "logps/rejected": -1.5000990629196167, "loss": 0.9265, "nll_loss": 0.8772442936897278, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08563872426748276, "rewards/margins": 0.06437118351459503, "rewards/rejected": -0.1500099152326584, "step": 6820 }, { "epoch": 1.23, "grad_norm": 2.6405341625213623, "learning_rate": 4.0809433687581885e-06, "log_odds_chosen": 0.6924955248832703, "log_odds_ratio": -0.5537286996841431, "logits/chosen": -0.5182186365127563, "logits/rejected": -0.532181441783905, "logps/chosen": -1.0400513410568237, "logps/rejected": -1.528926968574524, "loss": 1.0234, "nll_loss": 0.967991054058075, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10400513559579849, "rewards/margins": 0.04888755828142166, "rewards/rejected": -0.15289269387722015, "step": 6830 }, { "epoch": 1.24, "grad_norm": 1.5627719163894653, "learning_rate": 4.075120104818751e-06, "log_odds_chosen": 1.0674669742584229, "log_odds_ratio": -0.4889269471168518, "logits/chosen": -0.4403366148471832, "logits/rejected": -0.4267563223838806, "logps/chosen": -0.7794691324234009, "logps/rejected": -1.4749410152435303, "loss": 0.9037, "nll_loss": 0.8547900319099426, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07794691622257233, "rewards/margins": 0.06954719126224518, "rewards/rejected": -0.1474941074848175, "step": 6840 }, { "epoch": 1.24, "grad_norm": 0.8014816045761108, "learning_rate": 4.069296840879312e-06, "log_odds_chosen": 0.739176869392395, "log_odds_ratio": -0.5694806575775146, "logits/chosen": -0.44575244188308716, "logits/rejected": -0.4639630913734436, "logps/chosen": -0.9097639322280884, "logps/rejected": -1.4184764623641968, "loss": 0.9335, "nll_loss": 0.8765338659286499, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09097640216350555, "rewards/margins": 0.05087127164006233, "rewards/rejected": -0.1418476402759552, "step": 6850 }, { "epoch": 1.24, "grad_norm": 1.7838681936264038, "learning_rate": 4.0634735769398745e-06, "log_odds_chosen": 0.8185930252075195, "log_odds_ratio": -0.5115641355514526, "logits/chosen": -0.441885769367218, "logits/rejected": -0.4383307993412018, "logps/chosen": -0.9322429895401001, "logps/rejected": -1.4734580516815186, "loss": 0.9531, "nll_loss": 0.901964008808136, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09322428703308105, "rewards/margins": 0.05412151664495468, "rewards/rejected": -0.14734581112861633, "step": 6860 }, { "epoch": 1.24, "grad_norm": 1.6935601234436035, "learning_rate": 4.057650313000437e-06, "log_odds_chosen": 0.47807177901268005, "log_odds_ratio": -0.6113700866699219, "logits/chosen": -0.47867828607559204, "logits/rejected": -0.49842625856399536, "logps/chosen": -1.120103120803833, "logps/rejected": -1.4488173723220825, "loss": 1.0677, "nll_loss": 1.0065581798553467, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.11201032251119614, "rewards/margins": 0.03287142515182495, "rewards/rejected": -0.1448817402124405, "step": 6870 }, { "epoch": 1.24, "grad_norm": 0.7489156723022461, "learning_rate": 4.051827049060999e-06, "log_odds_chosen": 0.8212149739265442, "log_odds_ratio": -0.5638260841369629, "logits/chosen": -0.5162757635116577, "logits/rejected": -0.5030328035354614, "logps/chosen": -0.901445746421814, "logps/rejected": -1.3886324167251587, "loss": 0.9795, "nll_loss": 0.9231454133987427, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0901445671916008, "rewards/margins": 0.048718664795160294, "rewards/rejected": -0.1388632357120514, "step": 6880 }, { "epoch": 1.24, "grad_norm": 0.9027697443962097, "learning_rate": 4.046003785121561e-06, "log_odds_chosen": 0.8782781362533569, "log_odds_ratio": -0.5325084924697876, "logits/chosen": -0.4884958863258362, "logits/rejected": -0.5067233443260193, "logps/chosen": -0.9407421350479126, "logps/rejected": -1.5487650632858276, "loss": 1.0885, "nll_loss": 1.035233974456787, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09407420456409454, "rewards/margins": 0.06080232933163643, "rewards/rejected": -0.15487651526927948, "step": 6890 }, { "epoch": 1.25, "grad_norm": 1.6402422189712524, "learning_rate": 4.040180521182122e-06, "log_odds_chosen": 0.7644470930099487, "log_odds_ratio": -0.6131644248962402, "logits/chosen": -0.5099159479141235, "logits/rejected": -0.49645549058914185, "logps/chosen": -0.9875893592834473, "logps/rejected": -1.5628368854522705, "loss": 1.0243, "nll_loss": 0.9630203247070312, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09875893592834473, "rewards/margins": 0.05752474069595337, "rewards/rejected": -0.1562836766242981, "step": 6900 }, { "epoch": 1.25, "grad_norm": 1.100881814956665, "learning_rate": 4.034357257242684e-06, "log_odds_chosen": 0.5976766347885132, "log_odds_ratio": -0.6160573959350586, "logits/chosen": -0.46787476539611816, "logits/rejected": -0.48675742745399475, "logps/chosen": -0.9335827827453613, "logps/rejected": -1.3486616611480713, "loss": 1.0044, "nll_loss": 0.9428272247314453, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09335827827453613, "rewards/margins": 0.041507888585329056, "rewards/rejected": -0.1348661631345749, "step": 6910 }, { "epoch": 1.25, "grad_norm": 1.0779989957809448, "learning_rate": 4.028533993303246e-06, "log_odds_chosen": 1.032034158706665, "log_odds_ratio": -0.45201388001441956, "logits/chosen": -0.5129601359367371, "logits/rejected": -0.5174925923347473, "logps/chosen": -0.8675662279129028, "logps/rejected": -1.561211347579956, "loss": 0.8998, "nll_loss": 0.8546127080917358, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.0867566242814064, "rewards/margins": 0.0693645179271698, "rewards/rejected": -0.1561211496591568, "step": 6920 }, { "epoch": 1.25, "grad_norm": 1.5758169889450073, "learning_rate": 4.022710729363808e-06, "log_odds_chosen": 1.4302197694778442, "log_odds_ratio": -0.4029451310634613, "logits/chosen": -0.41934436559677124, "logits/rejected": -0.4693407416343689, "logps/chosen": -0.855545163154602, "logps/rejected": -1.8322776556015015, "loss": 0.9478, "nll_loss": 0.9074686169624329, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.08555451035499573, "rewards/margins": 0.09767324477434158, "rewards/rejected": -0.1832277774810791, "step": 6930 }, { "epoch": 1.25, "grad_norm": 1.2255765199661255, "learning_rate": 4.0168874654243705e-06, "log_odds_chosen": 1.0592358112335205, "log_odds_ratio": -0.48211097717285156, "logits/chosen": -0.46458035707473755, "logits/rejected": -0.4718129634857178, "logps/chosen": -0.8804963827133179, "logps/rejected": -1.60556161403656, "loss": 0.9907, "nll_loss": 0.9424525499343872, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08804963529109955, "rewards/margins": 0.07250651717185974, "rewards/rejected": -0.16055616736412048, "step": 6940 }, { "epoch": 1.26, "grad_norm": 1.0770710706710815, "learning_rate": 4.011064201484932e-06, "log_odds_chosen": 0.8747438192367554, "log_odds_ratio": -0.5370987057685852, "logits/chosen": -0.4886086583137512, "logits/rejected": -0.46743687987327576, "logps/chosen": -0.9328964352607727, "logps/rejected": -1.534489631652832, "loss": 0.958, "nll_loss": 0.904322624206543, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09328965097665787, "rewards/margins": 0.06015932559967041, "rewards/rejected": -0.15344896912574768, "step": 6950 }, { "epoch": 1.26, "grad_norm": 1.0228257179260254, "learning_rate": 4.005240937545494e-06, "log_odds_chosen": 0.9547672271728516, "log_odds_ratio": -0.5136852264404297, "logits/chosen": -0.4253026843070984, "logits/rejected": -0.44504547119140625, "logps/chosen": -0.8749715089797974, "logps/rejected": -1.517364263534546, "loss": 0.914, "nll_loss": 0.8625979423522949, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08749713748693466, "rewards/margins": 0.06423927843570709, "rewards/rejected": -0.15173643827438354, "step": 6960 }, { "epoch": 1.26, "grad_norm": 1.1351262331008911, "learning_rate": 3.999417673606056e-06, "log_odds_chosen": 0.8336387872695923, "log_odds_ratio": -0.5469530820846558, "logits/chosen": -0.4293319582939148, "logits/rejected": -0.47841334342956543, "logps/chosen": -0.9202069044113159, "logps/rejected": -1.4685187339782715, "loss": 0.9341, "nll_loss": 0.879433274269104, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.092020682990551, "rewards/margins": 0.05483119562268257, "rewards/rejected": -0.14685186743736267, "step": 6970 }, { "epoch": 1.26, "grad_norm": 1.3760859966278076, "learning_rate": 3.993594409666618e-06, "log_odds_chosen": 0.973896324634552, "log_odds_ratio": -0.5526650547981262, "logits/chosen": -0.45984959602355957, "logits/rejected": -0.48349887132644653, "logps/chosen": -0.9893393516540527, "logps/rejected": -1.6574366092681885, "loss": 1.0807, "nll_loss": 1.0254210233688354, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09893393516540527, "rewards/margins": 0.06680972129106522, "rewards/rejected": -0.1657436639070511, "step": 6980 }, { "epoch": 1.26, "grad_norm": 0.7266592979431152, "learning_rate": 3.9877711457271795e-06, "log_odds_chosen": 0.8963996171951294, "log_odds_ratio": -0.48495644330978394, "logits/chosen": -0.3935561776161194, "logits/rejected": -0.43216556310653687, "logps/chosen": -0.8080177307128906, "logps/rejected": -1.3844796419143677, "loss": 0.8988, "nll_loss": 0.8502942323684692, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08080177009105682, "rewards/margins": 0.057646192610263824, "rewards/rejected": -0.13844797015190125, "step": 6990 }, { "epoch": 1.26, "grad_norm": 1.136324405670166, "learning_rate": 3.981947881787742e-06, "log_odds_chosen": 1.0977365970611572, "log_odds_ratio": -0.5075836777687073, "logits/chosen": -0.35787349939346313, "logits/rejected": -0.43332797288894653, "logps/chosen": -0.8275062441825867, "logps/rejected": -1.5597859621047974, "loss": 0.8396, "nll_loss": 0.7887982130050659, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08275063335895538, "rewards/margins": 0.07322796434164047, "rewards/rejected": -0.15597859025001526, "step": 7000 }, { "epoch": 1.27, "grad_norm": 1.6420483589172363, "learning_rate": 3.976124617848304e-06, "log_odds_chosen": 0.7088609933853149, "log_odds_ratio": -0.5549465417861938, "logits/chosen": -0.4889621138572693, "logits/rejected": -0.5120013356208801, "logps/chosen": -0.9765472412109375, "logps/rejected": -1.474671483039856, "loss": 0.9782, "nll_loss": 0.9226747751235962, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09765471518039703, "rewards/margins": 0.04981241747736931, "rewards/rejected": -0.14746715128421783, "step": 7010 }, { "epoch": 1.27, "grad_norm": 1.3024640083312988, "learning_rate": 3.9703013539088656e-06, "log_odds_chosen": 0.8062094449996948, "log_odds_ratio": -0.5428799986839294, "logits/chosen": -0.45093774795532227, "logits/rejected": -0.47385063767433167, "logps/chosen": -0.9071990847587585, "logps/rejected": -1.4861849546432495, "loss": 0.9166, "nll_loss": 0.8622728586196899, "rewards/accuracies": 0.75, "rewards/chosen": -0.09071992337703705, "rewards/margins": 0.05789857357740402, "rewards/rejected": -0.14861848950386047, "step": 7020 }, { "epoch": 1.27, "grad_norm": 1.2627145051956177, "learning_rate": 3.964478089969428e-06, "log_odds_chosen": 0.9427957534790039, "log_odds_ratio": -0.4958980977535248, "logits/chosen": -0.4174041152000427, "logits/rejected": -0.47076454758644104, "logps/chosen": -0.8594261407852173, "logps/rejected": -1.5148569345474243, "loss": 0.9342, "nll_loss": 0.8845945596694946, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08594261854887009, "rewards/margins": 0.06554307788610458, "rewards/rejected": -0.15148569643497467, "step": 7030 }, { "epoch": 1.27, "grad_norm": 1.116181492805481, "learning_rate": 3.958654826029989e-06, "log_odds_chosen": 1.0329385995864868, "log_odds_ratio": -0.5539559721946716, "logits/chosen": -0.4265444874763489, "logits/rejected": -0.45913130044937134, "logps/chosen": -0.8591364622116089, "logps/rejected": -1.6048072576522827, "loss": 0.9528, "nll_loss": 0.8973916172981262, "rewards/accuracies": 0.625, "rewards/chosen": -0.08591364324092865, "rewards/margins": 0.07456707954406738, "rewards/rejected": -0.16048072278499603, "step": 7040 }, { "epoch": 1.27, "grad_norm": 0.9981061816215515, "learning_rate": 3.952831562090552e-06, "log_odds_chosen": 1.2240138053894043, "log_odds_ratio": -0.4347744584083557, "logits/chosen": -0.41130954027175903, "logits/rejected": -0.48269692063331604, "logps/chosen": -0.7971197366714478, "logps/rejected": -1.584804892539978, "loss": 0.8706, "nll_loss": 0.827093243598938, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07971197366714478, "rewards/margins": 0.0787685215473175, "rewards/rejected": -0.15848049521446228, "step": 7050 }, { "epoch": 1.28, "grad_norm": 1.3474347591400146, "learning_rate": 3.947008298151114e-06, "log_odds_chosen": 0.9372537732124329, "log_odds_ratio": -0.4520055651664734, "logits/chosen": -0.43001455068588257, "logits/rejected": -0.4487704336643219, "logps/chosen": -0.853164792060852, "logps/rejected": -1.4767754077911377, "loss": 0.9032, "nll_loss": 0.8579923510551453, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.0853164792060852, "rewards/margins": 0.062361061573028564, "rewards/rejected": -0.14767754077911377, "step": 7060 }, { "epoch": 1.28, "grad_norm": 1.6226599216461182, "learning_rate": 3.941185034211675e-06, "log_odds_chosen": 0.7453482151031494, "log_odds_ratio": -0.5842022895812988, "logits/chosen": -0.49188828468322754, "logits/rejected": -0.4837633967399597, "logps/chosen": -0.9727983474731445, "logps/rejected": -1.4930089712142944, "loss": 1.0248, "nll_loss": 0.9663785696029663, "rewards/accuracies": 0.625, "rewards/chosen": -0.09727983176708221, "rewards/margins": 0.05202106386423111, "rewards/rejected": -0.14930090308189392, "step": 7070 }, { "epoch": 1.28, "grad_norm": 1.3624190092086792, "learning_rate": 3.935361770272237e-06, "log_odds_chosen": 0.7438120245933533, "log_odds_ratio": -0.610444188117981, "logits/chosen": -0.43259668350219727, "logits/rejected": -0.48162850737571716, "logps/chosen": -0.9150593876838684, "logps/rejected": -1.4560168981552124, "loss": 0.9653, "nll_loss": 0.9042099118232727, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09150593727827072, "rewards/margins": 0.05409575253725052, "rewards/rejected": -0.14560168981552124, "step": 7080 }, { "epoch": 1.28, "grad_norm": 2.6767237186431885, "learning_rate": 3.929538506332799e-06, "log_odds_chosen": 0.8162752389907837, "log_odds_ratio": -0.5413010716438293, "logits/chosen": -0.4502388834953308, "logits/rejected": -0.49949997663497925, "logps/chosen": -0.9828441739082336, "logps/rejected": -1.5386993885040283, "loss": 0.9733, "nll_loss": 0.9192169904708862, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09828442335128784, "rewards/margins": 0.05558552220463753, "rewards/rejected": -0.15386994183063507, "step": 7090 }, { "epoch": 1.28, "grad_norm": 2.3011538982391357, "learning_rate": 3.9237152423933615e-06, "log_odds_chosen": 0.6587690114974976, "log_odds_ratio": -0.5885307788848877, "logits/chosen": -0.5728673338890076, "logits/rejected": -0.5504066348075867, "logps/chosen": -1.0062379837036133, "logps/rejected": -1.490363597869873, "loss": 1.0529, "nll_loss": 0.9940397143363953, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10062380135059357, "rewards/margins": 0.04841255396604538, "rewards/rejected": -0.14903636276721954, "step": 7100 }, { "epoch": 1.28, "grad_norm": 1.0612140893936157, "learning_rate": 3.917891978453923e-06, "log_odds_chosen": 0.6910644769668579, "log_odds_ratio": -0.5820239782333374, "logits/chosen": -0.43986526131629944, "logits/rejected": -0.47137826681137085, "logps/chosen": -0.9189141988754272, "logps/rejected": -1.391150951385498, "loss": 0.9652, "nll_loss": 0.9069743156433105, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09189142286777496, "rewards/margins": 0.04722367227077484, "rewards/rejected": -0.1391150951385498, "step": 7110 }, { "epoch": 1.29, "grad_norm": 1.8805290460586548, "learning_rate": 3.912068714514485e-06, "log_odds_chosen": 0.646868109703064, "log_odds_ratio": -0.6137688159942627, "logits/chosen": -0.45590490102767944, "logits/rejected": -0.469409316778183, "logps/chosen": -0.9701493978500366, "logps/rejected": -1.424383521080017, "loss": 1.0222, "nll_loss": 0.9608350992202759, "rewards/accuracies": 0.625, "rewards/chosen": -0.09701494127511978, "rewards/margins": 0.04542340710759163, "rewards/rejected": -0.1424383670091629, "step": 7120 }, { "epoch": 1.29, "grad_norm": 1.5245620012283325, "learning_rate": 3.906245450575047e-06, "log_odds_chosen": 1.120469331741333, "log_odds_ratio": -0.47017669677734375, "logits/chosen": -0.44969311356544495, "logits/rejected": -0.45316869020462036, "logps/chosen": -0.8377715349197388, "logps/rejected": -1.6501190662384033, "loss": 0.9008, "nll_loss": 0.8537575006484985, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08377715200185776, "rewards/margins": 0.08123474568128586, "rewards/rejected": -0.16501189768314362, "step": 7130 }, { "epoch": 1.29, "grad_norm": 1.402364730834961, "learning_rate": 3.900422186635609e-06, "log_odds_chosen": 0.48328810930252075, "log_odds_ratio": -0.6531627774238586, "logits/chosen": -0.4681634306907654, "logits/rejected": -0.5032951235771179, "logps/chosen": -0.9557656049728394, "logps/rejected": -1.3002371788024902, "loss": 1.0143, "nll_loss": 0.9489529728889465, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09557655453681946, "rewards/margins": 0.034447163343429565, "rewards/rejected": -0.13002371788024902, "step": 7140 }, { "epoch": 1.29, "grad_norm": 0.9971950650215149, "learning_rate": 3.894598922696171e-06, "log_odds_chosen": 0.8737391233444214, "log_odds_ratio": -0.5511624217033386, "logits/chosen": -0.418578565120697, "logits/rejected": -0.45996397733688354, "logps/chosen": -0.8235437273979187, "logps/rejected": -1.4508552551269531, "loss": 0.9171, "nll_loss": 0.8619489669799805, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08235438168048859, "rewards/margins": 0.06273116171360016, "rewards/rejected": -0.14508552849292755, "step": 7150 }, { "epoch": 1.29, "grad_norm": 1.293363332748413, "learning_rate": 3.888775658756733e-06, "log_odds_chosen": 0.6994448900222778, "log_odds_ratio": -0.5278793573379517, "logits/chosen": -0.46094202995300293, "logits/rejected": -0.5015737414360046, "logps/chosen": -0.8974512815475464, "logps/rejected": -1.3594824075698853, "loss": 0.9091, "nll_loss": 0.8563462495803833, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08974512666463852, "rewards/margins": 0.046203114092350006, "rewards/rejected": -0.13594824075698853, "step": 7160 }, { "epoch": 1.3, "grad_norm": 1.8436368703842163, "learning_rate": 3.882952394817295e-06, "log_odds_chosen": 0.8397830724716187, "log_odds_ratio": -0.5210739374160767, "logits/chosen": -0.475078284740448, "logits/rejected": -0.4907829761505127, "logps/chosen": -0.9214905500411987, "logps/rejected": -1.4428390264511108, "loss": 0.9722, "nll_loss": 0.9201291799545288, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09214906394481659, "rewards/margins": 0.05213485285639763, "rewards/rejected": -0.14428392052650452, "step": 7170 }, { "epoch": 1.3, "grad_norm": 1.1829735040664673, "learning_rate": 3.877129130877857e-06, "log_odds_chosen": 0.6583563089370728, "log_odds_ratio": -0.5702995657920837, "logits/chosen": -0.5196170210838318, "logits/rejected": -0.5385341644287109, "logps/chosen": -0.9533084034919739, "logps/rejected": -1.4245009422302246, "loss": 0.9988, "nll_loss": 0.9417839050292969, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09533083438873291, "rewards/margins": 0.04711926728487015, "rewards/rejected": -0.14245007932186127, "step": 7180 }, { "epoch": 1.3, "grad_norm": 4.115557670593262, "learning_rate": 3.871305866938419e-06, "log_odds_chosen": 0.8950411677360535, "log_odds_ratio": -0.5178020000457764, "logits/chosen": -0.5039768815040588, "logits/rejected": -0.5022481083869934, "logps/chosen": -0.9849491119384766, "logps/rejected": -1.6610597372055054, "loss": 1.0442, "nll_loss": 0.9924230575561523, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09849490970373154, "rewards/margins": 0.06761106103658676, "rewards/rejected": -0.1661059856414795, "step": 7190 }, { "epoch": 1.3, "grad_norm": 1.6129990816116333, "learning_rate": 3.865482602998981e-06, "log_odds_chosen": 0.977696418762207, "log_odds_ratio": -0.5619850158691406, "logits/chosen": -0.48574066162109375, "logits/rejected": -0.5287076234817505, "logps/chosen": -0.8981271982192993, "logps/rejected": -1.5921859741210938, "loss": 0.9912, "nll_loss": 0.9349862933158875, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08981271088123322, "rewards/margins": 0.06940589845180511, "rewards/rejected": -0.15921860933303833, "step": 7200 }, { "epoch": 1.3, "grad_norm": 0.7514549493789673, "learning_rate": 3.859659339059543e-06, "log_odds_chosen": 1.0510715246200562, "log_odds_ratio": -0.4970950186252594, "logits/chosen": -0.45134028792381287, "logits/rejected": -0.48873621225357056, "logps/chosen": -0.8820649981498718, "logps/rejected": -1.5363123416900635, "loss": 0.9205, "nll_loss": 0.8707484006881714, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08820649981498718, "rewards/margins": 0.06542472541332245, "rewards/rejected": -0.15363122522830963, "step": 7210 }, { "epoch": 1.3, "grad_norm": 1.2355408668518066, "learning_rate": 3.853836075120104e-06, "log_odds_chosen": 0.9166771769523621, "log_odds_ratio": -0.5141069889068604, "logits/chosen": -0.49904727935791016, "logits/rejected": -0.5441471934318542, "logps/chosen": -0.9830909967422485, "logps/rejected": -1.62188720703125, "loss": 1.0317, "nll_loss": 0.9803188443183899, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09830910712480545, "rewards/margins": 0.06387962400913239, "rewards/rejected": -0.16218872368335724, "step": 7220 }, { "epoch": 1.31, "grad_norm": 2.117945909500122, "learning_rate": 3.8480128111806664e-06, "log_odds_chosen": 0.984078049659729, "log_odds_ratio": -0.44726839661598206, "logits/chosen": -0.4131518006324768, "logits/rejected": -0.4934825003147125, "logps/chosen": -0.9141210317611694, "logps/rejected": -1.603629469871521, "loss": 0.9391, "nll_loss": 0.8943251371383667, "rewards/accuracies": 0.8125, "rewards/chosen": -0.09141210466623306, "rewards/margins": 0.06895085424184799, "rewards/rejected": -0.16036295890808105, "step": 7230 }, { "epoch": 1.31, "grad_norm": 1.4795246124267578, "learning_rate": 3.842189547241229e-06, "log_odds_chosen": 1.179312825202942, "log_odds_ratio": -0.4878261983394623, "logits/chosen": -0.415322482585907, "logits/rejected": -0.4562760889530182, "logps/chosen": -0.8761876821517944, "logps/rejected": -1.7031415700912476, "loss": 0.8902, "nll_loss": 0.8413738012313843, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08761876821517944, "rewards/margins": 0.08269539475440979, "rewards/rejected": -0.17031416296958923, "step": 7240 }, { "epoch": 1.31, "grad_norm": 1.564605474472046, "learning_rate": 3.836366283301791e-06, "log_odds_chosen": 0.7795203924179077, "log_odds_ratio": -0.5619980096817017, "logits/chosen": -0.4421234726905823, "logits/rejected": -0.4827519357204437, "logps/chosen": -0.926150918006897, "logps/rejected": -1.481336236000061, "loss": 0.9577, "nll_loss": 0.901451587677002, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09261508285999298, "rewards/margins": 0.05551854521036148, "rewards/rejected": -0.14813363552093506, "step": 7250 }, { "epoch": 1.31, "grad_norm": 1.023547887802124, "learning_rate": 3.8305430193623525e-06, "log_odds_chosen": 0.9588610529899597, "log_odds_ratio": -0.4990147650241852, "logits/chosen": -0.48034173250198364, "logits/rejected": -0.5247659683227539, "logps/chosen": -0.901258647441864, "logps/rejected": -1.540440320968628, "loss": 0.9929, "nll_loss": 0.9430160522460938, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09012585878372192, "rewards/margins": 0.06391817331314087, "rewards/rejected": -0.1540440171957016, "step": 7260 }, { "epoch": 1.31, "grad_norm": 0.862139105796814, "learning_rate": 3.824719755422914e-06, "log_odds_chosen": 0.7220529317855835, "log_odds_ratio": -0.5099185109138489, "logits/chosen": -0.4641505181789398, "logits/rejected": -0.44089174270629883, "logps/chosen": -0.9471620321273804, "logps/rejected": -1.4334056377410889, "loss": 0.8787, "nll_loss": 0.8277288675308228, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09471620619297028, "rewards/margins": 0.04862435534596443, "rewards/rejected": -0.1433405578136444, "step": 7270 }, { "epoch": 1.32, "grad_norm": 1.5787487030029297, "learning_rate": 3.818896491483476e-06, "log_odds_chosen": 1.0601729154586792, "log_odds_ratio": -0.4927380681037903, "logits/chosen": -0.444486141204834, "logits/rejected": -0.43789857625961304, "logps/chosen": -0.9262102842330933, "logps/rejected": -1.684975028038025, "loss": 0.9764, "nll_loss": 0.9271078109741211, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09262104332447052, "rewards/margins": 0.07587646692991257, "rewards/rejected": -0.1684975028038025, "step": 7280 }, { "epoch": 1.32, "grad_norm": 1.860785722732544, "learning_rate": 3.813073227544038e-06, "log_odds_chosen": 0.9285827875137329, "log_odds_ratio": -0.5077587962150574, "logits/chosen": -0.40383124351501465, "logits/rejected": -0.4806482195854187, "logps/chosen": -0.946780800819397, "logps/rejected": -1.6201133728027344, "loss": 0.949, "nll_loss": 0.8982681035995483, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09467808157205582, "rewards/margins": 0.06733326613903046, "rewards/rejected": -0.16201135516166687, "step": 7290 }, { "epoch": 1.32, "grad_norm": 4.042755603790283, "learning_rate": 3.8072499636046e-06, "log_odds_chosen": 0.7702414393424988, "log_odds_ratio": -0.5221496820449829, "logits/chosen": -0.4408508241176605, "logits/rejected": -0.4835754930973053, "logps/chosen": -0.860028088092804, "logps/rejected": -1.3842127323150635, "loss": 0.9616, "nll_loss": 0.9093489646911621, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08600281178951263, "rewards/margins": 0.05241847038269043, "rewards/rejected": -0.13842126727104187, "step": 7300 }, { "epoch": 1.32, "grad_norm": 1.9792332649230957, "learning_rate": 3.8014266996651624e-06, "log_odds_chosen": 1.1855796575546265, "log_odds_ratio": -0.4438776969909668, "logits/chosen": -0.3634795546531677, "logits/rejected": -0.44674357771873474, "logps/chosen": -0.7828920483589172, "logps/rejected": -1.5813146829605103, "loss": 0.8686, "nll_loss": 0.8241745829582214, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.0782892033457756, "rewards/margins": 0.07984226942062378, "rewards/rejected": -0.1581314504146576, "step": 7310 }, { "epoch": 1.32, "grad_norm": 1.8787983655929565, "learning_rate": 3.795603435725724e-06, "log_odds_chosen": 0.9159332513809204, "log_odds_ratio": -0.5088370442390442, "logits/chosen": -0.4633324146270752, "logits/rejected": -0.49445921182632446, "logps/chosen": -0.9663828015327454, "logps/rejected": -1.586145043373108, "loss": 0.9774, "nll_loss": 0.9265311360359192, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0966382771730423, "rewards/margins": 0.06197623163461685, "rewards/rejected": -0.15861448645591736, "step": 7320 }, { "epoch": 1.32, "grad_norm": 1.74463951587677, "learning_rate": 3.7897801717862857e-06, "log_odds_chosen": 0.9995774030685425, "log_odds_ratio": -0.5128060579299927, "logits/chosen": -0.43940553069114685, "logits/rejected": -0.4527904987335205, "logps/chosen": -0.899426281452179, "logps/rejected": -1.5987919569015503, "loss": 0.9602, "nll_loss": 0.9089180827140808, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08994264900684357, "rewards/margins": 0.06993655860424042, "rewards/rejected": -0.15987920761108398, "step": 7330 }, { "epoch": 1.33, "grad_norm": 1.7805492877960205, "learning_rate": 3.783956907846848e-06, "log_odds_chosen": 1.0778847932815552, "log_odds_ratio": -0.4922080636024475, "logits/chosen": -0.4460074305534363, "logits/rejected": -0.48577815294265747, "logps/chosen": -0.8337495923042297, "logps/rejected": -1.5919334888458252, "loss": 1.0109, "nll_loss": 0.9617268443107605, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08337496221065521, "rewards/margins": 0.07581837475299835, "rewards/rejected": -0.15919332206249237, "step": 7340 }, { "epoch": 1.33, "grad_norm": 1.7505688667297363, "learning_rate": 3.77813364390741e-06, "log_odds_chosen": 0.6708268523216248, "log_odds_ratio": -0.6100478768348694, "logits/chosen": -0.4991052746772766, "logits/rejected": -0.5121651887893677, "logps/chosen": -1.0308465957641602, "logps/rejected": -1.5460246801376343, "loss": 1.0535, "nll_loss": 0.9924944043159485, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10308466851711273, "rewards/margins": 0.05151781439781189, "rewards/rejected": -0.15460246801376343, "step": 7350 }, { "epoch": 1.33, "grad_norm": 1.5388883352279663, "learning_rate": 3.7723103799679722e-06, "log_odds_chosen": 0.7845932841300964, "log_odds_ratio": -0.5693210363388062, "logits/chosen": -0.4648071825504303, "logits/rejected": -0.47867363691329956, "logps/chosen": -0.9348169565200806, "logps/rejected": -1.4865964651107788, "loss": 0.9791, "nll_loss": 0.9221285581588745, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09348170459270477, "rewards/margins": 0.055177945643663406, "rewards/rejected": -0.14865966141223907, "step": 7360 }, { "epoch": 1.33, "grad_norm": 1.6391617059707642, "learning_rate": 3.7664871160285337e-06, "log_odds_chosen": 0.9377862215042114, "log_odds_ratio": -0.5311064124107361, "logits/chosen": -0.4713362753391266, "logits/rejected": -0.48883286118507385, "logps/chosen": -0.890163242816925, "logps/rejected": -1.537257194519043, "loss": 0.9828, "nll_loss": 0.9296792149543762, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08901633322238922, "rewards/margins": 0.0647093877196312, "rewards/rejected": -0.15372571349143982, "step": 7370 }, { "epoch": 1.33, "grad_norm": 1.7904300689697266, "learning_rate": 3.7606638520890956e-06, "log_odds_chosen": 0.9551296234130859, "log_odds_ratio": -0.5431476831436157, "logits/chosen": -0.47295600175857544, "logits/rejected": -0.475273996591568, "logps/chosen": -0.9500619173049927, "logps/rejected": -1.6278291940689087, "loss": 0.9819, "nll_loss": 0.9276124238967896, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09500618278980255, "rewards/margins": 0.06777672469615936, "rewards/rejected": -0.16278290748596191, "step": 7380 }, { "epoch": 1.33, "grad_norm": 1.3705064058303833, "learning_rate": 3.754840588149658e-06, "log_odds_chosen": 0.8254842758178711, "log_odds_ratio": -0.5407508015632629, "logits/chosen": -0.4489668011665344, "logits/rejected": -0.4651058614253998, "logps/chosen": -1.0021662712097168, "logps/rejected": -1.5822681188583374, "loss": 0.9814, "nll_loss": 0.9273598790168762, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10021662712097168, "rewards/margins": 0.05801018327474594, "rewards/rejected": -0.15822681784629822, "step": 7390 }, { "epoch": 1.34, "grad_norm": 1.5565133094787598, "learning_rate": 3.7490173242102198e-06, "log_odds_chosen": 0.780811071395874, "log_odds_ratio": -0.5810168385505676, "logits/chosen": -0.4458600580692291, "logits/rejected": -0.47487956285476685, "logps/chosen": -0.9416858553886414, "logps/rejected": -1.492948293685913, "loss": 0.9911, "nll_loss": 0.9330151677131653, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09416858851909637, "rewards/margins": 0.055126238614320755, "rewards/rejected": -0.14929482340812683, "step": 7400 }, { "epoch": 1.34, "grad_norm": 1.3818910121917725, "learning_rate": 3.7431940602707812e-06, "log_odds_chosen": 1.0065568685531616, "log_odds_ratio": -0.5279130935668945, "logits/chosen": -0.44704675674438477, "logits/rejected": -0.45673441886901855, "logps/chosen": -0.8805797696113586, "logps/rejected": -1.5971896648406982, "loss": 0.9482, "nll_loss": 0.8954331278800964, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0880579799413681, "rewards/margins": 0.07166098058223724, "rewards/rejected": -0.15971896052360535, "step": 7410 }, { "epoch": 1.34, "grad_norm": 1.7497045993804932, "learning_rate": 3.7373707963313436e-06, "log_odds_chosen": 0.8069828152656555, "log_odds_ratio": -0.5470659136772156, "logits/chosen": -0.41347962617874146, "logits/rejected": -0.42065295577049255, "logps/chosen": -0.8937679529190063, "logps/rejected": -1.451737403869629, "loss": 0.9324, "nll_loss": 0.8776780962944031, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0893767923116684, "rewards/margins": 0.05579695850610733, "rewards/rejected": -0.14517375826835632, "step": 7420 }, { "epoch": 1.34, "grad_norm": 2.3993964195251465, "learning_rate": 3.7315475323919054e-06, "log_odds_chosen": 0.6908426880836487, "log_odds_ratio": -0.6134909391403198, "logits/chosen": -0.4729135036468506, "logits/rejected": -0.4644432067871094, "logps/chosen": -1.006283164024353, "logps/rejected": -1.5135084390640259, "loss": 1.0261, "nll_loss": 0.9647462964057922, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1006283164024353, "rewards/margins": 0.050722528249025345, "rewards/rejected": -0.15135084092617035, "step": 7430 }, { "epoch": 1.34, "grad_norm": 2.989941358566284, "learning_rate": 3.7257242684524673e-06, "log_odds_chosen": 1.004838228225708, "log_odds_ratio": -0.4893857538700104, "logits/chosen": -0.47101831436157227, "logits/rejected": -0.47765296697616577, "logps/chosen": -0.8820838928222656, "logps/rejected": -1.536239504814148, "loss": 0.8653, "nll_loss": 0.8163647651672363, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0882083922624588, "rewards/margins": 0.06541556119918823, "rewards/rejected": -0.15362393856048584, "step": 7440 }, { "epoch": 1.35, "grad_norm": 1.9193861484527588, "learning_rate": 3.7199010045130296e-06, "log_odds_chosen": 0.8586718440055847, "log_odds_ratio": -0.5084580779075623, "logits/chosen": -0.4453280568122864, "logits/rejected": -0.45878106355667114, "logps/chosen": -0.8870725631713867, "logps/rejected": -1.4322541952133179, "loss": 0.9746, "nll_loss": 0.9237484931945801, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08870726078748703, "rewards/margins": 0.05451815202832222, "rewards/rejected": -0.14322540163993835, "step": 7450 }, { "epoch": 1.35, "grad_norm": 1.2012253999710083, "learning_rate": 3.714077740573591e-06, "log_odds_chosen": 0.8122612237930298, "log_odds_ratio": -0.5620238780975342, "logits/chosen": -0.43501463532447815, "logits/rejected": -0.45105236768722534, "logps/chosen": -0.9191938638687134, "logps/rejected": -1.440507173538208, "loss": 0.9885, "nll_loss": 0.9323150515556335, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09191938489675522, "rewards/margins": 0.05213134363293648, "rewards/rejected": -0.144050732254982, "step": 7460 }, { "epoch": 1.35, "grad_norm": 1.6287994384765625, "learning_rate": 3.7082544766341534e-06, "log_odds_chosen": 0.8829139471054077, "log_odds_ratio": -0.5368236899375916, "logits/chosen": -0.46378859877586365, "logits/rejected": -0.47032395005226135, "logps/chosen": -1.0091739892959595, "logps/rejected": -1.637733817100525, "loss": 0.9935, "nll_loss": 0.9397771954536438, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10091741383075714, "rewards/margins": 0.06285597383975983, "rewards/rejected": -0.16377338767051697, "step": 7470 }, { "epoch": 1.35, "grad_norm": 1.6688724756240845, "learning_rate": 3.7024312126947153e-06, "log_odds_chosen": 0.9815562963485718, "log_odds_ratio": -0.4945163130760193, "logits/chosen": -0.4513029158115387, "logits/rejected": -0.45173224806785583, "logps/chosen": -0.9311221241950989, "logps/rejected": -1.545288324356079, "loss": 0.9696, "nll_loss": 0.9201422929763794, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09311220794916153, "rewards/margins": 0.0614166185259819, "rewards/rejected": -0.15452882647514343, "step": 7480 }, { "epoch": 1.35, "grad_norm": 1.660881519317627, "learning_rate": 3.696607948755277e-06, "log_odds_chosen": 1.0118606090545654, "log_odds_ratio": -0.4933605194091797, "logits/chosen": -0.4922306537628174, "logits/rejected": -0.46402543783187866, "logps/chosen": -0.8814403414726257, "logps/rejected": -1.5738918781280518, "loss": 0.8907, "nll_loss": 0.841367244720459, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08814402669668198, "rewards/margins": 0.06924516707658768, "rewards/rejected": -0.15738919377326965, "step": 7490 }, { "epoch": 1.35, "grad_norm": 0.7219038605690002, "learning_rate": 3.690784684815839e-06, "log_odds_chosen": 0.5756661891937256, "log_odds_ratio": -0.6396831274032593, "logits/chosen": -0.4645994305610657, "logits/rejected": -0.48954564332962036, "logps/chosen": -0.9166939854621887, "logps/rejected": -1.3150080442428589, "loss": 0.951, "nll_loss": 0.887010931968689, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09166939556598663, "rewards/margins": 0.03983139619231224, "rewards/rejected": -0.13150081038475037, "step": 7500 }, { "epoch": 1.36, "grad_norm": 1.2778137922286987, "learning_rate": 3.684961420876401e-06, "log_odds_chosen": 0.9894605875015259, "log_odds_ratio": -0.5202837586402893, "logits/chosen": -0.4389236569404602, "logits/rejected": -0.4709576964378357, "logps/chosen": -0.8941949009895325, "logps/rejected": -1.6198762655258179, "loss": 0.9531, "nll_loss": 0.9010677337646484, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08941948413848877, "rewards/margins": 0.0725681334733963, "rewards/rejected": -0.16198763251304626, "step": 7510 }, { "epoch": 1.36, "grad_norm": 2.6866061687469482, "learning_rate": 3.679138156936963e-06, "log_odds_chosen": 0.891405463218689, "log_odds_ratio": -0.5774060487747192, "logits/chosen": -0.4057556986808777, "logits/rejected": -0.4247608184814453, "logps/chosen": -0.9688380360603333, "logps/rejected": -1.5794672966003418, "loss": 0.9128, "nll_loss": 0.8550981283187866, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09688380360603333, "rewards/margins": 0.06106293946504593, "rewards/rejected": -0.15794673562049866, "step": 7520 }, { "epoch": 1.36, "grad_norm": 0.8768310546875, "learning_rate": 3.673314892997525e-06, "log_odds_chosen": 0.8042165637016296, "log_odds_ratio": -0.5780975818634033, "logits/chosen": -0.4777071475982666, "logits/rejected": -0.4684552550315857, "logps/chosen": -0.9875958561897278, "logps/rejected": -1.5576165914535522, "loss": 0.9796, "nll_loss": 0.9217915534973145, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09875957667827606, "rewards/margins": 0.057002078741788864, "rewards/rejected": -0.15576167404651642, "step": 7530 }, { "epoch": 1.36, "grad_norm": 0.6389343738555908, "learning_rate": 3.667491629058087e-06, "log_odds_chosen": 1.0354812145233154, "log_odds_ratio": -0.518434464931488, "logits/chosen": -0.45959019660949707, "logits/rejected": -0.4531663954257965, "logps/chosen": -0.8949222564697266, "logps/rejected": -1.6369860172271729, "loss": 0.917, "nll_loss": 0.8651984930038452, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08949221670627594, "rewards/margins": 0.07420636713504791, "rewards/rejected": -0.16369858384132385, "step": 7540 }, { "epoch": 1.36, "grad_norm": 0.9316779971122742, "learning_rate": 3.6616683651186485e-06, "log_odds_chosen": 0.6489259600639343, "log_odds_ratio": -0.6223701238632202, "logits/chosen": -0.4836137890815735, "logits/rejected": -0.4736199975013733, "logps/chosen": -0.9614918828010559, "logps/rejected": -1.3946425914764404, "loss": 1.0313, "nll_loss": 0.9690502285957336, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09614919126033783, "rewards/margins": 0.04331507161259651, "rewards/rejected": -0.13946424424648285, "step": 7550 }, { "epoch": 1.37, "grad_norm": 1.4206702709197998, "learning_rate": 3.655845101179211e-06, "log_odds_chosen": 1.0469117164611816, "log_odds_ratio": -0.5183277726173401, "logits/chosen": -0.4339308738708496, "logits/rejected": -0.433371365070343, "logps/chosen": -0.9971551895141602, "logps/rejected": -1.678015112876892, "loss": 1.036, "nll_loss": 0.9841548204421997, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09971550107002258, "rewards/margins": 0.06808601319789886, "rewards/rejected": -0.16780151426792145, "step": 7560 }, { "epoch": 1.37, "grad_norm": 1.2541790008544922, "learning_rate": 3.6500218372397727e-06, "log_odds_chosen": 0.9297458529472351, "log_odds_ratio": -0.5026808977127075, "logits/chosen": -0.407135009765625, "logits/rejected": -0.4344724118709564, "logps/chosen": -0.8167446851730347, "logps/rejected": -1.452601671218872, "loss": 0.8584, "nll_loss": 0.8081638216972351, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08167446404695511, "rewards/margins": 0.06358569860458374, "rewards/rejected": -0.14526017010211945, "step": 7570 }, { "epoch": 1.37, "grad_norm": 0.9388701915740967, "learning_rate": 3.644198573300335e-06, "log_odds_chosen": 0.7810580730438232, "log_odds_ratio": -0.5430953502655029, "logits/chosen": -0.5032345652580261, "logits/rejected": -0.4799131453037262, "logps/chosen": -0.8443433046340942, "logps/rejected": -1.3685932159423828, "loss": 0.9451, "nll_loss": 0.8908378481864929, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08443433791399002, "rewards/margins": 0.05242498964071274, "rewards/rejected": -0.13685932755470276, "step": 7580 }, { "epoch": 1.37, "grad_norm": 2.0603744983673096, "learning_rate": 3.6383753093608965e-06, "log_odds_chosen": 0.9048040509223938, "log_odds_ratio": -0.5384871959686279, "logits/chosen": -0.46712374687194824, "logits/rejected": -0.46890658140182495, "logps/chosen": -0.8679774403572083, "logps/rejected": -1.3940092325210571, "loss": 0.9477, "nll_loss": 0.8938709497451782, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08679774403572083, "rewards/margins": 0.05260317772626877, "rewards/rejected": -0.1394009292125702, "step": 7590 }, { "epoch": 1.37, "grad_norm": 1.5286774635314941, "learning_rate": 3.6325520454214584e-06, "log_odds_chosen": 1.362618088722229, "log_odds_ratio": -0.4447413384914398, "logits/chosen": -0.45346957445144653, "logits/rejected": -0.4838164448738098, "logps/chosen": -0.9041000604629517, "logps/rejected": -1.8852291107177734, "loss": 0.8869, "nll_loss": 0.842419445514679, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09040998667478561, "rewards/margins": 0.09811293333768845, "rewards/rejected": -0.18852293491363525, "step": 7600 }, { "epoch": 1.37, "grad_norm": 0.9117116332054138, "learning_rate": 3.6267287814820207e-06, "log_odds_chosen": 0.7849031686782837, "log_odds_ratio": -0.5902391076087952, "logits/chosen": -0.4828110635280609, "logits/rejected": -0.45946455001831055, "logps/chosen": -0.8875317573547363, "logps/rejected": -1.431365728378296, "loss": 0.9272, "nll_loss": 0.8681669235229492, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.08875317126512527, "rewards/margins": 0.05438339710235596, "rewards/rejected": -0.14313657581806183, "step": 7610 }, { "epoch": 1.38, "grad_norm": 1.7428468465805054, "learning_rate": 3.6209055175425826e-06, "log_odds_chosen": 0.7643271684646606, "log_odds_ratio": -0.6414823532104492, "logits/chosen": -0.48790162801742554, "logits/rejected": -0.4671143591403961, "logps/chosen": -0.942896842956543, "logps/rejected": -1.4927890300750732, "loss": 1.0018, "nll_loss": 0.9376304745674133, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09428969025611877, "rewards/margins": 0.05498921126127243, "rewards/rejected": -0.1492789089679718, "step": 7620 }, { "epoch": 1.38, "grad_norm": 1.4223955869674683, "learning_rate": 3.6150822536031444e-06, "log_odds_chosen": 0.9342612028121948, "log_odds_ratio": -0.5149013996124268, "logits/chosen": -0.4245794713497162, "logits/rejected": -0.42594870924949646, "logps/chosen": -0.8617804646492004, "logps/rejected": -1.5337378978729248, "loss": 0.9272, "nll_loss": 0.875745415687561, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08617803454399109, "rewards/margins": 0.06719574332237244, "rewards/rejected": -0.1533738076686859, "step": 7630 }, { "epoch": 1.38, "grad_norm": 2.223407745361328, "learning_rate": 3.6092589896637063e-06, "log_odds_chosen": 1.340857744216919, "log_odds_ratio": -0.4616336226463318, "logits/chosen": -0.4703386425971985, "logits/rejected": -0.5048006176948547, "logps/chosen": -0.846632182598114, "logps/rejected": -1.7307153940200806, "loss": 0.9162, "nll_loss": 0.8700374364852905, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08466322720050812, "rewards/margins": 0.08840831369161606, "rewards/rejected": -0.17307154834270477, "step": 7640 }, { "epoch": 1.38, "grad_norm": 0.9719432592391968, "learning_rate": 3.603435725724268e-06, "log_odds_chosen": 0.9791809916496277, "log_odds_ratio": -0.5275226831436157, "logits/chosen": -0.4798418879508972, "logits/rejected": -0.4937385022640228, "logps/chosen": -0.9020121693611145, "logps/rejected": -1.598841905593872, "loss": 0.9018, "nll_loss": 0.8490481376647949, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09020121395587921, "rewards/margins": 0.06968297064304352, "rewards/rejected": -0.15988418459892273, "step": 7650 }, { "epoch": 1.38, "grad_norm": 1.381014108657837, "learning_rate": 3.59761246178483e-06, "log_odds_chosen": 0.7422740459442139, "log_odds_ratio": -0.529722273349762, "logits/chosen": -0.4818175435066223, "logits/rejected": -0.4888008236885071, "logps/chosen": -1.0153032541275024, "logps/rejected": -1.5096291303634644, "loss": 0.9453, "nll_loss": 0.8923282623291016, "rewards/accuracies": 0.75, "rewards/chosen": -0.10153033584356308, "rewards/margins": 0.04943258687853813, "rewards/rejected": -0.15096290409564972, "step": 7660 }, { "epoch": 1.39, "grad_norm": 2.111315965652466, "learning_rate": 3.5917891978453924e-06, "log_odds_chosen": 0.7107936143875122, "log_odds_ratio": -0.5873027443885803, "logits/chosen": -0.4801076352596283, "logits/rejected": -0.4742940366268158, "logps/chosen": -0.9502488374710083, "logps/rejected": -1.4334582090377808, "loss": 0.9856, "nll_loss": 0.9268776774406433, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09502488374710083, "rewards/margins": 0.048320937901735306, "rewards/rejected": -0.14334581792354584, "step": 7670 }, { "epoch": 1.39, "grad_norm": 1.6548705101013184, "learning_rate": 3.585965933905954e-06, "log_odds_chosen": 1.0609567165374756, "log_odds_ratio": -0.47992807626724243, "logits/chosen": -0.47569042444229126, "logits/rejected": -0.4778749346733093, "logps/chosen": -0.8274517059326172, "logps/rejected": -1.529815912246704, "loss": 0.8857, "nll_loss": 0.8376930356025696, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08274517953395844, "rewards/margins": 0.07023642957210541, "rewards/rejected": -0.15298160910606384, "step": 7680 }, { "epoch": 1.39, "grad_norm": 0.842616856098175, "learning_rate": 3.580142669966516e-06, "log_odds_chosen": 0.9939400553703308, "log_odds_ratio": -0.5352992415428162, "logits/chosen": -0.4965585768222809, "logits/rejected": -0.4912574291229248, "logps/chosen": -0.9213212132453918, "logps/rejected": -1.6018670797348022, "loss": 0.9687, "nll_loss": 0.9151374697685242, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09213212877511978, "rewards/margins": 0.06805459409952164, "rewards/rejected": -0.16018672287464142, "step": 7690 }, { "epoch": 1.39, "grad_norm": 1.7768384218215942, "learning_rate": 3.574319406027078e-06, "log_odds_chosen": 0.7124654054641724, "log_odds_ratio": -0.5752750039100647, "logits/chosen": -0.46104294061660767, "logits/rejected": -0.4451174736022949, "logps/chosen": -0.9411187171936035, "logps/rejected": -1.430299997329712, "loss": 0.9115, "nll_loss": 0.8539952039718628, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09411187469959259, "rewards/margins": 0.048918113112449646, "rewards/rejected": -0.14302998781204224, "step": 7700 }, { "epoch": 1.39, "grad_norm": 1.1206591129302979, "learning_rate": 3.56849614208764e-06, "log_odds_chosen": 1.004553198814392, "log_odds_ratio": -0.5533329248428345, "logits/chosen": -0.41821640729904175, "logits/rejected": -0.4664444029331207, "logps/chosen": -0.9269332885742188, "logps/rejected": -1.6279127597808838, "loss": 0.9, "nll_loss": 0.844649612903595, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09269334375858307, "rewards/margins": 0.07009793072938919, "rewards/rejected": -0.16279128193855286, "step": 7710 }, { "epoch": 1.39, "grad_norm": 1.933883547782898, "learning_rate": 3.5626728781482023e-06, "log_odds_chosen": 0.9759089350700378, "log_odds_ratio": -0.4841943383216858, "logits/chosen": -0.457724392414093, "logits/rejected": -0.4634011387825012, "logps/chosen": -0.8931747674942017, "logps/rejected": -1.5288536548614502, "loss": 0.9143, "nll_loss": 0.8658315539360046, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08931747823953629, "rewards/margins": 0.0635678842663765, "rewards/rejected": -0.1528853476047516, "step": 7720 }, { "epoch": 1.4, "grad_norm": 1.167686939239502, "learning_rate": 3.5568496142087637e-06, "log_odds_chosen": 1.0014417171478271, "log_odds_ratio": -0.48971033096313477, "logits/chosen": -0.449578195810318, "logits/rejected": -0.4728933870792389, "logps/chosen": -0.8632867932319641, "logps/rejected": -1.5455224514007568, "loss": 0.9463, "nll_loss": 0.8973382115364075, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08632868528366089, "rewards/margins": 0.06822358071804047, "rewards/rejected": -0.15455226600170135, "step": 7730 }, { "epoch": 1.4, "grad_norm": 0.8326305150985718, "learning_rate": 3.5510263502693256e-06, "log_odds_chosen": 0.9664969444274902, "log_odds_ratio": -0.48367372155189514, "logits/chosen": -0.47377943992614746, "logits/rejected": -0.511856734752655, "logps/chosen": -0.9200240969657898, "logps/rejected": -1.578240156173706, "loss": 0.9693, "nll_loss": 0.9209035038948059, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09200242906808853, "rewards/margins": 0.06582160294055939, "rewards/rejected": -0.15782400965690613, "step": 7740 }, { "epoch": 1.4, "grad_norm": 1.0749437808990479, "learning_rate": 3.545203086329888e-06, "log_odds_chosen": 0.525017499923706, "log_odds_ratio": -0.5907121896743774, "logits/chosen": -0.5242208242416382, "logits/rejected": -0.5184580087661743, "logps/chosen": -0.9486880302429199, "logps/rejected": -1.2981536388397217, "loss": 0.969, "nll_loss": 0.9098953008651733, "rewards/accuracies": 0.625, "rewards/chosen": -0.09486880898475647, "rewards/margins": 0.03494657203555107, "rewards/rejected": -0.12981536984443665, "step": 7750 }, { "epoch": 1.4, "grad_norm": 1.736507773399353, "learning_rate": 3.53937982239045e-06, "log_odds_chosen": 0.7098814249038696, "log_odds_ratio": -0.6239954829216003, "logits/chosen": -0.48016709089279175, "logits/rejected": -0.48714813590049744, "logps/chosen": -1.0151746273040771, "logps/rejected": -1.536481499671936, "loss": 1.0214, "nll_loss": 0.9589971303939819, "rewards/accuracies": 0.625, "rewards/chosen": -0.1015174612402916, "rewards/margins": 0.05213068798184395, "rewards/rejected": -0.15364815294742584, "step": 7760 }, { "epoch": 1.4, "grad_norm": 1.0307977199554443, "learning_rate": 3.5335565584510113e-06, "log_odds_chosen": 1.121014952659607, "log_odds_ratio": -0.5014979243278503, "logits/chosen": -0.39459139108657837, "logits/rejected": -0.4565156400203705, "logps/chosen": -0.8604658246040344, "logps/rejected": -1.6737045049667358, "loss": 0.8733, "nll_loss": 0.823115348815918, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08604658395051956, "rewards/margins": 0.08132388442754745, "rewards/rejected": -0.16737046837806702, "step": 7770 }, { "epoch": 1.41, "grad_norm": 1.0171877145767212, "learning_rate": 3.5277332945115736e-06, "log_odds_chosen": 0.9733405113220215, "log_odds_ratio": -0.4627884328365326, "logits/chosen": -0.38336968421936035, "logits/rejected": -0.4400951862335205, "logps/chosen": -0.9231967926025391, "logps/rejected": -1.585914969444275, "loss": 0.9007, "nll_loss": 0.8543741106987, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.09231968224048615, "rewards/margins": 0.0662718415260315, "rewards/rejected": -0.15859152376651764, "step": 7780 }, { "epoch": 1.41, "grad_norm": 0.9106153845787048, "learning_rate": 3.5219100305721355e-06, "log_odds_chosen": 1.0205250978469849, "log_odds_ratio": -0.5149970054626465, "logits/chosen": -0.4142359793186188, "logits/rejected": -0.411867618560791, "logps/chosen": -0.8714788556098938, "logps/rejected": -1.5835577249526978, "loss": 0.9246, "nll_loss": 0.8730686902999878, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08714788407087326, "rewards/margins": 0.07120787352323532, "rewards/rejected": -0.15835575759410858, "step": 7790 }, { "epoch": 1.41, "grad_norm": 1.4677820205688477, "learning_rate": 3.5160867666326978e-06, "log_odds_chosen": 0.8735268712043762, "log_odds_ratio": -0.5531715154647827, "logits/chosen": -0.48417219519615173, "logits/rejected": -0.4957372546195984, "logps/chosen": -0.9382551312446594, "logps/rejected": -1.514022946357727, "loss": 1.0231, "nll_loss": 0.9677915573120117, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09382550418376923, "rewards/margins": 0.057576775550842285, "rewards/rejected": -0.1514022946357727, "step": 7800 }, { "epoch": 1.41, "grad_norm": 3.8200325965881348, "learning_rate": 3.5102635026932592e-06, "log_odds_chosen": 1.2385016679763794, "log_odds_ratio": -0.4712151885032654, "logits/chosen": -0.42320528626441956, "logits/rejected": -0.48704639077186584, "logps/chosen": -0.851686954498291, "logps/rejected": -1.7134358882904053, "loss": 0.889, "nll_loss": 0.8419039845466614, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08516870439052582, "rewards/margins": 0.08617488294839859, "rewards/rejected": -0.171343594789505, "step": 7810 }, { "epoch": 1.41, "grad_norm": 1.053245186805725, "learning_rate": 3.504440238753821e-06, "log_odds_chosen": 0.9881025552749634, "log_odds_ratio": -0.5334082841873169, "logits/chosen": -0.4514409601688385, "logits/rejected": -0.4625988006591797, "logps/chosen": -0.8842605352401733, "logps/rejected": -1.5331977605819702, "loss": 0.9572, "nll_loss": 0.9038169980049133, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08842606842517853, "rewards/margins": 0.06489372253417969, "rewards/rejected": -0.15331977605819702, "step": 7820 }, { "epoch": 1.41, "grad_norm": 2.9712746143341064, "learning_rate": 3.4986169748143834e-06, "log_odds_chosen": 1.1356464624404907, "log_odds_ratio": -0.450702428817749, "logits/chosen": -0.4527707099914551, "logits/rejected": -0.48675090074539185, "logps/chosen": -0.8333941698074341, "logps/rejected": -1.6239534616470337, "loss": 0.8964, "nll_loss": 0.851354718208313, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08333941549062729, "rewards/margins": 0.07905593514442444, "rewards/rejected": -0.16239535808563232, "step": 7830 }, { "epoch": 1.42, "grad_norm": 0.9804763197898865, "learning_rate": 3.4927937108749453e-06, "log_odds_chosen": 0.8883824348449707, "log_odds_ratio": -0.5219795107841492, "logits/chosen": -0.45694953203201294, "logits/rejected": -0.46481671929359436, "logps/chosen": -0.9310160875320435, "logps/rejected": -1.554607629776001, "loss": 0.943, "nll_loss": 0.8907995223999023, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0931016057729721, "rewards/margins": 0.06235916540026665, "rewards/rejected": -0.15546075999736786, "step": 7840 }, { "epoch": 1.42, "grad_norm": 1.1108657121658325, "learning_rate": 3.486970446935507e-06, "log_odds_chosen": 0.9498538970947266, "log_odds_ratio": -0.5386672616004944, "logits/chosen": -0.4198557734489441, "logits/rejected": -0.4490021765232086, "logps/chosen": -0.8262109756469727, "logps/rejected": -1.4365177154541016, "loss": 0.8896, "nll_loss": 0.8356889486312866, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08262109756469727, "rewards/margins": 0.06103065609931946, "rewards/rejected": -0.14365175366401672, "step": 7850 }, { "epoch": 1.42, "grad_norm": 0.9033617973327637, "learning_rate": 3.481147182996069e-06, "log_odds_chosen": 1.0498912334442139, "log_odds_ratio": -0.5304676294326782, "logits/chosen": -0.4416961669921875, "logits/rejected": -0.47163066267967224, "logps/chosen": -0.8820828199386597, "logps/rejected": -1.591942548751831, "loss": 0.9277, "nll_loss": 0.874676525592804, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08820828795433044, "rewards/margins": 0.07098597288131714, "rewards/rejected": -0.15919426083564758, "step": 7860 }, { "epoch": 1.42, "grad_norm": 1.2365633249282837, "learning_rate": 3.475323919056631e-06, "log_odds_chosen": 0.8197916746139526, "log_odds_ratio": -0.5315700769424438, "logits/chosen": -0.4701627790927887, "logits/rejected": -0.5030553340911865, "logps/chosen": -0.9004766345024109, "logps/rejected": -1.4492244720458984, "loss": 1.0026, "nll_loss": 0.9494752883911133, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0900476723909378, "rewards/margins": 0.05487479642033577, "rewards/rejected": -0.14492245018482208, "step": 7870 }, { "epoch": 1.42, "grad_norm": 0.8577108979225159, "learning_rate": 3.469500655117193e-06, "log_odds_chosen": 1.1933759450912476, "log_odds_ratio": -0.4663127064704895, "logits/chosen": -0.45539942383766174, "logits/rejected": -0.4741978049278259, "logps/chosen": -0.8282696008682251, "logps/rejected": -1.6477653980255127, "loss": 0.9535, "nll_loss": 0.9068788290023804, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08282694965600967, "rewards/margins": 0.08194959908723831, "rewards/rejected": -0.16477656364440918, "step": 7880 }, { "epoch": 1.43, "grad_norm": 1.227028489112854, "learning_rate": 3.463677391177755e-06, "log_odds_chosen": 0.8997465372085571, "log_odds_ratio": -0.5082308650016785, "logits/chosen": -0.46594834327697754, "logits/rejected": -0.5129804611206055, "logps/chosen": -0.9946556091308594, "logps/rejected": -1.6416242122650146, "loss": 1.0179, "nll_loss": 0.9670284390449524, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09946557134389877, "rewards/margins": 0.06469685584306717, "rewards/rejected": -0.16416242718696594, "step": 7890 }, { "epoch": 1.43, "grad_norm": 2.529578447341919, "learning_rate": 3.4578541272383166e-06, "log_odds_chosen": 0.9256827235221863, "log_odds_ratio": -0.5562437772750854, "logits/chosen": -0.48142653703689575, "logits/rejected": -0.4927385449409485, "logps/chosen": -0.8803023099899292, "logps/rejected": -1.54433274269104, "loss": 0.9602, "nll_loss": 0.9045834541320801, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08803024142980576, "rewards/margins": 0.06640304625034332, "rewards/rejected": -0.15443329513072968, "step": 7900 }, { "epoch": 1.43, "grad_norm": 1.199773907661438, "learning_rate": 3.452030863298879e-06, "log_odds_chosen": 0.9792470932006836, "log_odds_ratio": -0.49643927812576294, "logits/chosen": -0.4774385392665863, "logits/rejected": -0.4975582957267761, "logps/chosen": -0.8605278730392456, "logps/rejected": -1.5568548440933228, "loss": 0.9923, "nll_loss": 0.9426447153091431, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0860527977347374, "rewards/margins": 0.06963268667459488, "rewards/rejected": -0.15568546950817108, "step": 7910 }, { "epoch": 1.43, "grad_norm": 1.121840238571167, "learning_rate": 3.446207599359441e-06, "log_odds_chosen": 0.9563971757888794, "log_odds_ratio": -0.5624344944953918, "logits/chosen": -0.44062572717666626, "logits/rejected": -0.4463469982147217, "logps/chosen": -0.956190288066864, "logps/rejected": -1.6515365839004517, "loss": 0.9733, "nll_loss": 0.9170805811882019, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09561903029680252, "rewards/margins": 0.06953462213277817, "rewards/rejected": -0.1651536524295807, "step": 7920 }, { "epoch": 1.43, "grad_norm": 1.251677393913269, "learning_rate": 3.4403843354200027e-06, "log_odds_chosen": 0.9738191366195679, "log_odds_ratio": -0.49310868978500366, "logits/chosen": -0.446247398853302, "logits/rejected": -0.5060548782348633, "logps/chosen": -1.005068063735962, "logps/rejected": -1.673824667930603, "loss": 0.9543, "nll_loss": 0.9050275683403015, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10050680488348007, "rewards/margins": 0.0668756514787674, "rewards/rejected": -0.16738246381282806, "step": 7930 }, { "epoch": 1.43, "grad_norm": 0.9788984060287476, "learning_rate": 3.434561071480565e-06, "log_odds_chosen": 0.7685114145278931, "log_odds_ratio": -0.6038535833358765, "logits/chosen": -0.45945462584495544, "logits/rejected": -0.4910176694393158, "logps/chosen": -1.03463613986969, "logps/rejected": -1.5275377035140991, "loss": 1.0876, "nll_loss": 1.0272212028503418, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10346361249685287, "rewards/margins": 0.049290161579847336, "rewards/rejected": -0.15275375545024872, "step": 7940 }, { "epoch": 1.44, "grad_norm": 2.345649480819702, "learning_rate": 3.4287378075411265e-06, "log_odds_chosen": 0.7906461954116821, "log_odds_ratio": -0.5344957113265991, "logits/chosen": -0.49102646112442017, "logits/rejected": -0.48199811577796936, "logps/chosen": -1.0372166633605957, "logps/rejected": -1.5727733373641968, "loss": 0.9831, "nll_loss": 0.9296104311943054, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10372166335582733, "rewards/margins": 0.05355566740036011, "rewards/rejected": -0.15727733075618744, "step": 7950 }, { "epoch": 1.44, "grad_norm": 3.018699884414673, "learning_rate": 3.4229145436016884e-06, "log_odds_chosen": 0.9407013058662415, "log_odds_ratio": -0.5151436924934387, "logits/chosen": -0.4773307740688324, "logits/rejected": -0.5138453841209412, "logps/chosen": -0.9273196458816528, "logps/rejected": -1.6137325763702393, "loss": 0.9307, "nll_loss": 0.8791698217391968, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09273196011781693, "rewards/margins": 0.0686412900686264, "rewards/rejected": -0.16137325763702393, "step": 7960 }, { "epoch": 1.44, "grad_norm": 1.5302883386611938, "learning_rate": 3.4170912796622507e-06, "log_odds_chosen": 1.0637813806533813, "log_odds_ratio": -0.48986929655075073, "logits/chosen": -0.47258901596069336, "logits/rejected": -0.49703383445739746, "logps/chosen": -0.8885113000869751, "logps/rejected": -1.644118309020996, "loss": 0.9679, "nll_loss": 0.9189218282699585, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08885113149881363, "rewards/margins": 0.07556071877479553, "rewards/rejected": -0.16441184282302856, "step": 7970 }, { "epoch": 1.44, "grad_norm": 1.6340556144714355, "learning_rate": 3.4112680157228126e-06, "log_odds_chosen": 0.9519853591918945, "log_odds_ratio": -0.5761106014251709, "logits/chosen": -0.4162136912345886, "logits/rejected": -0.44225525856018066, "logps/chosen": -0.899207592010498, "logps/rejected": -1.5692665576934814, "loss": 0.8714, "nll_loss": 0.8137787580490112, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0899207666516304, "rewards/margins": 0.06700590997934341, "rewards/rejected": -0.15692667663097382, "step": 7980 }, { "epoch": 1.44, "grad_norm": 1.172593116760254, "learning_rate": 3.4054447517833745e-06, "log_odds_chosen": 0.8323512077331543, "log_odds_ratio": -0.5102376937866211, "logits/chosen": -0.4562395513057709, "logits/rejected": -0.49356168508529663, "logps/chosen": -0.9360699653625488, "logps/rejected": -1.473852276802063, "loss": 0.9427, "nll_loss": 0.8917063474655151, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09360699355602264, "rewards/margins": 0.05377823859453201, "rewards/rejected": -0.14738522469997406, "step": 7990 }, { "epoch": 1.45, "grad_norm": 1.285165548324585, "learning_rate": 3.3996214878439363e-06, "log_odds_chosen": 0.9385267496109009, "log_odds_ratio": -0.5298964381217957, "logits/chosen": -0.44690507650375366, "logits/rejected": -0.4662812650203705, "logps/chosen": -0.8296586275100708, "logps/rejected": -1.4835478067398071, "loss": 0.9901, "nll_loss": 0.9370955228805542, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08296586573123932, "rewards/margins": 0.06538892537355423, "rewards/rejected": -0.14835476875305176, "step": 8000 }, { "epoch": 1.45, "grad_norm": 1.7378290891647339, "learning_rate": 3.3937982239044982e-06, "log_odds_chosen": 0.8786460161209106, "log_odds_ratio": -0.5547272562980652, "logits/chosen": -0.5031970739364624, "logits/rejected": -0.4877137243747711, "logps/chosen": -0.9259228706359863, "logps/rejected": -1.5466254949569702, "loss": 1.0045, "nll_loss": 0.9490045309066772, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09259229898452759, "rewards/margins": 0.062070250511169434, "rewards/rejected": -0.15466253459453583, "step": 8010 }, { "epoch": 1.45, "grad_norm": 1.2688935995101929, "learning_rate": 3.3879749599650605e-06, "log_odds_chosen": 0.7960363030433655, "log_odds_ratio": -0.5690153241157532, "logits/chosen": -0.4590454697608948, "logits/rejected": -0.4879334568977356, "logps/chosen": -0.8963125348091125, "logps/rejected": -1.4358001947402954, "loss": 1.0057, "nll_loss": 0.9488385915756226, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08963125199079514, "rewards/margins": 0.053948771208524704, "rewards/rejected": -0.14358003437519073, "step": 8020 }, { "epoch": 1.45, "grad_norm": 1.5005302429199219, "learning_rate": 3.3821516960256224e-06, "log_odds_chosen": 0.8584071397781372, "log_odds_ratio": -0.5413273572921753, "logits/chosen": -0.47158876061439514, "logits/rejected": -0.49479609727859497, "logps/chosen": -0.9288450479507446, "logps/rejected": -1.5227404832839966, "loss": 0.9851, "nll_loss": 0.9309417605400085, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09288450330495834, "rewards/margins": 0.05938952416181564, "rewards/rejected": -0.15227404236793518, "step": 8030 }, { "epoch": 1.45, "grad_norm": 1.1802349090576172, "learning_rate": 3.376328432086184e-06, "log_odds_chosen": 1.0664393901824951, "log_odds_ratio": -0.5075581669807434, "logits/chosen": -0.423412561416626, "logits/rejected": -0.44078055024147034, "logps/chosen": -0.8771320581436157, "logps/rejected": -1.672170639038086, "loss": 0.8723, "nll_loss": 0.8215177655220032, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08771320432424545, "rewards/margins": 0.0795038640499115, "rewards/rejected": -0.16721707582473755, "step": 8040 }, { "epoch": 1.45, "grad_norm": 3.1907968521118164, "learning_rate": 3.370505168146746e-06, "log_odds_chosen": 0.9271900057792664, "log_odds_ratio": -0.5006861686706543, "logits/chosen": -0.45901423692703247, "logits/rejected": -0.45146292448043823, "logps/chosen": -1.0008279085159302, "logps/rejected": -1.6424148082733154, "loss": 0.9053, "nll_loss": 0.8551861047744751, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10008279979228973, "rewards/margins": 0.06415869295597076, "rewards/rejected": -0.1642414927482605, "step": 8050 }, { "epoch": 1.46, "grad_norm": 0.8206511735916138, "learning_rate": 3.364681904207308e-06, "log_odds_chosen": 1.1431403160095215, "log_odds_ratio": -0.47963443398475647, "logits/chosen": -0.4267405569553375, "logits/rejected": -0.4710633158683777, "logps/chosen": -0.9099753499031067, "logps/rejected": -1.7252388000488281, "loss": 0.9361, "nll_loss": 0.8881762623786926, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09099753946065903, "rewards/margins": 0.08152634650468826, "rewards/rejected": -0.1725238859653473, "step": 8060 }, { "epoch": 1.46, "grad_norm": 1.217850685119629, "learning_rate": 3.35885864026787e-06, "log_odds_chosen": 1.0072505474090576, "log_odds_ratio": -0.5082023739814758, "logits/chosen": -0.423494815826416, "logits/rejected": -0.4322236180305481, "logps/chosen": -0.885513186454773, "logps/rejected": -1.5974544286727905, "loss": 0.9448, "nll_loss": 0.8939436674118042, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08855132758617401, "rewards/margins": 0.071194127202034, "rewards/rejected": -0.15974543988704681, "step": 8070 }, { "epoch": 1.46, "grad_norm": 1.512110948562622, "learning_rate": 3.353035376328432e-06, "log_odds_chosen": 0.8098253011703491, "log_odds_ratio": -0.5478814840316772, "logits/chosen": -0.4838322103023529, "logits/rejected": -0.4989416003227234, "logps/chosen": -0.8810871243476868, "logps/rejected": -1.42708420753479, "loss": 0.9925, "nll_loss": 0.9376763105392456, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08810871094465256, "rewards/margins": 0.05459970235824585, "rewards/rejected": -0.142708420753479, "step": 8080 }, { "epoch": 1.46, "grad_norm": 1.6474279165267944, "learning_rate": 3.3472121123889937e-06, "log_odds_chosen": 1.0930445194244385, "log_odds_ratio": -0.46995505690574646, "logits/chosen": -0.4111822545528412, "logits/rejected": -0.45124974846839905, "logps/chosen": -0.8773530125617981, "logps/rejected": -1.6252126693725586, "loss": 0.9204, "nll_loss": 0.8734337687492371, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08773529529571533, "rewards/margins": 0.07478597015142441, "rewards/rejected": -0.16252127289772034, "step": 8090 }, { "epoch": 1.46, "grad_norm": 1.075158953666687, "learning_rate": 3.341388848449556e-06, "log_odds_chosen": 0.9450801610946655, "log_odds_ratio": -0.5371429920196533, "logits/chosen": -0.4427841305732727, "logits/rejected": -0.4451626241207123, "logps/chosen": -0.9524284601211548, "logps/rejected": -1.6333316564559937, "loss": 0.8849, "nll_loss": 0.8311794996261597, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09524284303188324, "rewards/margins": 0.06809031963348389, "rewards/rejected": -0.16333314776420593, "step": 8100 }, { "epoch": 1.46, "grad_norm": 0.8650028705596924, "learning_rate": 3.335565584510118e-06, "log_odds_chosen": 0.9066849946975708, "log_odds_ratio": -0.5334414839744568, "logits/chosen": -0.4809895157814026, "logits/rejected": -0.4950200021266937, "logps/chosen": -0.9143193960189819, "logps/rejected": -1.5417639017105103, "loss": 0.985, "nll_loss": 0.9316898584365845, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09143194556236267, "rewards/margins": 0.06274445354938507, "rewards/rejected": -0.15417639911174774, "step": 8110 }, { "epoch": 1.47, "grad_norm": 1.051186442375183, "learning_rate": 3.3297423205706794e-06, "log_odds_chosen": 1.2927372455596924, "log_odds_ratio": -0.43638482689857483, "logits/chosen": -0.398875892162323, "logits/rejected": -0.45413732528686523, "logps/chosen": -0.9220579862594604, "logps/rejected": -1.8598989248275757, "loss": 0.9553, "nll_loss": 0.9116722345352173, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09220579266548157, "rewards/margins": 0.09378410875797272, "rewards/rejected": -0.18598990142345428, "step": 8120 }, { "epoch": 1.47, "grad_norm": 0.780241072177887, "learning_rate": 3.3239190566312417e-06, "log_odds_chosen": 0.8707435727119446, "log_odds_ratio": -0.5559987425804138, "logits/chosen": -0.4056832194328308, "logits/rejected": -0.43125540018081665, "logps/chosen": -0.8941282033920288, "logps/rejected": -1.4603755474090576, "loss": 0.9445, "nll_loss": 0.8889206051826477, "rewards/accuracies": 0.625, "rewards/chosen": -0.08941281586885452, "rewards/margins": 0.05662474036216736, "rewards/rejected": -0.14603756368160248, "step": 8130 }, { "epoch": 1.47, "grad_norm": 1.1260215044021606, "learning_rate": 3.3180957926918036e-06, "log_odds_chosen": 0.682217001914978, "log_odds_ratio": -0.6097729802131653, "logits/chosen": -0.39974793791770935, "logits/rejected": -0.45179304480552673, "logps/chosen": -0.9107609987258911, "logps/rejected": -1.3272377252578735, "loss": 0.9954, "nll_loss": 0.9344407320022583, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0910760909318924, "rewards/margins": 0.04164768010377884, "rewards/rejected": -0.13272377848625183, "step": 8140 }, { "epoch": 1.47, "grad_norm": 2.6169352531433105, "learning_rate": 3.3122725287523655e-06, "log_odds_chosen": 0.8187638521194458, "log_odds_ratio": -0.555806040763855, "logits/chosen": -0.4935298562049866, "logits/rejected": -0.46518078446388245, "logps/chosen": -0.9738367795944214, "logps/rejected": -1.536250114440918, "loss": 0.9823, "nll_loss": 0.9266853332519531, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09738368541002274, "rewards/margins": 0.05624132603406906, "rewards/rejected": -0.1536250114440918, "step": 8150 }, { "epoch": 1.47, "grad_norm": 1.5755623579025269, "learning_rate": 3.306449264812928e-06, "log_odds_chosen": 0.8507640957832336, "log_odds_ratio": -0.5479308366775513, "logits/chosen": -0.45997118949890137, "logits/rejected": -0.48409271240234375, "logps/chosen": -0.8985317945480347, "logps/rejected": -1.4782774448394775, "loss": 0.9323, "nll_loss": 0.8775039911270142, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0898531898856163, "rewards/margins": 0.057974569499492645, "rewards/rejected": -0.14782774448394775, "step": 8160 }, { "epoch": 1.48, "grad_norm": 0.7219186425209045, "learning_rate": 3.3006260008734893e-06, "log_odds_chosen": 1.332714319229126, "log_odds_ratio": -0.453878790140152, "logits/chosen": -0.41151317954063416, "logits/rejected": -0.4791542887687683, "logps/chosen": -0.8005874752998352, "logps/rejected": -1.7394930124282837, "loss": 0.8922, "nll_loss": 0.8467932939529419, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0800587460398674, "rewards/margins": 0.09389055520296097, "rewards/rejected": -0.17394930124282837, "step": 8170 }, { "epoch": 1.48, "grad_norm": 1.6263483762741089, "learning_rate": 3.294802736934051e-06, "log_odds_chosen": 0.953560471534729, "log_odds_ratio": -0.5101215243339539, "logits/chosen": -0.44410592317581177, "logits/rejected": -0.4457341134548187, "logps/chosen": -0.9805153608322144, "logps/rejected": -1.6686265468597412, "loss": 0.9395, "nll_loss": 0.8884419202804565, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09805154800415039, "rewards/margins": 0.06881112605333328, "rewards/rejected": -0.16686268150806427, "step": 8180 }, { "epoch": 1.48, "grad_norm": 1.2888286113739014, "learning_rate": 3.2889794729946135e-06, "log_odds_chosen": 1.142336130142212, "log_odds_ratio": -0.4796481728553772, "logits/chosen": -0.4701244831085205, "logits/rejected": -0.4900820255279541, "logps/chosen": -0.9152582287788391, "logps/rejected": -1.7305917739868164, "loss": 0.9902, "nll_loss": 0.9422494769096375, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09152581542730331, "rewards/margins": 0.08153336495161057, "rewards/rejected": -0.17305919528007507, "step": 8190 }, { "epoch": 1.48, "grad_norm": 2.0623779296875, "learning_rate": 3.2831562090551753e-06, "log_odds_chosen": 0.8945195078849792, "log_odds_ratio": -0.5508736968040466, "logits/chosen": -0.46278437972068787, "logits/rejected": -0.4786438047885895, "logps/chosen": -0.9259538650512695, "logps/rejected": -1.5728238821029663, "loss": 0.9739, "nll_loss": 0.9188405871391296, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09259538352489471, "rewards/margins": 0.06468699872493744, "rewards/rejected": -0.15728238224983215, "step": 8200 }, { "epoch": 1.48, "grad_norm": 1.12440025806427, "learning_rate": 3.2773329451157372e-06, "log_odds_chosen": 1.2054493427276611, "log_odds_ratio": -0.4695872366428375, "logits/chosen": -0.46673083305358887, "logits/rejected": -0.4775332808494568, "logps/chosen": -0.8975292444229126, "logps/rejected": -1.7807804346084595, "loss": 0.891, "nll_loss": 0.8440417051315308, "rewards/accuracies": 0.75, "rewards/chosen": -0.0897529199719429, "rewards/margins": 0.0883251205086708, "rewards/rejected": -0.17807802557945251, "step": 8210 }, { "epoch": 1.48, "grad_norm": 1.0653307437896729, "learning_rate": 3.271509681176299e-06, "log_odds_chosen": 0.9999645352363586, "log_odds_ratio": -0.4829765856266022, "logits/chosen": -0.42667704820632935, "logits/rejected": -0.4870891571044922, "logps/chosen": -0.8960220217704773, "logps/rejected": -1.6177982091903687, "loss": 0.8606, "nll_loss": 0.8122758865356445, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08960221707820892, "rewards/margins": 0.07217760384082794, "rewards/rejected": -0.16177980601787567, "step": 8220 }, { "epoch": 1.49, "grad_norm": 0.6214142441749573, "learning_rate": 3.265686417236861e-06, "log_odds_chosen": 1.1718106269836426, "log_odds_ratio": -0.46951428055763245, "logits/chosen": -0.44821277260780334, "logits/rejected": -0.4748601019382477, "logps/chosen": -0.8514217138290405, "logps/rejected": -1.6607784032821655, "loss": 0.9352, "nll_loss": 0.8882169723510742, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08514218032360077, "rewards/margins": 0.08093566447496414, "rewards/rejected": -0.1660778522491455, "step": 8230 }, { "epoch": 1.49, "grad_norm": 1.8574119806289673, "learning_rate": 3.2598631532974233e-06, "log_odds_chosen": 1.05377995967865, "log_odds_ratio": -0.48320484161376953, "logits/chosen": -0.457883358001709, "logits/rejected": -0.4863380789756775, "logps/chosen": -0.9301978945732117, "logps/rejected": -1.6607166528701782, "loss": 0.9512, "nll_loss": 0.9028828740119934, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09301978349685669, "rewards/margins": 0.07305187731981277, "rewards/rejected": -0.16607165336608887, "step": 8240 }, { "epoch": 1.49, "grad_norm": 1.0782403945922852, "learning_rate": 3.254039889357985e-06, "log_odds_chosen": 0.5579296350479126, "log_odds_ratio": -0.6075290441513062, "logits/chosen": -0.4750538766384125, "logits/rejected": -0.46040016412734985, "logps/chosen": -0.9509231448173523, "logps/rejected": -1.3567991256713867, "loss": 0.9805, "nll_loss": 0.9197883605957031, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09509231895208359, "rewards/margins": 0.04058759659528732, "rewards/rejected": -0.1356799304485321, "step": 8250 }, { "epoch": 1.49, "grad_norm": 1.754353642463684, "learning_rate": 3.2482166254185467e-06, "log_odds_chosen": 1.289215087890625, "log_odds_ratio": -0.48846206068992615, "logits/chosen": -0.42231351137161255, "logits/rejected": -0.45326024293899536, "logps/chosen": -0.9167190790176392, "logps/rejected": -1.7700271606445312, "loss": 0.9213, "nll_loss": 0.8724179267883301, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0916719064116478, "rewards/margins": 0.08533082902431488, "rewards/rejected": -0.17700272798538208, "step": 8260 }, { "epoch": 1.49, "grad_norm": 1.4862349033355713, "learning_rate": 3.242393361479109e-06, "log_odds_chosen": 0.8918800354003906, "log_odds_ratio": -0.5321205854415894, "logits/chosen": -0.48621082305908203, "logits/rejected": -0.5018815398216248, "logps/chosen": -1.01481032371521, "logps/rejected": -1.704506516456604, "loss": 0.9819, "nll_loss": 0.9286412000656128, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10148103535175323, "rewards/margins": 0.06896961480379105, "rewards/rejected": -0.17045065760612488, "step": 8270 }, { "epoch": 1.5, "grad_norm": 1.2697803974151611, "learning_rate": 3.236570097539671e-06, "log_odds_chosen": 0.6413637399673462, "log_odds_ratio": -0.5947942137718201, "logits/chosen": -0.5209168195724487, "logits/rejected": -0.5378480553627014, "logps/chosen": -0.9370816349983215, "logps/rejected": -1.3779325485229492, "loss": 1.0176, "nll_loss": 0.9581189155578613, "rewards/accuracies": 0.625, "rewards/chosen": -0.09370815753936768, "rewards/margins": 0.044085096567869186, "rewards/rejected": -0.13779327273368835, "step": 8280 }, { "epoch": 1.5, "grad_norm": 0.9943523406982422, "learning_rate": 3.2307468336002327e-06, "log_odds_chosen": 0.7377707362174988, "log_odds_ratio": -0.57826167345047, "logits/chosen": -0.455329030752182, "logits/rejected": -0.48436814546585083, "logps/chosen": -0.8952816724777222, "logps/rejected": -1.4014275074005127, "loss": 1.0055, "nll_loss": 0.9476629495620728, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.0895281732082367, "rewards/margins": 0.05061458796262741, "rewards/rejected": -0.1401427686214447, "step": 8290 }, { "epoch": 1.5, "grad_norm": 1.187412977218628, "learning_rate": 3.2249235696607946e-06, "log_odds_chosen": 1.1886249780654907, "log_odds_ratio": -0.4324370324611664, "logits/chosen": -0.33722493052482605, "logits/rejected": -0.39880436658859253, "logps/chosen": -0.8110214471817017, "logps/rejected": -1.6159849166870117, "loss": 0.8536, "nll_loss": 0.8103251457214355, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.0811021476984024, "rewards/margins": 0.08049634844064713, "rewards/rejected": -0.16159851849079132, "step": 8300 }, { "epoch": 1.5, "eval_log_odds_chosen": 0.9150215983390808, "eval_log_odds_ratio": -0.5380129218101501, "eval_logits/chosen": -0.4303891062736511, "eval_logits/rejected": -0.45025941729545593, "eval_logps/chosen": -0.9312528967857361, "eval_logps/rejected": -1.5771743059158325, "eval_loss": 0.9696574211120605, "eval_nll_loss": 0.915856122970581, "eval_rewards/accuracies": 0.6705276966094971, "eval_rewards/chosen": -0.09312529861927032, "eval_rewards/margins": 0.0645921528339386, "eval_rewards/rejected": -0.15771742165088654, "eval_runtime": 2274.4333, "eval_samples_per_second": 1.025, "eval_steps_per_second": 1.025, "step": 8304 }, { "epoch": 1.5, "grad_norm": 1.3505573272705078, "learning_rate": 3.2191003057213565e-06, "log_odds_chosen": 0.7017135620117188, "log_odds_ratio": -0.5855156779289246, "logits/chosen": -0.4498369097709656, "logits/rejected": -0.47008219361305237, "logps/chosen": -1.0464216470718384, "logps/rejected": -1.5228334665298462, "loss": 0.989, "nll_loss": 0.9304904937744141, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10464215278625488, "rewards/margins": 0.04764118790626526, "rewards/rejected": -0.15228334069252014, "step": 8310 }, { "epoch": 1.5, "grad_norm": 1.1382750272750854, "learning_rate": 3.213277041781919e-06, "log_odds_chosen": 1.1573059558868408, "log_odds_ratio": -0.5401102304458618, "logits/chosen": -0.44930943846702576, "logits/rejected": -0.49644845724105835, "logps/chosen": -0.8247823715209961, "logps/rejected": -1.6526029109954834, "loss": 0.9384, "nll_loss": 0.8843981623649597, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08247824013233185, "rewards/margins": 0.08278205245733261, "rewards/rejected": -0.16526028513908386, "step": 8320 }, { "epoch": 1.5, "grad_norm": 1.3331044912338257, "learning_rate": 3.2074537778424807e-06, "log_odds_chosen": 0.8306789398193359, "log_odds_ratio": -0.565879225730896, "logits/chosen": -0.43868565559387207, "logits/rejected": -0.4368368983268738, "logps/chosen": -0.8823145627975464, "logps/rejected": -1.4312303066253662, "loss": 0.9337, "nll_loss": 0.87712162733078, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08823145925998688, "rewards/margins": 0.05489157885313034, "rewards/rejected": -0.14312303066253662, "step": 8330 }, { "epoch": 1.51, "grad_norm": 2.8568496704101562, "learning_rate": 3.2016305139030426e-06, "log_odds_chosen": 0.9271842837333679, "log_odds_ratio": -0.5752115845680237, "logits/chosen": -0.4455347955226898, "logits/rejected": -0.48211875557899475, "logps/chosen": -0.9204484820365906, "logps/rejected": -1.6012827157974243, "loss": 0.9359, "nll_loss": 0.878348171710968, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09204484522342682, "rewards/margins": 0.06808345019817352, "rewards/rejected": -0.16012828052043915, "step": 8340 }, { "epoch": 1.51, "grad_norm": 2.5041916370391846, "learning_rate": 3.1958072499636045e-06, "log_odds_chosen": 0.8253329992294312, "log_odds_ratio": -0.5409625768661499, "logits/chosen": -0.42137661576271057, "logits/rejected": -0.4258691668510437, "logps/chosen": -0.9433619379997253, "logps/rejected": -1.4537885189056396, "loss": 0.8955, "nll_loss": 0.8414531946182251, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09433619678020477, "rewards/margins": 0.05104265362024307, "rewards/rejected": -0.14537884294986725, "step": 8350 }, { "epoch": 1.51, "grad_norm": 1.5704162120819092, "learning_rate": 3.1899839860241664e-06, "log_odds_chosen": 0.7973772287368774, "log_odds_ratio": -0.5633861422538757, "logits/chosen": -0.5092782974243164, "logits/rejected": -0.506787896156311, "logps/chosen": -0.9273924827575684, "logps/rejected": -1.4619529247283936, "loss": 0.9969, "nll_loss": 0.940531849861145, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09273925423622131, "rewards/margins": 0.053456056863069534, "rewards/rejected": -0.14619530737400055, "step": 8360 }, { "epoch": 1.51, "grad_norm": 1.4402172565460205, "learning_rate": 3.1841607220847283e-06, "log_odds_chosen": 1.0130927562713623, "log_odds_ratio": -0.5257831811904907, "logits/chosen": -0.43476539850234985, "logits/rejected": -0.4592816233634949, "logps/chosen": -0.8787569999694824, "logps/rejected": -1.5716125965118408, "loss": 0.9334, "nll_loss": 0.8807721138000488, "rewards/accuracies": 0.625, "rewards/chosen": -0.08787570148706436, "rewards/margins": 0.0692855566740036, "rewards/rejected": -0.15716125071048737, "step": 8370 }, { "epoch": 1.51, "grad_norm": 1.170516014099121, "learning_rate": 3.1783374581452906e-06, "log_odds_chosen": 1.0823752880096436, "log_odds_ratio": -0.5135545134544373, "logits/chosen": -0.46290579438209534, "logits/rejected": -0.4929911494255066, "logps/chosen": -0.9144316911697388, "logps/rejected": -1.6458345651626587, "loss": 0.8758, "nll_loss": 0.8243969082832336, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09144316613674164, "rewards/margins": 0.07314030826091766, "rewards/rejected": -0.1645834743976593, "step": 8380 }, { "epoch": 1.52, "grad_norm": 1.599992275238037, "learning_rate": 3.172514194205852e-06, "log_odds_chosen": 1.2497516870498657, "log_odds_ratio": -0.48808449506759644, "logits/chosen": -0.48384079337120056, "logits/rejected": -0.512252688407898, "logps/chosen": -0.8950152397155762, "logps/rejected": -1.8100101947784424, "loss": 0.9544, "nll_loss": 0.905626654624939, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0895015150308609, "rewards/margins": 0.09149952232837677, "rewards/rejected": -0.18100103735923767, "step": 8390 }, { "epoch": 1.52, "grad_norm": 1.133752465248108, "learning_rate": 3.166690930266414e-06, "log_odds_chosen": 0.7259795665740967, "log_odds_ratio": -0.5618212819099426, "logits/chosen": -0.420632541179657, "logits/rejected": -0.48596158623695374, "logps/chosen": -0.9481450319290161, "logps/rejected": -1.441080093383789, "loss": 1.002, "nll_loss": 0.9458674192428589, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09481450170278549, "rewards/margins": 0.04929352179169655, "rewards/rejected": -0.14410802721977234, "step": 8400 }, { "epoch": 1.52, "grad_norm": 2.7289183139801025, "learning_rate": 3.1608676663269762e-06, "log_odds_chosen": 1.017851710319519, "log_odds_ratio": -0.5462450981140137, "logits/chosen": -0.46232685446739197, "logits/rejected": -0.473463773727417, "logps/chosen": -1.0255944728851318, "logps/rejected": -1.7579063177108765, "loss": 1.0151, "nll_loss": 0.9605172276496887, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10255946218967438, "rewards/margins": 0.07323118299245834, "rewards/rejected": -0.17579063773155212, "step": 8410 }, { "epoch": 1.52, "grad_norm": 1.4500068426132202, "learning_rate": 3.155044402387538e-06, "log_odds_chosen": 0.6885607838630676, "log_odds_ratio": -0.6273342370986938, "logits/chosen": -0.4968503415584564, "logits/rejected": -0.5165926218032837, "logps/chosen": -0.9877980947494507, "logps/rejected": -1.4659242630004883, "loss": 1.0864, "nll_loss": 1.0236256122589111, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0987798199057579, "rewards/margins": 0.047812603414058685, "rewards/rejected": -0.1465924233198166, "step": 8420 }, { "epoch": 1.52, "grad_norm": 1.252193808555603, "learning_rate": 3.1492211384481004e-06, "log_odds_chosen": 0.9662941694259644, "log_odds_ratio": -0.4978674054145813, "logits/chosen": -0.41275423765182495, "logits/rejected": -0.4935991168022156, "logps/chosen": -0.8912984728813171, "logps/rejected": -1.511781930923462, "loss": 0.9413, "nll_loss": 0.8915241360664368, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08912985026836395, "rewards/margins": 0.06204833835363388, "rewards/rejected": -0.15117818117141724, "step": 8430 }, { "epoch": 1.52, "grad_norm": 1.68789541721344, "learning_rate": 3.143397874508662e-06, "log_odds_chosen": 0.9445701837539673, "log_odds_ratio": -0.5349358320236206, "logits/chosen": -0.4067501425743103, "logits/rejected": -0.4603399336338043, "logps/chosen": -0.9744407534599304, "logps/rejected": -1.6701091527938843, "loss": 1.015, "nll_loss": 0.9614804983139038, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0974440798163414, "rewards/margins": 0.06956683844327927, "rewards/rejected": -0.16701093316078186, "step": 8440 }, { "epoch": 1.53, "grad_norm": 1.3491225242614746, "learning_rate": 3.1375746105692238e-06, "log_odds_chosen": 0.6654536128044128, "log_odds_ratio": -0.5875769853591919, "logits/chosen": -0.47851261496543884, "logits/rejected": -0.5094243288040161, "logps/chosen": -0.969096839427948, "logps/rejected": -1.4325406551361084, "loss": 0.939, "nll_loss": 0.8802341222763062, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09690967947244644, "rewards/margins": 0.04634439945220947, "rewards/rejected": -0.14325407147407532, "step": 8450 }, { "epoch": 1.53, "grad_norm": 1.4580334424972534, "learning_rate": 3.131751346629786e-06, "log_odds_chosen": 0.9235858917236328, "log_odds_ratio": -0.4925766885280609, "logits/chosen": -0.439488023519516, "logits/rejected": -0.46981319785118103, "logps/chosen": -0.8824410438537598, "logps/rejected": -1.5106605291366577, "loss": 0.9123, "nll_loss": 0.8630245327949524, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08824410289525986, "rewards/margins": 0.06282194703817368, "rewards/rejected": -0.15106606483459473, "step": 8460 }, { "epoch": 1.53, "grad_norm": 1.3175749778747559, "learning_rate": 3.125928082690348e-06, "log_odds_chosen": 1.1061830520629883, "log_odds_ratio": -0.49878525733947754, "logits/chosen": -0.39459967613220215, "logits/rejected": -0.44091662764549255, "logps/chosen": -0.8368655443191528, "logps/rejected": -1.6044450998306274, "loss": 0.9248, "nll_loss": 0.8748943209648132, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08368656039237976, "rewards/margins": 0.07675794512033463, "rewards/rejected": -0.1604444980621338, "step": 8470 }, { "epoch": 1.53, "grad_norm": 1.8184188604354858, "learning_rate": 3.1201048187509094e-06, "log_odds_chosen": 1.3821189403533936, "log_odds_ratio": -0.4116799235343933, "logits/chosen": -0.3508300185203552, "logits/rejected": -0.374864399433136, "logps/chosen": -0.8738387227058411, "logps/rejected": -1.8437020778656006, "loss": 0.862, "nll_loss": 0.8208430409431458, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08738387376070023, "rewards/margins": 0.09698633849620819, "rewards/rejected": -0.18437020480632782, "step": 8480 }, { "epoch": 1.53, "grad_norm": 1.1924893856048584, "learning_rate": 3.1142815548114717e-06, "log_odds_chosen": 1.1683502197265625, "log_odds_ratio": -0.4778009057044983, "logits/chosen": -0.35688087344169617, "logits/rejected": -0.40984684228897095, "logps/chosen": -0.8918578028678894, "logps/rejected": -1.6981723308563232, "loss": 0.908, "nll_loss": 0.8602396249771118, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08918578922748566, "rewards/margins": 0.08063144981861115, "rewards/rejected": -0.1698172390460968, "step": 8490 }, { "epoch": 1.54, "grad_norm": 1.0544793605804443, "learning_rate": 3.1084582908720336e-06, "log_odds_chosen": 1.074487566947937, "log_odds_ratio": -0.5217118859291077, "logits/chosen": -0.4763789772987366, "logits/rejected": -0.47022953629493713, "logps/chosen": -0.9271343350410461, "logps/rejected": -1.6965866088867188, "loss": 1.0064, "nll_loss": 0.9542236328125, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09271343052387238, "rewards/margins": 0.0769452303647995, "rewards/rejected": -0.16965866088867188, "step": 8500 }, { "epoch": 1.54, "grad_norm": 1.2964205741882324, "learning_rate": 3.1026350269325955e-06, "log_odds_chosen": 1.1331610679626465, "log_odds_ratio": -0.43135523796081543, "logits/chosen": -0.4021880626678467, "logits/rejected": -0.44057130813598633, "logps/chosen": -0.8385679125785828, "logps/rejected": -1.58628249168396, "loss": 0.9221, "nll_loss": 0.8789209127426147, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08385679870843887, "rewards/margins": 0.074771448969841, "rewards/rejected": -0.15862825512886047, "step": 8510 }, { "epoch": 1.54, "grad_norm": 2.1097097396850586, "learning_rate": 3.096811762993158e-06, "log_odds_chosen": 0.8757414817810059, "log_odds_ratio": -0.5435615181922913, "logits/chosen": -0.43857377767562866, "logits/rejected": -0.4581020772457123, "logps/chosen": -0.9276224970817566, "logps/rejected": -1.4827194213867188, "loss": 1.0161, "nll_loss": 0.9617037773132324, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09276226162910461, "rewards/margins": 0.055509693920612335, "rewards/rejected": -0.14827194809913635, "step": 8520 }, { "epoch": 1.54, "grad_norm": 1.5196740627288818, "learning_rate": 3.0909884990537193e-06, "log_odds_chosen": 0.7281922101974487, "log_odds_ratio": -0.5750101208686829, "logits/chosen": -0.4723603129386902, "logits/rejected": -0.47291263937950134, "logps/chosen": -0.9452505111694336, "logps/rejected": -1.4306466579437256, "loss": 0.9857, "nll_loss": 0.9282245635986328, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0945250540971756, "rewards/margins": 0.04853961616754532, "rewards/rejected": -0.1430646777153015, "step": 8530 }, { "epoch": 1.54, "grad_norm": 1.1302449703216553, "learning_rate": 3.0851652351142816e-06, "log_odds_chosen": 1.0263969898223877, "log_odds_ratio": -0.5024701356887817, "logits/chosen": -0.4505384564399719, "logits/rejected": -0.4901925027370453, "logps/chosen": -0.9041630029678345, "logps/rejected": -1.636066198348999, "loss": 0.9306, "nll_loss": 0.880358874797821, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09041629731655121, "rewards/margins": 0.07319033890962601, "rewards/rejected": -0.1636066436767578, "step": 8540 }, { "epoch": 1.54, "grad_norm": 1.616918921470642, "learning_rate": 3.0793419711748435e-06, "log_odds_chosen": 0.7561261057853699, "log_odds_ratio": -0.5439547896385193, "logits/chosen": -0.4394764006137848, "logits/rejected": -0.42637330293655396, "logps/chosen": -0.8382787704467773, "logps/rejected": -1.2982994318008423, "loss": 0.9224, "nll_loss": 0.8679793477058411, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08382787555456161, "rewards/margins": 0.04600207880139351, "rewards/rejected": -0.12982995808124542, "step": 8550 }, { "epoch": 1.55, "grad_norm": 1.5272725820541382, "learning_rate": 3.0735187072354054e-06, "log_odds_chosen": 0.8503424525260925, "log_odds_ratio": -0.6126225590705872, "logits/chosen": -0.45884138345718384, "logits/rejected": -0.4699079096317291, "logps/chosen": -1.0466539859771729, "logps/rejected": -1.6558605432510376, "loss": 1.0305, "nll_loss": 0.9692071676254272, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10466538369655609, "rewards/margins": 0.06092067435383797, "rewards/rejected": -0.16558606922626495, "step": 8560 }, { "epoch": 1.55, "grad_norm": 1.7004966735839844, "learning_rate": 3.0676954432959672e-06, "log_odds_chosen": 1.068210244178772, "log_odds_ratio": -0.535982608795166, "logits/chosen": -0.43431219458580017, "logits/rejected": -0.47066861391067505, "logps/chosen": -1.005386233329773, "logps/rejected": -1.7771772146224976, "loss": 0.9732, "nll_loss": 0.9196001887321472, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10053862631320953, "rewards/margins": 0.07717911154031754, "rewards/rejected": -0.17771773040294647, "step": 8570 }, { "epoch": 1.55, "grad_norm": 1.500475287437439, "learning_rate": 3.061872179356529e-06, "log_odds_chosen": 1.255358099937439, "log_odds_ratio": -0.4606574475765228, "logits/chosen": -0.38432154059410095, "logits/rejected": -0.4414575695991516, "logps/chosen": -0.8076623678207397, "logps/rejected": -1.679369330406189, "loss": 0.9022, "nll_loss": 0.8561829328536987, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08076624572277069, "rewards/margins": 0.08717069029808044, "rewards/rejected": -0.16793695092201233, "step": 8580 }, { "epoch": 1.55, "grad_norm": 1.182091236114502, "learning_rate": 3.056048915417091e-06, "log_odds_chosen": 0.8635237812995911, "log_odds_ratio": -0.5242233276367188, "logits/chosen": -0.4404246211051941, "logits/rejected": -0.4629266858100891, "logps/chosen": -0.8578693270683289, "logps/rejected": -1.4637280702590942, "loss": 0.9752, "nll_loss": 0.9227339625358582, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08578693866729736, "rewards/margins": 0.060585867613554, "rewards/rejected": -0.14637281000614166, "step": 8590 }, { "epoch": 1.55, "grad_norm": 1.498415231704712, "learning_rate": 3.0502256514776533e-06, "log_odds_chosen": 0.7312828898429871, "log_odds_ratio": -0.524425208568573, "logits/chosen": -0.4358999729156494, "logits/rejected": -0.4618275761604309, "logps/chosen": -0.9581681489944458, "logps/rejected": -1.448223352432251, "loss": 0.9313, "nll_loss": 0.8788281679153442, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09581682085990906, "rewards/margins": 0.04900550842285156, "rewards/rejected": -0.14482232928276062, "step": 8600 }, { "epoch": 1.56, "grad_norm": 2.19492769241333, "learning_rate": 3.044402387538215e-06, "log_odds_chosen": 0.8028427958488464, "log_odds_ratio": -0.5451768636703491, "logits/chosen": -0.4661959707736969, "logits/rejected": -0.43399032950401306, "logps/chosen": -0.9671751856803894, "logps/rejected": -1.5540294647216797, "loss": 0.9725, "nll_loss": 0.9180120229721069, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09671752154827118, "rewards/margins": 0.05868542194366455, "rewards/rejected": -0.15540295839309692, "step": 8610 }, { "epoch": 1.56, "grad_norm": 2.256150484085083, "learning_rate": 3.0385791235987767e-06, "log_odds_chosen": 0.8371201753616333, "log_odds_ratio": -0.5272762179374695, "logits/chosen": -0.460941880941391, "logits/rejected": -0.47110190987586975, "logps/chosen": -0.9126766920089722, "logps/rejected": -1.4907863140106201, "loss": 0.883, "nll_loss": 0.8302776217460632, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0912676751613617, "rewards/margins": 0.057810962200164795, "rewards/rejected": -0.1490786373615265, "step": 8620 }, { "epoch": 1.56, "grad_norm": 1.4068905115127563, "learning_rate": 3.032755859659339e-06, "log_odds_chosen": 0.9666420221328735, "log_odds_ratio": -0.5143001675605774, "logits/chosen": -0.4444963037967682, "logits/rejected": -0.4654978811740875, "logps/chosen": -0.861626148223877, "logps/rejected": -1.5031956434249878, "loss": 0.9535, "nll_loss": 0.9020698666572571, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08616261184215546, "rewards/margins": 0.06415696442127228, "rewards/rejected": -0.15031957626342773, "step": 8630 }, { "epoch": 1.56, "grad_norm": 1.0998035669326782, "learning_rate": 3.026932595719901e-06, "log_odds_chosen": 0.9140321612358093, "log_odds_ratio": -0.5074976682662964, "logits/chosen": -0.4687994420528412, "logits/rejected": -0.47800904512405396, "logps/chosen": -0.8865720629692078, "logps/rejected": -1.4897964000701904, "loss": 0.9641, "nll_loss": 0.9133057594299316, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08865721523761749, "rewards/margins": 0.06032242625951767, "rewards/rejected": -0.14897963404655457, "step": 8640 }, { "epoch": 1.56, "grad_norm": 1.6498323678970337, "learning_rate": 3.021109331780463e-06, "log_odds_chosen": 0.9924195408821106, "log_odds_ratio": -0.562451183795929, "logits/chosen": -0.4367518424987793, "logits/rejected": -0.40149760246276855, "logps/chosen": -0.8667885661125183, "logps/rejected": -1.558685064315796, "loss": 0.8615, "nll_loss": 0.8052981495857239, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.0866788700222969, "rewards/margins": 0.06918965280056, "rewards/rejected": -0.1558685153722763, "step": 8650 }, { "epoch": 1.56, "grad_norm": 1.2060490846633911, "learning_rate": 3.0152860678410246e-06, "log_odds_chosen": 0.7882088422775269, "log_odds_ratio": -0.562271773815155, "logits/chosen": -0.4282412528991699, "logits/rejected": -0.45196279883384705, "logps/chosen": -0.8789991140365601, "logps/rejected": -1.443538784980774, "loss": 0.9452, "nll_loss": 0.8889563679695129, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08789992332458496, "rewards/margins": 0.05645396560430527, "rewards/rejected": -0.14435386657714844, "step": 8660 }, { "epoch": 1.57, "grad_norm": 1.8238736391067505, "learning_rate": 3.0094628039015865e-06, "log_odds_chosen": 1.1830130815505981, "log_odds_ratio": -0.4648486077785492, "logits/chosen": -0.40792226791381836, "logits/rejected": -0.4477695822715759, "logps/chosen": -0.9096108675003052, "logps/rejected": -1.7371028661727905, "loss": 0.915, "nll_loss": 0.8684867024421692, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09096109867095947, "rewards/margins": 0.08274922519922256, "rewards/rejected": -0.17371031641960144, "step": 8670 }, { "epoch": 1.57, "grad_norm": 1.2026002407073975, "learning_rate": 3.003639539962149e-06, "log_odds_chosen": 1.1199390888214111, "log_odds_ratio": -0.5301384329795837, "logits/chosen": -0.4494144916534424, "logits/rejected": -0.4676884710788727, "logps/chosen": -1.0210682153701782, "logps/rejected": -1.905693769454956, "loss": 1.0464, "nll_loss": 0.993392288684845, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.10210682451725006, "rewards/margins": 0.08846259117126465, "rewards/rejected": -0.1905694305896759, "step": 8680 }, { "epoch": 1.57, "grad_norm": 1.8002581596374512, "learning_rate": 2.9978162760227107e-06, "log_odds_chosen": 0.8180960416793823, "log_odds_ratio": -0.5964370965957642, "logits/chosen": -0.4645138680934906, "logits/rejected": -0.415048748254776, "logps/chosen": -0.9569841623306274, "logps/rejected": -1.5479731559753418, "loss": 0.9345, "nll_loss": 0.8748123049736023, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09569840133190155, "rewards/margins": 0.05909890681505203, "rewards/rejected": -0.15479730069637299, "step": 8690 }, { "epoch": 1.57, "grad_norm": 1.1029233932495117, "learning_rate": 2.991993012083272e-06, "log_odds_chosen": 0.8041459321975708, "log_odds_ratio": -0.5897382497787476, "logits/chosen": -0.49731239676475525, "logits/rejected": -0.4990456998348236, "logps/chosen": -0.9147828817367554, "logps/rejected": -1.5065150260925293, "loss": 0.9862, "nll_loss": 0.9271799325942993, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09147828817367554, "rewards/margins": 0.059173207730054855, "rewards/rejected": -0.1506514996290207, "step": 8700 }, { "epoch": 1.57, "grad_norm": 1.7081480026245117, "learning_rate": 2.9861697481438345e-06, "log_odds_chosen": 0.8758634328842163, "log_odds_ratio": -0.5533775687217712, "logits/chosen": -0.45495352149009705, "logits/rejected": -0.46826472878456116, "logps/chosen": -0.9041604995727539, "logps/rejected": -1.5035583972930908, "loss": 0.9567, "nll_loss": 0.9013868570327759, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09041605144739151, "rewards/margins": 0.059939801692962646, "rewards/rejected": -0.15035584568977356, "step": 8710 }, { "epoch": 1.58, "grad_norm": 0.9252613186836243, "learning_rate": 2.9803464842043964e-06, "log_odds_chosen": 0.9316560626029968, "log_odds_ratio": -0.5237702131271362, "logits/chosen": -0.4001654088497162, "logits/rejected": -0.4136221408843994, "logps/chosen": -0.8662740588188171, "logps/rejected": -1.4723564386367798, "loss": 0.9343, "nll_loss": 0.8818756341934204, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08662740886211395, "rewards/margins": 0.060608237981796265, "rewards/rejected": -0.1472356617450714, "step": 8720 }, { "epoch": 1.58, "grad_norm": 1.780712366104126, "learning_rate": 2.9745232202649583e-06, "log_odds_chosen": 0.9977982640266418, "log_odds_ratio": -0.49909859895706177, "logits/chosen": -0.4587010443210602, "logits/rejected": -0.49831119179725647, "logps/chosen": -0.927727997303009, "logps/rejected": -1.643951654434204, "loss": 0.9722, "nll_loss": 0.9222747683525085, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09277280420064926, "rewards/margins": 0.07162235677242279, "rewards/rejected": -0.16439515352249146, "step": 8730 }, { "epoch": 1.58, "grad_norm": 1.4339555501937866, "learning_rate": 2.9686999563255206e-06, "log_odds_chosen": 0.9230550527572632, "log_odds_ratio": -0.5562025308609009, "logits/chosen": -0.4476137161254883, "logits/rejected": -0.47444948554039, "logps/chosen": -0.9881235957145691, "logps/rejected": -1.6829208135604858, "loss": 1.0277, "nll_loss": 0.9721002578735352, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09881236404180527, "rewards/margins": 0.06947972625494003, "rewards/rejected": -0.1682920902967453, "step": 8740 }, { "epoch": 1.58, "grad_norm": 2.0522637367248535, "learning_rate": 2.962876692386082e-06, "log_odds_chosen": 1.1478326320648193, "log_odds_ratio": -0.48287123441696167, "logits/chosen": -0.4314839243888855, "logits/rejected": -0.4502708911895752, "logps/chosen": -0.8756439089775085, "logps/rejected": -1.6750850677490234, "loss": 0.9321, "nll_loss": 0.8838540315628052, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0875643938779831, "rewards/margins": 0.07994408905506134, "rewards/rejected": -0.16750849783420563, "step": 8750 }, { "epoch": 1.58, "grad_norm": 0.9408858418464661, "learning_rate": 2.9570534284466444e-06, "log_odds_chosen": 0.7013543844223022, "log_odds_ratio": -0.5859674215316772, "logits/chosen": -0.4224696159362793, "logits/rejected": -0.4474635124206543, "logps/chosen": -0.9406406283378601, "logps/rejected": -1.4665216207504272, "loss": 0.9571, "nll_loss": 0.8984783887863159, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09406407177448273, "rewards/margins": 0.052588094025850296, "rewards/rejected": -0.14665216207504272, "step": 8760 }, { "epoch": 1.58, "grad_norm": 2.0329740047454834, "learning_rate": 2.9512301645072062e-06, "log_odds_chosen": 0.6817148923873901, "log_odds_ratio": -0.6015090942382812, "logits/chosen": -0.45950907468795776, "logits/rejected": -0.4683297276496887, "logps/chosen": -0.9494982957839966, "logps/rejected": -1.4097508192062378, "loss": 0.9656, "nll_loss": 0.9054635763168335, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09494982659816742, "rewards/margins": 0.04602526128292084, "rewards/rejected": -0.14097508788108826, "step": 8770 }, { "epoch": 1.59, "grad_norm": 1.3164424896240234, "learning_rate": 2.945406900567768e-06, "log_odds_chosen": 0.7522329092025757, "log_odds_ratio": -0.5482112765312195, "logits/chosen": -0.48109740018844604, "logits/rejected": -0.4810869097709656, "logps/chosen": -0.9835283160209656, "logps/rejected": -1.5171191692352295, "loss": 1.013, "nll_loss": 0.9581700563430786, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0983528345823288, "rewards/margins": 0.05335908383131027, "rewards/rejected": -0.15171192586421967, "step": 8780 }, { "epoch": 1.59, "grad_norm": 1.5961617231369019, "learning_rate": 2.93958363662833e-06, "log_odds_chosen": 1.1709040403366089, "log_odds_ratio": -0.47756949067115784, "logits/chosen": -0.428290456533432, "logits/rejected": -0.44113603234291077, "logps/chosen": -0.9102188348770142, "logps/rejected": -1.7039642333984375, "loss": 0.9331, "nll_loss": 0.8853162527084351, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09102188050746918, "rewards/margins": 0.0793745368719101, "rewards/rejected": -0.17039641737937927, "step": 8790 }, { "epoch": 1.59, "grad_norm": 0.7731590270996094, "learning_rate": 2.933760372688892e-06, "log_odds_chosen": 0.7696908712387085, "log_odds_ratio": -0.5423186421394348, "logits/chosen": -0.44001665711402893, "logits/rejected": -0.4458009600639343, "logps/chosen": -0.8562023043632507, "logps/rejected": -1.3373987674713135, "loss": 0.9334, "nll_loss": 0.8791570663452148, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08562023192644119, "rewards/margins": 0.048119645565748215, "rewards/rejected": -0.1337398737668991, "step": 8800 }, { "epoch": 1.59, "grad_norm": 1.536658525466919, "learning_rate": 2.927937108749454e-06, "log_odds_chosen": 0.7098883986473083, "log_odds_ratio": -0.5896470546722412, "logits/chosen": -0.47359657287597656, "logits/rejected": -0.47002777457237244, "logps/chosen": -0.9735046625137329, "logps/rejected": -1.4823663234710693, "loss": 1.06, "nll_loss": 1.0009952783584595, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09735047817230225, "rewards/margins": 0.05088616535067558, "rewards/rejected": -0.14823663234710693, "step": 8810 }, { "epoch": 1.59, "grad_norm": 1.0456104278564453, "learning_rate": 2.922113844810016e-06, "log_odds_chosen": 0.876265823841095, "log_odds_ratio": -0.5287500619888306, "logits/chosen": -0.3807294964790344, "logits/rejected": -0.41154319047927856, "logps/chosen": -0.899895966053009, "logps/rejected": -1.4913125038146973, "loss": 0.9526, "nll_loss": 0.8997262120246887, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08998960256576538, "rewards/margins": 0.05914165824651718, "rewards/rejected": -0.14913125336170197, "step": 8820 }, { "epoch": 1.6, "grad_norm": 1.8245923519134521, "learning_rate": 2.916290580870578e-06, "log_odds_chosen": 0.9418030977249146, "log_odds_ratio": -0.54529869556427, "logits/chosen": -0.4727003574371338, "logits/rejected": -0.4494483470916748, "logps/chosen": -0.9340311288833618, "logps/rejected": -1.609712839126587, "loss": 0.9158, "nll_loss": 0.8612591028213501, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09340310096740723, "rewards/margins": 0.06756816804409027, "rewards/rejected": -0.1609712690114975, "step": 8830 }, { "epoch": 1.6, "grad_norm": 2.012312650680542, "learning_rate": 2.9104673169311394e-06, "log_odds_chosen": 0.8511514663696289, "log_odds_ratio": -0.5999525189399719, "logits/chosen": -0.4229847490787506, "logits/rejected": -0.43726539611816406, "logps/chosen": -0.9030078053474426, "logps/rejected": -1.5179579257965088, "loss": 0.9474, "nll_loss": 0.8874059915542603, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0903007760643959, "rewards/margins": 0.06149500608444214, "rewards/rejected": -0.15179578959941864, "step": 8840 }, { "epoch": 1.6, "grad_norm": 1.1240602731704712, "learning_rate": 2.9046440529917018e-06, "log_odds_chosen": 0.7420207858085632, "log_odds_ratio": -0.5621017813682556, "logits/chosen": -0.39741191267967224, "logits/rejected": -0.4037668704986572, "logps/chosen": -0.8269944190979004, "logps/rejected": -1.315613031387329, "loss": 0.9393, "nll_loss": 0.8831076622009277, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08269945532083511, "rewards/margins": 0.048861853778362274, "rewards/rejected": -0.1315612941980362, "step": 8850 }, { "epoch": 1.6, "grad_norm": 1.4114630222320557, "learning_rate": 2.8988207890522636e-06, "log_odds_chosen": 1.1417728662490845, "log_odds_ratio": -0.46272262930870056, "logits/chosen": -0.4243449568748474, "logits/rejected": -0.44843998551368713, "logps/chosen": -0.7630897760391235, "logps/rejected": -1.504429578781128, "loss": 0.842, "nll_loss": 0.7956916689872742, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07630898058414459, "rewards/margins": 0.0741339921951294, "rewards/rejected": -0.150442972779274, "step": 8860 }, { "epoch": 1.6, "grad_norm": 1.9253188371658325, "learning_rate": 2.892997525112826e-06, "log_odds_chosen": 0.8595142364501953, "log_odds_ratio": -0.5433337092399597, "logits/chosen": -0.4623066782951355, "logits/rejected": -0.44778022170066833, "logps/chosen": -0.8706346750259399, "logps/rejected": -1.444229245185852, "loss": 0.9428, "nll_loss": 0.8884536623954773, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.08706346899271011, "rewards/margins": 0.057359449565410614, "rewards/rejected": -0.14442291855812073, "step": 8870 }, { "epoch": 1.6, "grad_norm": 1.1447951793670654, "learning_rate": 2.8871742611733874e-06, "log_odds_chosen": 1.263009786605835, "log_odds_ratio": -0.4398309588432312, "logits/chosen": -0.43632984161376953, "logits/rejected": -0.46356701850891113, "logps/chosen": -0.846616268157959, "logps/rejected": -1.691075086593628, "loss": 0.9603, "nll_loss": 0.9162972569465637, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08466162532567978, "rewards/margins": 0.08444588631391525, "rewards/rejected": -0.16910752654075623, "step": 8880 }, { "epoch": 1.61, "grad_norm": 1.8909757137298584, "learning_rate": 2.8813509972339493e-06, "log_odds_chosen": 0.8786866068840027, "log_odds_ratio": -0.5085036158561707, "logits/chosen": -0.42095041275024414, "logits/rejected": -0.4742346405982971, "logps/chosen": -0.9058700799942017, "logps/rejected": -1.4910926818847656, "loss": 1.0104, "nll_loss": 0.9595681428909302, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09058699756860733, "rewards/margins": 0.05852225422859192, "rewards/rejected": -0.14910925924777985, "step": 8890 }, { "epoch": 1.61, "grad_norm": 2.0623555183410645, "learning_rate": 2.8755277332945116e-06, "log_odds_chosen": 0.9732965230941772, "log_odds_ratio": -0.5587271451950073, "logits/chosen": -0.42247653007507324, "logits/rejected": -0.41432175040245056, "logps/chosen": -0.8597976565361023, "logps/rejected": -1.5737887620925903, "loss": 0.9569, "nll_loss": 0.9010313153266907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08597976714372635, "rewards/margins": 0.07139910757541656, "rewards/rejected": -0.1573788821697235, "step": 8900 }, { "epoch": 1.61, "grad_norm": 1.143496036529541, "learning_rate": 2.8697044693550735e-06, "log_odds_chosen": 0.9191802740097046, "log_odds_ratio": -0.5371075868606567, "logits/chosen": -0.43688470125198364, "logits/rejected": -0.4470733106136322, "logps/chosen": -0.9116252660751343, "logps/rejected": -1.576756477355957, "loss": 0.9379, "nll_loss": 0.884225070476532, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09116252511739731, "rewards/margins": 0.06651312112808228, "rewards/rejected": -0.15767565369606018, "step": 8910 }, { "epoch": 1.61, "grad_norm": 1.6985821723937988, "learning_rate": 2.863881205415635e-06, "log_odds_chosen": 0.8602274656295776, "log_odds_ratio": -0.500104546546936, "logits/chosen": -0.45361995697021484, "logits/rejected": -0.4823782444000244, "logps/chosen": -0.8865026235580444, "logps/rejected": -1.4952237606048584, "loss": 0.9312, "nll_loss": 0.881218433380127, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08865025639533997, "rewards/margins": 0.060872115194797516, "rewards/rejected": -0.14952236413955688, "step": 8920 }, { "epoch": 1.61, "grad_norm": 1.4285775423049927, "learning_rate": 2.8580579414761973e-06, "log_odds_chosen": 0.898389458656311, "log_odds_ratio": -0.5091103315353394, "logits/chosen": -0.43957382440567017, "logits/rejected": -0.47268661856651306, "logps/chosen": -0.887039840221405, "logps/rejected": -1.5192763805389404, "loss": 1.055, "nll_loss": 1.0040767192840576, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08870398998260498, "rewards/margins": 0.06322365999221802, "rewards/rejected": -0.1519276648759842, "step": 8930 }, { "epoch": 1.61, "grad_norm": 2.452505111694336, "learning_rate": 2.852234677536759e-06, "log_odds_chosen": 0.8278576731681824, "log_odds_ratio": -0.5373989343643188, "logits/chosen": -0.44924673438072205, "logits/rejected": -0.450298547744751, "logps/chosen": -0.9012764096260071, "logps/rejected": -1.4678680896759033, "loss": 0.9021, "nll_loss": 0.848312258720398, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09012763947248459, "rewards/margins": 0.05665916949510574, "rewards/rejected": -0.14678680896759033, "step": 8940 }, { "epoch": 1.62, "grad_norm": 1.5261414051055908, "learning_rate": 2.846411413597321e-06, "log_odds_chosen": 0.8018957376480103, "log_odds_ratio": -0.5665901899337769, "logits/chosen": -0.43171626329421997, "logits/rejected": -0.4220046401023865, "logps/chosen": -0.862989068031311, "logps/rejected": -1.4592608213424683, "loss": 0.9395, "nll_loss": 0.8827921152114868, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08629890531301498, "rewards/margins": 0.05962717533111572, "rewards/rejected": -0.1459260880947113, "step": 8950 }, { "epoch": 1.62, "grad_norm": 1.0437229871749878, "learning_rate": 2.8405881496578834e-06, "log_odds_chosen": 1.2830219268798828, "log_odds_ratio": -0.48459410667419434, "logits/chosen": -0.37114351987838745, "logits/rejected": -0.40893077850341797, "logps/chosen": -0.848936915397644, "logps/rejected": -1.6950023174285889, "loss": 0.9134, "nll_loss": 0.8649131655693054, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08489370346069336, "rewards/margins": 0.08460653573274612, "rewards/rejected": -0.1695002317428589, "step": 8960 }, { "epoch": 1.62, "grad_norm": 1.8305917978286743, "learning_rate": 2.834764885718445e-06, "log_odds_chosen": 1.3773221969604492, "log_odds_ratio": -0.47211503982543945, "logits/chosen": -0.3935226500034332, "logits/rejected": -0.38803496956825256, "logps/chosen": -0.9036442041397095, "logps/rejected": -1.9123483896255493, "loss": 0.9417, "nll_loss": 0.8944932818412781, "rewards/accuracies": 0.75, "rewards/chosen": -0.09036441147327423, "rewards/margins": 0.10087043046951294, "rewards/rejected": -0.19123484194278717, "step": 8970 }, { "epoch": 1.62, "grad_norm": 1.0194758176803589, "learning_rate": 2.828941621779007e-06, "log_odds_chosen": 0.6662958860397339, "log_odds_ratio": -0.6157139539718628, "logits/chosen": -0.44147419929504395, "logits/rejected": -0.441021591424942, "logps/chosen": -0.9676446914672852, "logps/rejected": -1.4576784372329712, "loss": 1.0337, "nll_loss": 0.9721538424491882, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0967644602060318, "rewards/margins": 0.04900338873267174, "rewards/rejected": -0.14576785266399384, "step": 8980 }, { "epoch": 1.62, "grad_norm": 1.9268519878387451, "learning_rate": 2.823118357839569e-06, "log_odds_chosen": 1.1404383182525635, "log_odds_ratio": -0.48538607358932495, "logits/chosen": -0.4639511704444885, "logits/rejected": -0.4613065719604492, "logps/chosen": -0.9420214891433716, "logps/rejected": -1.7059471607208252, "loss": 0.9394, "nll_loss": 0.8908447027206421, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09420214593410492, "rewards/margins": 0.07639256864786148, "rewards/rejected": -0.1705947071313858, "step": 8990 }, { "epoch": 1.63, "grad_norm": 1.572337031364441, "learning_rate": 2.817295093900131e-06, "log_odds_chosen": 0.6553648710250854, "log_odds_ratio": -0.5741513967514038, "logits/chosen": -0.47056522965431213, "logits/rejected": -0.46010923385620117, "logps/chosen": -0.9969121217727661, "logps/rejected": -1.453446865081787, "loss": 1.0014, "nll_loss": 0.944003701210022, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09969121217727661, "rewards/margins": 0.045653462409973145, "rewards/rejected": -0.14534467458724976, "step": 9000 }, { "epoch": 1.63, "grad_norm": 1.9777848720550537, "learning_rate": 2.8114718299606928e-06, "log_odds_chosen": 0.803533673286438, "log_odds_ratio": -0.5443662405014038, "logits/chosen": -0.4233883321285248, "logits/rejected": -0.4425373673439026, "logps/chosen": -0.9458332061767578, "logps/rejected": -1.5298562049865723, "loss": 0.9113, "nll_loss": 0.8568891286849976, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09458333253860474, "rewards/margins": 0.05840228870511055, "rewards/rejected": -0.1529856026172638, "step": 9010 }, { "epoch": 1.63, "grad_norm": 1.8121646642684937, "learning_rate": 2.8056485660212547e-06, "log_odds_chosen": 1.1956027746200562, "log_odds_ratio": -0.5159161686897278, "logits/chosen": -0.38231539726257324, "logits/rejected": -0.3957478404045105, "logps/chosen": -0.7938886284828186, "logps/rejected": -1.6044238805770874, "loss": 0.8469, "nll_loss": 0.7953472137451172, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.07938887178897858, "rewards/margins": 0.08105353266000748, "rewards/rejected": -0.16044239699840546, "step": 9020 }, { "epoch": 1.63, "grad_norm": 1.4421942234039307, "learning_rate": 2.7998253020818166e-06, "log_odds_chosen": 0.8288308382034302, "log_odds_ratio": -0.5798860788345337, "logits/chosen": -0.47319093346595764, "logits/rejected": -0.47874951362609863, "logps/chosen": -0.9910385012626648, "logps/rejected": -1.6084444522857666, "loss": 1.0151, "nll_loss": 0.9570778012275696, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09910385310649872, "rewards/margins": 0.06174057722091675, "rewards/rejected": -0.16084444522857666, "step": 9030 }, { "epoch": 1.63, "grad_norm": 1.8703491687774658, "learning_rate": 2.794002038142379e-06, "log_odds_chosen": 0.8713129758834839, "log_odds_ratio": -0.5886852741241455, "logits/chosen": -0.4230705201625824, "logits/rejected": -0.4477715492248535, "logps/chosen": -0.9506195187568665, "logps/rejected": -1.5880171060562134, "loss": 0.9558, "nll_loss": 0.8969224095344543, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09506195783615112, "rewards/margins": 0.06373975425958633, "rewards/rejected": -0.15880170464515686, "step": 9040 }, { "epoch": 1.63, "grad_norm": 1.8566488027572632, "learning_rate": 2.7881787742029408e-06, "log_odds_chosen": 0.9162837862968445, "log_odds_ratio": -0.5790117979049683, "logits/chosen": -0.39925554394721985, "logits/rejected": -0.4360648989677429, "logps/chosen": -0.8828533887863159, "logps/rejected": -1.5269739627838135, "loss": 0.9833, "nll_loss": 0.9254306554794312, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08828534185886383, "rewards/margins": 0.06441205739974976, "rewards/rejected": -0.15269741415977478, "step": 9050 }, { "epoch": 1.64, "grad_norm": 2.002180576324463, "learning_rate": 2.7823555102635022e-06, "log_odds_chosen": 0.6226949095726013, "log_odds_ratio": -0.5851758122444153, "logits/chosen": -0.42530399560928345, "logits/rejected": -0.44639119505882263, "logps/chosen": -0.9246541261672974, "logps/rejected": -1.3595139980316162, "loss": 0.9609, "nll_loss": 0.9023898243904114, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09246542304754257, "rewards/margins": 0.043485984206199646, "rewards/rejected": -0.13595139980316162, "step": 9060 }, { "epoch": 1.64, "grad_norm": 1.6212151050567627, "learning_rate": 2.7765322463240645e-06, "log_odds_chosen": 1.1360653638839722, "log_odds_ratio": -0.5271188020706177, "logits/chosen": -0.36451902985572815, "logits/rejected": -0.40917712450027466, "logps/chosen": -0.8709207773208618, "logps/rejected": -1.6949536800384521, "loss": 0.944, "nll_loss": 0.8913170099258423, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0870920792222023, "rewards/margins": 0.08240329474210739, "rewards/rejected": -0.1694953739643097, "step": 9070 }, { "epoch": 1.64, "grad_norm": 1.0041602849960327, "learning_rate": 2.7707089823846264e-06, "log_odds_chosen": 1.0473252534866333, "log_odds_ratio": -0.527521014213562, "logits/chosen": -0.39088183641433716, "logits/rejected": -0.42638707160949707, "logps/chosen": -0.9256051778793335, "logps/rejected": -1.677172064781189, "loss": 0.9516, "nll_loss": 0.8988776206970215, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0925605297088623, "rewards/margins": 0.07515668123960495, "rewards/rejected": -0.16771721839904785, "step": 9080 }, { "epoch": 1.64, "grad_norm": 1.1408195495605469, "learning_rate": 2.7648857184451887e-06, "log_odds_chosen": 1.3326835632324219, "log_odds_ratio": -0.4328557848930359, "logits/chosen": -0.3287041485309601, "logits/rejected": -0.38711437582969666, "logps/chosen": -0.837693989276886, "logps/rejected": -1.7427314519882202, "loss": 0.8344, "nll_loss": 0.7911303639411926, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.08376939594745636, "rewards/margins": 0.0905037596821785, "rewards/rejected": -0.17427316308021545, "step": 9090 }, { "epoch": 1.64, "grad_norm": 1.2761762142181396, "learning_rate": 2.75906245450575e-06, "log_odds_chosen": 0.9336652755737305, "log_odds_ratio": -0.551077663898468, "logits/chosen": -0.4241195321083069, "logits/rejected": -0.42867010831832886, "logps/chosen": -0.8934443593025208, "logps/rejected": -1.5422897338867188, "loss": 0.9204, "nll_loss": 0.8653148412704468, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08934443444013596, "rewards/margins": 0.06488454341888428, "rewards/rejected": -0.15422898530960083, "step": 9100 }, { "epoch": 1.65, "grad_norm": 1.2373225688934326, "learning_rate": 2.753239190566312e-06, "log_odds_chosen": 0.9416915774345398, "log_odds_ratio": -0.520602822303772, "logits/chosen": -0.39296430349349976, "logits/rejected": -0.41580742597579956, "logps/chosen": -1.0003538131713867, "logps/rejected": -1.6513385772705078, "loss": 0.9442, "nll_loss": 0.8921074867248535, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10003536939620972, "rewards/margins": 0.06509849429130554, "rewards/rejected": -0.16513387858867645, "step": 9110 }, { "epoch": 1.65, "grad_norm": 1.3835368156433105, "learning_rate": 2.7474159266268744e-06, "log_odds_chosen": 0.9459671974182129, "log_odds_ratio": -0.48943382501602173, "logits/chosen": -0.4307325482368469, "logits/rejected": -0.44662827253341675, "logps/chosen": -0.9193538427352905, "logps/rejected": -1.5974020957946777, "loss": 0.9734, "nll_loss": 0.9244106411933899, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09193538129329681, "rewards/margins": 0.06780483573675156, "rewards/rejected": -0.15974020957946777, "step": 9120 }, { "epoch": 1.65, "grad_norm": 2.2756528854370117, "learning_rate": 2.7415926626874363e-06, "log_odds_chosen": 0.8301292657852173, "log_odds_ratio": -0.5760712027549744, "logits/chosen": -0.4282712936401367, "logits/rejected": -0.4218481183052063, "logps/chosen": -0.922931969165802, "logps/rejected": -1.4867548942565918, "loss": 0.9996, "nll_loss": 0.9419782757759094, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09229318052530289, "rewards/margins": 0.05638228729367256, "rewards/rejected": -0.14867548644542694, "step": 9130 }, { "epoch": 1.65, "grad_norm": 0.843051552772522, "learning_rate": 2.735769398747998e-06, "log_odds_chosen": 1.2046692371368408, "log_odds_ratio": -0.4969426095485687, "logits/chosen": -0.40997037291526794, "logits/rejected": -0.4544478952884674, "logps/chosen": -0.878393828868866, "logps/rejected": -1.7253586053848267, "loss": 0.8817, "nll_loss": 0.8320406675338745, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08783938735723495, "rewards/margins": 0.08469647914171219, "rewards/rejected": -0.17253586649894714, "step": 9140 }, { "epoch": 1.65, "grad_norm": 1.9792512655258179, "learning_rate": 2.72994613480856e-06, "log_odds_chosen": 1.3317620754241943, "log_odds_ratio": -0.4221356511116028, "logits/chosen": -0.41216516494750977, "logits/rejected": -0.4431169629096985, "logps/chosen": -0.8426889181137085, "logps/rejected": -1.7870067358016968, "loss": 0.8691, "nll_loss": 0.8268448114395142, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08426889777183533, "rewards/margins": 0.0944317951798439, "rewards/rejected": -0.17870070040225983, "step": 9150 }, { "epoch": 1.65, "grad_norm": 2.411402940750122, "learning_rate": 2.724122870869122e-06, "log_odds_chosen": 0.6889928579330444, "log_odds_ratio": -0.5732430219650269, "logits/chosen": -0.4233470559120178, "logits/rejected": -0.4282502233982086, "logps/chosen": -0.9491702318191528, "logps/rejected": -1.455941081047058, "loss": 0.9649, "nll_loss": 0.9075853228569031, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09491701424121857, "rewards/margins": 0.0506770983338356, "rewards/rejected": -0.14559412002563477, "step": 9160 }, { "epoch": 1.66, "grad_norm": 1.2510401010513306, "learning_rate": 2.7182996069296842e-06, "log_odds_chosen": 0.7998046875, "log_odds_ratio": -0.5143226981163025, "logits/chosen": -0.4312712252140045, "logits/rejected": -0.4719986915588379, "logps/chosen": -0.8872987627983093, "logps/rejected": -1.4246270656585693, "loss": 0.8871, "nll_loss": 0.835619330406189, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08872988075017929, "rewards/margins": 0.05373283475637436, "rewards/rejected": -0.14246270060539246, "step": 9170 }, { "epoch": 1.66, "grad_norm": 1.4718730449676514, "learning_rate": 2.712476342990246e-06, "log_odds_chosen": 0.8228703737258911, "log_odds_ratio": -0.5644127130508423, "logits/chosen": -0.43898096680641174, "logits/rejected": -0.4336237907409668, "logps/chosen": -0.8965182304382324, "logps/rejected": -1.4306434392929077, "loss": 0.9294, "nll_loss": 0.8729426264762878, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08965182304382324, "rewards/margins": 0.053412534296512604, "rewards/rejected": -0.14306434988975525, "step": 9180 }, { "epoch": 1.66, "grad_norm": 1.5248944759368896, "learning_rate": 2.7066530790508076e-06, "log_odds_chosen": 0.9961272478103638, "log_odds_ratio": -0.4423357844352722, "logits/chosen": -0.4377953112125397, "logits/rejected": -0.4404810070991516, "logps/chosen": -0.9476546049118042, "logps/rejected": -1.602682113647461, "loss": 1.0167, "nll_loss": 0.9725112915039062, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09476545453071594, "rewards/margins": 0.06550275534391403, "rewards/rejected": -0.16026821732521057, "step": 9190 }, { "epoch": 1.66, "grad_norm": 1.005291223526001, "learning_rate": 2.70082981511137e-06, "log_odds_chosen": 0.8400213122367859, "log_odds_ratio": -0.5274345278739929, "logits/chosen": -0.4171048104763031, "logits/rejected": -0.4462948739528656, "logps/chosen": -0.9608150720596313, "logps/rejected": -1.5505651235580444, "loss": 0.9606, "nll_loss": 0.9078795313835144, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09608151018619537, "rewards/margins": 0.05897500365972519, "rewards/rejected": -0.15505652129650116, "step": 9200 }, { "epoch": 1.66, "grad_norm": 3.342343330383301, "learning_rate": 2.6950065511719318e-06, "log_odds_chosen": 0.9597989916801453, "log_odds_ratio": -0.5116977691650391, "logits/chosen": -0.4211476445198059, "logits/rejected": -0.4668583869934082, "logps/chosen": -0.9648736119270325, "logps/rejected": -1.6064956188201904, "loss": 0.9446, "nll_loss": 0.8934422731399536, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09648735821247101, "rewards/margins": 0.06416221708059311, "rewards/rejected": -0.16064956784248352, "step": 9210 }, { "epoch": 1.67, "grad_norm": 1.7705780267715454, "learning_rate": 2.6891832872324937e-06, "log_odds_chosen": 0.861775279045105, "log_odds_ratio": -0.5697323679924011, "logits/chosen": -0.44634613394737244, "logits/rejected": -0.4355178475379944, "logps/chosen": -0.8917131423950195, "logps/rejected": -1.510679006576538, "loss": 0.9564, "nll_loss": 0.8994709253311157, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08917130529880524, "rewards/margins": 0.06189659237861633, "rewards/rejected": -0.15106788277626038, "step": 9220 }, { "epoch": 1.67, "grad_norm": 0.8565041422843933, "learning_rate": 2.683360023293056e-06, "log_odds_chosen": 0.9789537191390991, "log_odds_ratio": -0.5268505215644836, "logits/chosen": -0.4173552095890045, "logits/rejected": -0.4489436745643616, "logps/chosen": -0.9345428347587585, "logps/rejected": -1.6449741125106812, "loss": 0.9736, "nll_loss": 0.9208728671073914, "rewards/accuracies": 0.75, "rewards/chosen": -0.0934542864561081, "rewards/margins": 0.07104314863681793, "rewards/rejected": -0.16449742019176483, "step": 9230 }, { "epoch": 1.67, "grad_norm": 2.507345199584961, "learning_rate": 2.6775367593536174e-06, "log_odds_chosen": 1.2452746629714966, "log_odds_ratio": -0.493520587682724, "logits/chosen": -0.416081964969635, "logits/rejected": -0.4274824559688568, "logps/chosen": -0.9222303628921509, "logps/rejected": -1.828566551208496, "loss": 0.9633, "nll_loss": 0.9139355421066284, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09222304075956345, "rewards/margins": 0.09063363820314407, "rewards/rejected": -0.18285667896270752, "step": 9240 }, { "epoch": 1.67, "grad_norm": 1.6871551275253296, "learning_rate": 2.6717134954141793e-06, "log_odds_chosen": 1.0459063053131104, "log_odds_ratio": -0.513852596282959, "logits/chosen": -0.3885519504547119, "logits/rejected": -0.40226420760154724, "logps/chosen": -0.8817213773727417, "logps/rejected": -1.6192827224731445, "loss": 0.9459, "nll_loss": 0.8944852948188782, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08817213773727417, "rewards/margins": 0.07375612109899521, "rewards/rejected": -0.16192826628684998, "step": 9250 }, { "epoch": 1.67, "grad_norm": 1.1326770782470703, "learning_rate": 2.6658902314747416e-06, "log_odds_chosen": 0.49963292479515076, "log_odds_ratio": -0.6053067445755005, "logits/chosen": -0.459317147731781, "logits/rejected": -0.4486822187900543, "logps/chosen": -1.020218849182129, "logps/rejected": -1.3288309574127197, "loss": 1.0477, "nll_loss": 0.9871250987052917, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10202188789844513, "rewards/margins": 0.030861202627420425, "rewards/rejected": -0.13288308680057526, "step": 9260 }, { "epoch": 1.67, "grad_norm": 2.13757061958313, "learning_rate": 2.6600669675353035e-06, "log_odds_chosen": 1.017632246017456, "log_odds_ratio": -0.5229853391647339, "logits/chosen": -0.4418388307094574, "logits/rejected": -0.4649677276611328, "logps/chosen": -0.8779067993164062, "logps/rejected": -1.5976464748382568, "loss": 0.9128, "nll_loss": 0.860500693321228, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08779068291187286, "rewards/margins": 0.07197396457195282, "rewards/rejected": -0.15976464748382568, "step": 9270 }, { "epoch": 1.68, "grad_norm": 1.8838378190994263, "learning_rate": 2.6542437035958654e-06, "log_odds_chosen": 1.0137577056884766, "log_odds_ratio": -0.5111135244369507, "logits/chosen": -0.4243897497653961, "logits/rejected": -0.479489803314209, "logps/chosen": -0.9173609614372253, "logps/rejected": -1.6629959344863892, "loss": 0.9531, "nll_loss": 0.901984691619873, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09173610806465149, "rewards/margins": 0.07456348836421967, "rewards/rejected": -0.16629959642887115, "step": 9280 }, { "epoch": 1.68, "grad_norm": 0.8887907862663269, "learning_rate": 2.6484204396564273e-06, "log_odds_chosen": 1.2527879476547241, "log_odds_ratio": -0.4821585714817047, "logits/chosen": -0.3953564763069153, "logits/rejected": -0.41715526580810547, "logps/chosen": -0.8679831624031067, "logps/rejected": -1.749703049659729, "loss": 0.9136, "nll_loss": 0.8653751611709595, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08679831027984619, "rewards/margins": 0.08817199617624283, "rewards/rejected": -0.17497031390666962, "step": 9290 }, { "epoch": 1.68, "grad_norm": 3.8302605152130127, "learning_rate": 2.642597175716989e-06, "log_odds_chosen": 1.1876541376113892, "log_odds_ratio": -0.49830159544944763, "logits/chosen": -0.4293610155582428, "logits/rejected": -0.45646485686302185, "logps/chosen": -0.8713734745979309, "logps/rejected": -1.6922852993011475, "loss": 1.0033, "nll_loss": 0.9534996151924133, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08713734894990921, "rewards/margins": 0.08209117501974106, "rewards/rejected": -0.16922852396965027, "step": 9300 }, { "epoch": 1.68, "grad_norm": 2.138373613357544, "learning_rate": 2.6367739117775515e-06, "log_odds_chosen": 1.16695237159729, "log_odds_ratio": -0.46931418776512146, "logits/chosen": -0.39013200998306274, "logits/rejected": -0.41718345880508423, "logps/chosen": -0.8600034713745117, "logps/rejected": -1.6642802953720093, "loss": 0.8434, "nll_loss": 0.7965083718299866, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08600034564733505, "rewards/margins": 0.08042767643928528, "rewards/rejected": -0.16642801463603973, "step": 9310 }, { "epoch": 1.68, "grad_norm": 1.1626324653625488, "learning_rate": 2.630950647838113e-06, "log_odds_chosen": 0.8146978616714478, "log_odds_ratio": -0.5507025718688965, "logits/chosen": -0.4694862365722656, "logits/rejected": -0.4689878821372986, "logps/chosen": -0.948291003704071, "logps/rejected": -1.5086925029754639, "loss": 0.9501, "nll_loss": 0.8950142860412598, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09482909739017487, "rewards/margins": 0.056040145456790924, "rewards/rejected": -0.150869220495224, "step": 9320 }, { "epoch": 1.69, "grad_norm": 2.426990509033203, "learning_rate": 2.625127383898675e-06, "log_odds_chosen": 0.7397544384002686, "log_odds_ratio": -0.5745836496353149, "logits/chosen": -0.4914630949497223, "logits/rejected": -0.48593616485595703, "logps/chosen": -0.9268978238105774, "logps/rejected": -1.4810400009155273, "loss": 1.0306, "nll_loss": 0.9731773138046265, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09268978983163834, "rewards/margins": 0.05541421100497246, "rewards/rejected": -0.1481039822101593, "step": 9330 }, { "epoch": 1.69, "grad_norm": 1.5938129425048828, "learning_rate": 2.619304119959237e-06, "log_odds_chosen": 0.9346494674682617, "log_odds_ratio": -0.491645872592926, "logits/chosen": -0.4693544805049896, "logits/rejected": -0.46639999747276306, "logps/chosen": -0.9760904312133789, "logps/rejected": -1.650472640991211, "loss": 0.9957, "nll_loss": 0.9465456008911133, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09760904312133789, "rewards/margins": 0.06743822246789932, "rewards/rejected": -0.16504724323749542, "step": 9340 }, { "epoch": 1.69, "grad_norm": 1.5292596817016602, "learning_rate": 2.613480856019799e-06, "log_odds_chosen": 0.6154406666755676, "log_odds_ratio": -0.5639275312423706, "logits/chosen": -0.4870131015777588, "logits/rejected": -0.4909876883029938, "logps/chosen": -0.8956283330917358, "logps/rejected": -1.3006092309951782, "loss": 0.9209, "nll_loss": 0.8644782900810242, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08956284075975418, "rewards/margins": 0.04049808531999588, "rewards/rejected": -0.13006091117858887, "step": 9350 }, { "epoch": 1.69, "grad_norm": 0.821647047996521, "learning_rate": 2.607657592080361e-06, "log_odds_chosen": 1.121782660484314, "log_odds_ratio": -0.4775637090206146, "logits/chosen": -0.4000754952430725, "logits/rejected": -0.4561656415462494, "logps/chosen": -0.8684180974960327, "logps/rejected": -1.652954339981079, "loss": 0.947, "nll_loss": 0.8992889523506165, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08684180676937103, "rewards/margins": 0.07845362275838852, "rewards/rejected": -0.16529543697834015, "step": 9360 }, { "epoch": 1.69, "grad_norm": 1.416046380996704, "learning_rate": 2.601834328140923e-06, "log_odds_chosen": 1.0560827255249023, "log_odds_ratio": -0.48382264375686646, "logits/chosen": -0.485282838344574, "logits/rejected": -0.4757692217826843, "logps/chosen": -0.85932856798172, "logps/rejected": -1.613471269607544, "loss": 0.9261, "nll_loss": 0.8776809573173523, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08593286573886871, "rewards/margins": 0.0754142552614212, "rewards/rejected": -0.16134712100028992, "step": 9370 }, { "epoch": 1.69, "grad_norm": 1.1823846101760864, "learning_rate": 2.5960110642014847e-06, "log_odds_chosen": 1.0517069101333618, "log_odds_ratio": -0.49165159463882446, "logits/chosen": -0.4569578170776367, "logits/rejected": -0.41528910398483276, "logps/chosen": -0.9491796493530273, "logps/rejected": -1.6688644886016846, "loss": 0.934, "nll_loss": 0.8848093152046204, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09491796791553497, "rewards/margins": 0.07196847349405289, "rewards/rejected": -0.16688643395900726, "step": 9380 }, { "epoch": 1.7, "grad_norm": 1.1237273216247559, "learning_rate": 2.590187800262047e-06, "log_odds_chosen": 1.1736353635787964, "log_odds_ratio": -0.47512874007225037, "logits/chosen": -0.41880425810813904, "logits/rejected": -0.39208707213401794, "logps/chosen": -0.8113826513290405, "logps/rejected": -1.6218392848968506, "loss": 0.9054, "nll_loss": 0.8578837513923645, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08113826811313629, "rewards/margins": 0.08104566484689713, "rewards/rejected": -0.162183940410614, "step": 9390 }, { "epoch": 1.7, "grad_norm": 1.7111291885375977, "learning_rate": 2.584364536322609e-06, "log_odds_chosen": 0.8534847497940063, "log_odds_ratio": -0.5059072375297546, "logits/chosen": -0.48690375685691833, "logits/rejected": -0.4755523204803467, "logps/chosen": -0.9035647511482239, "logps/rejected": -1.4580377340316772, "loss": 0.9428, "nll_loss": 0.8922485113143921, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09035647660493851, "rewards/margins": 0.05544731765985489, "rewards/rejected": -0.1458037793636322, "step": 9400 }, { "epoch": 1.7, "grad_norm": 1.1456893682479858, "learning_rate": 2.5785412723831704e-06, "log_odds_chosen": 1.171532392501831, "log_odds_ratio": -0.50825035572052, "logits/chosen": -0.370635986328125, "logits/rejected": -0.3998872935771942, "logps/chosen": -0.8561755418777466, "logps/rejected": -1.715456247329712, "loss": 0.9125, "nll_loss": 0.8616338968276978, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0856175571680069, "rewards/margins": 0.08592807501554489, "rewards/rejected": -0.1715456247329712, "step": 9410 }, { "epoch": 1.7, "grad_norm": 1.8371309041976929, "learning_rate": 2.5727180084437327e-06, "log_odds_chosen": 1.2050302028656006, "log_odds_ratio": -0.44126567244529724, "logits/chosen": -0.4281534254550934, "logits/rejected": -0.43468666076660156, "logps/chosen": -0.8962228894233704, "logps/rejected": -1.6920740604400635, "loss": 0.9079, "nll_loss": 0.8637346029281616, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08962228894233704, "rewards/margins": 0.07958510518074036, "rewards/rejected": -0.169207364320755, "step": 9420 }, { "epoch": 1.7, "grad_norm": 2.1131508350372314, "learning_rate": 2.5668947445042945e-06, "log_odds_chosen": 1.251560091972351, "log_odds_ratio": -0.48243609070777893, "logits/chosen": -0.43499651551246643, "logits/rejected": -0.4199391305446625, "logps/chosen": -0.9016925692558289, "logps/rejected": -1.7924721240997314, "loss": 0.9366, "nll_loss": 0.8883956074714661, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.090169258415699, "rewards/margins": 0.08907793462276459, "rewards/rejected": -0.1792472004890442, "step": 9430 }, { "epoch": 1.71, "grad_norm": 2.5323009490966797, "learning_rate": 2.5610714805648564e-06, "log_odds_chosen": 0.9132372736930847, "log_odds_ratio": -0.5154244899749756, "logits/chosen": -0.4603002667427063, "logits/rejected": -0.4696424901485443, "logps/chosen": -0.9054134488105774, "logps/rejected": -1.5461491346359253, "loss": 0.9467, "nll_loss": 0.8951939344406128, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09054134041070938, "rewards/margins": 0.0640735775232315, "rewards/rejected": -0.1546149104833603, "step": 9440 }, { "epoch": 1.71, "grad_norm": 1.876884937286377, "learning_rate": 2.5552482166254187e-06, "log_odds_chosen": 0.6280218958854675, "log_odds_ratio": -0.5808584690093994, "logits/chosen": -0.41892462968826294, "logits/rejected": -0.45282214879989624, "logps/chosen": -0.9991976618766785, "logps/rejected": -1.454352617263794, "loss": 0.952, "nll_loss": 0.8938835263252258, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09991975873708725, "rewards/margins": 0.045515503734350204, "rewards/rejected": -0.14543527364730835, "step": 9450 }, { "epoch": 1.71, "grad_norm": 1.4596505165100098, "learning_rate": 2.54942495268598e-06, "log_odds_chosen": 0.981662392616272, "log_odds_ratio": -0.5320886373519897, "logits/chosen": -0.43657493591308594, "logits/rejected": -0.4576547145843506, "logps/chosen": -0.8840090036392212, "logps/rejected": -1.5034620761871338, "loss": 1.0212, "nll_loss": 0.9680083394050598, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08840090036392212, "rewards/margins": 0.06194530799984932, "rewards/rejected": -0.15034620463848114, "step": 9460 }, { "epoch": 1.71, "grad_norm": 0.9314261674880981, "learning_rate": 2.543601688746542e-06, "log_odds_chosen": 1.2412148714065552, "log_odds_ratio": -0.44239601492881775, "logits/chosen": -0.4623107314109802, "logits/rejected": -0.48735103011131287, "logps/chosen": -0.9057219624519348, "logps/rejected": -1.7566516399383545, "loss": 0.9482, "nll_loss": 0.9039756059646606, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.09057219326496124, "rewards/margins": 0.08509297668933868, "rewards/rejected": -0.17566516995429993, "step": 9470 }, { "epoch": 1.71, "grad_norm": 1.9713478088378906, "learning_rate": 2.5377784248071044e-06, "log_odds_chosen": 1.1023633480072021, "log_odds_ratio": -0.47931233048439026, "logits/chosen": -0.4793943762779236, "logits/rejected": -0.48942771553993225, "logps/chosen": -0.8621703386306763, "logps/rejected": -1.6248624324798584, "loss": 0.9769, "nll_loss": 0.9289461970329285, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08621703088283539, "rewards/margins": 0.07626921683549881, "rewards/rejected": -0.1624862402677536, "step": 9480 }, { "epoch": 1.71, "grad_norm": 1.8307769298553467, "learning_rate": 2.5319551608676663e-06, "log_odds_chosen": 0.8780630826950073, "log_odds_ratio": -0.5695803165435791, "logits/chosen": -0.4494501054286957, "logits/rejected": -0.41476479172706604, "logps/chosen": -0.9255537986755371, "logps/rejected": -1.55754816532135, "loss": 1.0147, "nll_loss": 0.9577773809432983, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09255538135766983, "rewards/margins": 0.06319941580295563, "rewards/rejected": -0.15575480461120605, "step": 9490 }, { "epoch": 1.72, "grad_norm": 0.9392945766448975, "learning_rate": 2.526131896928228e-06, "log_odds_chosen": 1.1123392581939697, "log_odds_ratio": -0.5118842720985413, "logits/chosen": -0.44842857122421265, "logits/rejected": -0.45434433221817017, "logps/chosen": -0.8633874654769897, "logps/rejected": -1.617057204246521, "loss": 0.8661, "nll_loss": 0.814924418926239, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08633874356746674, "rewards/margins": 0.07536698877811432, "rewards/rejected": -0.16170573234558105, "step": 9500 }, { "epoch": 1.72, "grad_norm": 1.8088247776031494, "learning_rate": 2.52030863298879e-06, "log_odds_chosen": 1.3172489404678345, "log_odds_ratio": -0.4467700123786926, "logits/chosen": -0.43645501136779785, "logits/rejected": -0.44458237290382385, "logps/chosen": -0.7922109961509705, "logps/rejected": -1.7160981893539429, "loss": 0.9327, "nll_loss": 0.8880230784416199, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07922110706567764, "rewards/margins": 0.09238873422145844, "rewards/rejected": -0.17160983383655548, "step": 9510 }, { "epoch": 1.72, "grad_norm": 2.9287898540496826, "learning_rate": 2.514485369049352e-06, "log_odds_chosen": 0.775164783000946, "log_odds_ratio": -0.5303457975387573, "logits/chosen": -0.4484976828098297, "logits/rejected": -0.45544663071632385, "logps/chosen": -0.9658139944076538, "logps/rejected": -1.4660108089447021, "loss": 0.9324, "nll_loss": 0.8793715238571167, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09658139944076538, "rewards/margins": 0.05001969262957573, "rewards/rejected": -0.14660108089447021, "step": 9520 }, { "epoch": 1.72, "grad_norm": 1.672947645187378, "learning_rate": 2.5086621051099143e-06, "log_odds_chosen": 1.0569748878479004, "log_odds_ratio": -0.4957321584224701, "logits/chosen": -0.42946892976760864, "logits/rejected": -0.41913193464279175, "logps/chosen": -0.9915930032730103, "logps/rejected": -1.7599254846572876, "loss": 0.9782, "nll_loss": 0.9286025166511536, "rewards/accuracies": 0.75, "rewards/chosen": -0.09915930032730103, "rewards/margins": 0.07683324813842773, "rewards/rejected": -0.17599254846572876, "step": 9530 }, { "epoch": 1.72, "grad_norm": 1.7308794260025024, "learning_rate": 2.502838841170476e-06, "log_odds_chosen": 1.1496503353118896, "log_odds_ratio": -0.5147876739501953, "logits/chosen": -0.4033949375152588, "logits/rejected": -0.40659064054489136, "logps/chosen": -0.9279058575630188, "logps/rejected": -1.677899718284607, "loss": 0.9095, "nll_loss": 0.8580510020256042, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09279057383537292, "rewards/margins": 0.07499939203262329, "rewards/rejected": -0.16778996586799622, "step": 9540 }, { "epoch": 1.73, "grad_norm": 1.5058820247650146, "learning_rate": 2.4970155772310376e-06, "log_odds_chosen": 0.6924060583114624, "log_odds_ratio": -0.5773219466209412, "logits/chosen": -0.42662039399147034, "logits/rejected": -0.4317251145839691, "logps/chosen": -0.9882136583328247, "logps/rejected": -1.4816371202468872, "loss": 0.9622, "nll_loss": 0.9044473767280579, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09882136434316635, "rewards/margins": 0.04934234172105789, "rewards/rejected": -0.14816370606422424, "step": 9550 }, { "epoch": 1.73, "grad_norm": 1.561524510383606, "learning_rate": 2.4911923132916e-06, "log_odds_chosen": 0.9440113306045532, "log_odds_ratio": -0.5216168165206909, "logits/chosen": -0.4172740578651428, "logits/rejected": -0.4479880928993225, "logps/chosen": -0.9342236518859863, "logps/rejected": -1.585782766342163, "loss": 0.9582, "nll_loss": 0.906027615070343, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09342236816883087, "rewards/margins": 0.06515590846538544, "rewards/rejected": -0.1585782915353775, "step": 9560 }, { "epoch": 1.73, "grad_norm": 1.6601184606552124, "learning_rate": 2.485369049352162e-06, "log_odds_chosen": 0.9561376571655273, "log_odds_ratio": -0.4933817386627197, "logits/chosen": -0.48122167587280273, "logits/rejected": -0.5029059648513794, "logps/chosen": -1.0312833786010742, "logps/rejected": -1.712125539779663, "loss": 1.0299, "nll_loss": 0.9805164337158203, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1031283363699913, "rewards/margins": 0.068084217607975, "rewards/rejected": -0.1712125688791275, "step": 9570 }, { "epoch": 1.73, "grad_norm": 1.6917434930801392, "learning_rate": 2.4795457854127237e-06, "log_odds_chosen": 0.5046578645706177, "log_odds_ratio": -0.6268638372421265, "logits/chosen": -0.4347049593925476, "logits/rejected": -0.4096949100494385, "logps/chosen": -0.9971641302108765, "logps/rejected": -1.3836841583251953, "loss": 0.9449, "nll_loss": 0.882180392742157, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09971641004085541, "rewards/margins": 0.038651980459690094, "rewards/rejected": -0.1383683979511261, "step": 9580 }, { "epoch": 1.73, "grad_norm": 2.294053792953491, "learning_rate": 2.4737225214732856e-06, "log_odds_chosen": 0.924883246421814, "log_odds_ratio": -0.5290486216545105, "logits/chosen": -0.4396088719367981, "logits/rejected": -0.4460625648498535, "logps/chosen": -0.9009316563606262, "logps/rejected": -1.5800864696502686, "loss": 0.9309, "nll_loss": 0.8779793977737427, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09009316563606262, "rewards/margins": 0.06791548430919647, "rewards/rejected": -0.1580086499452591, "step": 9590 }, { "epoch": 1.73, "grad_norm": 1.6921731233596802, "learning_rate": 2.4678992575338475e-06, "log_odds_chosen": 1.2375385761260986, "log_odds_ratio": -0.4622219204902649, "logits/chosen": -0.41445812582969666, "logits/rejected": -0.43146657943725586, "logps/chosen": -0.8876203298568726, "logps/rejected": -1.799599289894104, "loss": 0.9212, "nll_loss": 0.8750225901603699, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08876203000545502, "rewards/margins": 0.0911978930234909, "rewards/rejected": -0.17995992302894592, "step": 9600 }, { "epoch": 1.74, "grad_norm": 0.9766725897789001, "learning_rate": 2.4620759935944098e-06, "log_odds_chosen": 0.7962125539779663, "log_odds_ratio": -0.5203061103820801, "logits/chosen": -0.4127267897129059, "logits/rejected": -0.41843312978744507, "logps/chosen": -0.917259693145752, "logps/rejected": -1.4751300811767578, "loss": 0.9822, "nll_loss": 0.9301362037658691, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09172599017620087, "rewards/margins": 0.05578702688217163, "rewards/rejected": -0.1475130021572113, "step": 9610 }, { "epoch": 1.74, "grad_norm": 1.1548055410385132, "learning_rate": 2.4562527296549717e-06, "log_odds_chosen": 1.2751153707504272, "log_odds_ratio": -0.4778338372707367, "logits/chosen": -0.4446820616722107, "logits/rejected": -0.4349389970302582, "logps/chosen": -0.8049659729003906, "logps/rejected": -1.6605899333953857, "loss": 0.9126, "nll_loss": 0.8648591041564941, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08049659430980682, "rewards/margins": 0.08556241542100906, "rewards/rejected": -0.1660590022802353, "step": 9620 }, { "epoch": 1.74, "grad_norm": 2.5714190006256104, "learning_rate": 2.450429465715533e-06, "log_odds_chosen": 1.2102885246276855, "log_odds_ratio": -0.4847165644168854, "logits/chosen": -0.37883883714675903, "logits/rejected": -0.40896421670913696, "logps/chosen": -0.7805262804031372, "logps/rejected": -1.6285765171051025, "loss": 0.8425, "nll_loss": 0.7940112352371216, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07805263251066208, "rewards/margins": 0.08480501919984818, "rewards/rejected": -0.16285763680934906, "step": 9630 }, { "epoch": 1.74, "grad_norm": 1.6911524534225464, "learning_rate": 2.4446062017760954e-06, "log_odds_chosen": 0.9459424018859863, "log_odds_ratio": -0.5352808833122253, "logits/chosen": -0.46002206206321716, "logits/rejected": -0.5104098320007324, "logps/chosen": -0.8379015922546387, "logps/rejected": -1.4783127307891846, "loss": 1.0092, "nll_loss": 0.9556834101676941, "rewards/accuracies": 0.75, "rewards/chosen": -0.08379016816616058, "rewards/margins": 0.0640411227941513, "rewards/rejected": -0.1478312909603119, "step": 9640 }, { "epoch": 1.74, "grad_norm": 1.3632971048355103, "learning_rate": 2.4387829378366573e-06, "log_odds_chosen": 0.7504720687866211, "log_odds_ratio": -0.5602525472640991, "logits/chosen": -0.4938369691371918, "logits/rejected": -0.4717886447906494, "logps/chosen": -0.8749774098396301, "logps/rejected": -1.4180119037628174, "loss": 0.96, "nll_loss": 0.9039288759231567, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08749774843454361, "rewards/margins": 0.05430345609784126, "rewards/rejected": -0.14180120825767517, "step": 9650 }, { "epoch": 1.74, "grad_norm": 1.4129406213760376, "learning_rate": 2.432959673897219e-06, "log_odds_chosen": 0.9551762342453003, "log_odds_ratio": -0.5054140686988831, "logits/chosen": -0.40426698327064514, "logits/rejected": -0.4051760137081146, "logps/chosen": -0.8844103813171387, "logps/rejected": -1.5333651304244995, "loss": 0.9301, "nll_loss": 0.8796060681343079, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08844103664159775, "rewards/margins": 0.06489548832178116, "rewards/rejected": -0.1533365249633789, "step": 9660 }, { "epoch": 1.75, "grad_norm": 1.6229116916656494, "learning_rate": 2.4271364099577815e-06, "log_odds_chosen": 1.0696156024932861, "log_odds_ratio": -0.4787077307701111, "logits/chosen": -0.44113072752952576, "logits/rejected": -0.44910669326782227, "logps/chosen": -0.8486385345458984, "logps/rejected": -1.6105642318725586, "loss": 0.9211, "nll_loss": 0.873221755027771, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08486385643482208, "rewards/margins": 0.07619258016347885, "rewards/rejected": -0.16105642914772034, "step": 9670 }, { "epoch": 1.75, "grad_norm": 2.514098644256592, "learning_rate": 2.421313146018343e-06, "log_odds_chosen": 0.8378480076789856, "log_odds_ratio": -0.48678427934646606, "logits/chosen": -0.46361297369003296, "logits/rejected": -0.45725908875465393, "logps/chosen": -0.8935993313789368, "logps/rejected": -1.4331048727035522, "loss": 0.9382, "nll_loss": 0.8894980549812317, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08935993909835815, "rewards/margins": 0.053950559347867966, "rewards/rejected": -0.14331048727035522, "step": 9680 }, { "epoch": 1.75, "grad_norm": 0.9816827774047852, "learning_rate": 2.415489882078905e-06, "log_odds_chosen": 1.1938976049423218, "log_odds_ratio": -0.48203128576278687, "logits/chosen": -0.4673733711242676, "logits/rejected": -0.47799110412597656, "logps/chosen": -0.8920567631721497, "logps/rejected": -1.721644401550293, "loss": 0.9587, "nll_loss": 0.9105375409126282, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08920568227767944, "rewards/margins": 0.08295877277851105, "rewards/rejected": -0.1721644401550293, "step": 9690 }, { "epoch": 1.75, "grad_norm": 1.19546377658844, "learning_rate": 2.409666618139467e-06, "log_odds_chosen": 1.1333242654800415, "log_odds_ratio": -0.5056787729263306, "logits/chosen": -0.4303444027900696, "logits/rejected": -0.41326698660850525, "logps/chosen": -0.8802807927131653, "logps/rejected": -1.6464083194732666, "loss": 0.8832, "nll_loss": 0.8326579332351685, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08802806586027145, "rewards/margins": 0.07661274820566177, "rewards/rejected": -0.16464082896709442, "step": 9700 }, { "epoch": 1.75, "grad_norm": 0.8413112759590149, "learning_rate": 2.403843354200029e-06, "log_odds_chosen": 0.7889858484268188, "log_odds_ratio": -0.565024733543396, "logits/chosen": -0.41838541626930237, "logits/rejected": -0.4011848568916321, "logps/chosen": -0.8995911478996277, "logps/rejected": -1.4334033727645874, "loss": 0.9728, "nll_loss": 0.9163390398025513, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08995912224054337, "rewards/margins": 0.05338122323155403, "rewards/rejected": -0.1433403491973877, "step": 9710 }, { "epoch": 1.76, "grad_norm": 1.572912573814392, "learning_rate": 2.3980200902605914e-06, "log_odds_chosen": 1.1054677963256836, "log_odds_ratio": -0.5001887083053589, "logits/chosen": -0.4604727625846863, "logits/rejected": -0.49845361709594727, "logps/chosen": -0.8892809748649597, "logps/rejected": -1.6379826068878174, "loss": 1.0193, "nll_loss": 0.9693231582641602, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08892810344696045, "rewards/margins": 0.07487015426158905, "rewards/rejected": -0.1637982577085495, "step": 9720 }, { "epoch": 1.76, "grad_norm": 0.9494357705116272, "learning_rate": 2.392196826321153e-06, "log_odds_chosen": 1.0644892454147339, "log_odds_ratio": -0.5272237658500671, "logits/chosen": -0.4514384865760803, "logits/rejected": -0.45779746770858765, "logps/chosen": -0.8867007493972778, "logps/rejected": -1.6791248321533203, "loss": 1.0191, "nll_loss": 0.9663643836975098, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08867006003856659, "rewards/margins": 0.07924243062734604, "rewards/rejected": -0.16791249811649323, "step": 9730 }, { "epoch": 1.76, "grad_norm": 1.5654778480529785, "learning_rate": 2.3863735623817147e-06, "log_odds_chosen": 0.6653653979301453, "log_odds_ratio": -0.578784167766571, "logits/chosen": -0.4321361482143402, "logits/rejected": -0.42093291878700256, "logps/chosen": -0.9089535474777222, "logps/rejected": -1.3596817255020142, "loss": 0.9331, "nll_loss": 0.8752476572990417, "rewards/accuracies": 0.625, "rewards/chosen": -0.09089535474777222, "rewards/margins": 0.04507281631231308, "rewards/rejected": -0.1359681636095047, "step": 9740 }, { "epoch": 1.76, "grad_norm": 1.4848594665527344, "learning_rate": 2.380550298442277e-06, "log_odds_chosen": 0.9823587536811829, "log_odds_ratio": -0.5315570831298828, "logits/chosen": -0.4404227137565613, "logits/rejected": -0.4610070288181305, "logps/chosen": -0.9339573979377747, "logps/rejected": -1.6493076086044312, "loss": 1.0242, "nll_loss": 0.971057116985321, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09339574724435806, "rewards/margins": 0.07153502851724625, "rewards/rejected": -0.16493076086044312, "step": 9750 }, { "epoch": 1.76, "grad_norm": 1.2012207508087158, "learning_rate": 2.374727034502839e-06, "log_odds_chosen": 1.0185325145721436, "log_odds_ratio": -0.5244266390800476, "logits/chosen": -0.45023947954177856, "logits/rejected": -0.488433301448822, "logps/chosen": -0.9145146608352661, "logps/rejected": -1.5999908447265625, "loss": 1.0072, "nll_loss": 0.9547730684280396, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09145145863294601, "rewards/margins": 0.06854760646820068, "rewards/rejected": -0.1599990725517273, "step": 9760 }, { "epoch": 1.76, "grad_norm": 1.891196370124817, "learning_rate": 2.3689037705634004e-06, "log_odds_chosen": 0.9177335500717163, "log_odds_ratio": -0.45958462357521057, "logits/chosen": -0.4622717499732971, "logits/rejected": -0.46385353803634644, "logps/chosen": -0.9317830801010132, "logps/rejected": -1.5283830165863037, "loss": 1.0005, "nll_loss": 0.9544920921325684, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09317831695079803, "rewards/margins": 0.059659987688064575, "rewards/rejected": -0.1528383046388626, "step": 9770 }, { "epoch": 1.77, "grad_norm": 2.1724750995635986, "learning_rate": 2.3630805066239627e-06, "log_odds_chosen": 1.292080044746399, "log_odds_ratio": -0.4301665425300598, "logits/chosen": -0.3887042701244354, "logits/rejected": -0.3993460536003113, "logps/chosen": -0.8036476373672485, "logps/rejected": -1.6760154962539673, "loss": 0.8367, "nll_loss": 0.7936657667160034, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08036476373672485, "rewards/margins": 0.08723679184913635, "rewards/rejected": -0.1676015555858612, "step": 9780 }, { "epoch": 1.77, "grad_norm": 2.153475761413574, "learning_rate": 2.3572572426845246e-06, "log_odds_chosen": 0.9047862887382507, "log_odds_ratio": -0.5216431617736816, "logits/chosen": -0.4829631745815277, "logits/rejected": -0.4913422167301178, "logps/chosen": -0.9199289083480835, "logps/rejected": -1.498349905014038, "loss": 1.0016, "nll_loss": 0.9494854211807251, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09199289977550507, "rewards/margins": 0.057842087000608444, "rewards/rejected": -0.14983497560024261, "step": 9790 }, { "epoch": 1.77, "grad_norm": 2.112905263900757, "learning_rate": 2.3514339787450865e-06, "log_odds_chosen": 1.2953317165374756, "log_odds_ratio": -0.43290096521377563, "logits/chosen": -0.44959911704063416, "logits/rejected": -0.42779120802879333, "logps/chosen": -0.8838974237442017, "logps/rejected": -1.8059628009796143, "loss": 0.9752, "nll_loss": 0.9318834543228149, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08838973939418793, "rewards/margins": 0.09220656007528305, "rewards/rejected": -0.18059630692005157, "step": 9800 }, { "epoch": 1.77, "grad_norm": 1.9552569389343262, "learning_rate": 2.3456107148056483e-06, "log_odds_chosen": 1.206032156944275, "log_odds_ratio": -0.4666607975959778, "logits/chosen": -0.37482333183288574, "logits/rejected": -0.40247243642807007, "logps/chosen": -0.8406115770339966, "logps/rejected": -1.7043174505233765, "loss": 0.9233, "nll_loss": 0.8766835927963257, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0840611681342125, "rewards/margins": 0.08637058734893799, "rewards/rejected": -0.17043174803256989, "step": 9810 }, { "epoch": 1.77, "grad_norm": 1.8231532573699951, "learning_rate": 2.3397874508662102e-06, "log_odds_chosen": 0.8592397570610046, "log_odds_ratio": -0.5452404022216797, "logits/chosen": -0.43346747756004333, "logits/rejected": -0.44631171226501465, "logps/chosen": -0.9697023630142212, "logps/rejected": -1.4977270364761353, "loss": 0.9444, "nll_loss": 0.8898833990097046, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09697023779153824, "rewards/margins": 0.05280245468020439, "rewards/rejected": -0.14977270364761353, "step": 9820 }, { "epoch": 1.78, "grad_norm": 1.39771568775177, "learning_rate": 2.3339641869267725e-06, "log_odds_chosen": 0.6571693420410156, "log_odds_ratio": -0.5906838774681091, "logits/chosen": -0.44236254692077637, "logits/rejected": -0.44203391671180725, "logps/chosen": -0.9650084376335144, "logps/rejected": -1.4409892559051514, "loss": 1.0067, "nll_loss": 0.9475903511047363, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09650083631277084, "rewards/margins": 0.047598086297512054, "rewards/rejected": -0.1440989375114441, "step": 9830 }, { "epoch": 1.78, "grad_norm": 2.2172060012817383, "learning_rate": 2.3281409229873344e-06, "log_odds_chosen": 0.7106519937515259, "log_odds_ratio": -0.539935827255249, "logits/chosen": -0.4445548951625824, "logits/rejected": -0.43924275040626526, "logps/chosen": -0.906205952167511, "logps/rejected": -1.373451590538025, "loss": 0.9577, "nll_loss": 0.9036978483200073, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09062059223651886, "rewards/margins": 0.046724557876586914, "rewards/rejected": -0.13734516501426697, "step": 9840 }, { "epoch": 1.78, "grad_norm": 0.8983945250511169, "learning_rate": 2.3223176590478963e-06, "log_odds_chosen": 1.0619983673095703, "log_odds_ratio": -0.49372729659080505, "logits/chosen": -0.4473651349544525, "logits/rejected": -0.4316297173500061, "logps/chosen": -0.9270407557487488, "logps/rejected": -1.694265604019165, "loss": 0.9834, "nll_loss": 0.9340094327926636, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09270408004522324, "rewards/margins": 0.07672245800495148, "rewards/rejected": -0.16942653059959412, "step": 9850 }, { "epoch": 1.78, "grad_norm": 1.786767840385437, "learning_rate": 2.316494395108458e-06, "log_odds_chosen": 1.2031980752944946, "log_odds_ratio": -0.4756312370300293, "logits/chosen": -0.45098644495010376, "logits/rejected": -0.4427367150783539, "logps/chosen": -0.863764762878418, "logps/rejected": -1.763685941696167, "loss": 0.8749, "nll_loss": 0.8273009061813354, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08637648075819016, "rewards/margins": 0.08999210596084595, "rewards/rejected": -0.1763685941696167, "step": 9860 }, { "epoch": 1.78, "grad_norm": 1.3234548568725586, "learning_rate": 2.31067113116902e-06, "log_odds_chosen": 1.1590311527252197, "log_odds_ratio": -0.5146543383598328, "logits/chosen": -0.4411509931087494, "logits/rejected": -0.41618838906288147, "logps/chosen": -0.9716132283210754, "logps/rejected": -1.8201326131820679, "loss": 0.9392, "nll_loss": 0.8877296447753906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09716132283210754, "rewards/margins": 0.084851935505867, "rewards/rejected": -0.18201325833797455, "step": 9870 }, { "epoch": 1.78, "grad_norm": 1.1882553100585938, "learning_rate": 2.304847867229582e-06, "log_odds_chosen": 1.0298006534576416, "log_odds_ratio": -0.5688801407814026, "logits/chosen": -0.4849920868873596, "logits/rejected": -0.4678238034248352, "logps/chosen": -0.8872254490852356, "logps/rejected": -1.601528525352478, "loss": 0.9184, "nll_loss": 0.8614827990531921, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08872254192829132, "rewards/margins": 0.07143032550811768, "rewards/rejected": -0.160152867436409, "step": 9880 }, { "epoch": 1.79, "grad_norm": 1.4803732633590698, "learning_rate": 2.2990246032901443e-06, "log_odds_chosen": 1.0394694805145264, "log_odds_ratio": -0.5270654559135437, "logits/chosen": -0.4371699392795563, "logits/rejected": -0.4427434504032135, "logps/chosen": -0.9934245944023132, "logps/rejected": -1.740962028503418, "loss": 0.9955, "nll_loss": 0.9427839517593384, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0993424504995346, "rewards/margins": 0.07475373893976212, "rewards/rejected": -0.17409618198871613, "step": 9890 }, { "epoch": 1.79, "grad_norm": 1.5292103290557861, "learning_rate": 2.2932013393507057e-06, "log_odds_chosen": 0.7853134870529175, "log_odds_ratio": -0.5695432424545288, "logits/chosen": -0.45036354660987854, "logits/rejected": -0.4651457369327545, "logps/chosen": -0.9447164535522461, "logps/rejected": -1.521506905555725, "loss": 0.972, "nll_loss": 0.9150772094726562, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09447164833545685, "rewards/margins": 0.05767903849482536, "rewards/rejected": -0.1521506905555725, "step": 9900 }, { "epoch": 1.79, "grad_norm": 2.0200865268707275, "learning_rate": 2.2873780754112676e-06, "log_odds_chosen": 1.0599063634872437, "log_odds_ratio": -0.5302172899246216, "logits/chosen": -0.4713926315307617, "logits/rejected": -0.4528167247772217, "logps/chosen": -0.8392230868339539, "logps/rejected": -1.5280404090881348, "loss": 0.9771, "nll_loss": 0.9240929484367371, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08392231166362762, "rewards/margins": 0.06888174265623093, "rewards/rejected": -0.15280406177043915, "step": 9910 }, { "epoch": 1.79, "grad_norm": 1.2986183166503906, "learning_rate": 2.28155481147183e-06, "log_odds_chosen": 0.6897674202919006, "log_odds_ratio": -0.6012214422225952, "logits/chosen": -0.48748350143432617, "logits/rejected": -0.5002898573875427, "logps/chosen": -1.021876573562622, "logps/rejected": -1.4822615385055542, "loss": 1.03, "nll_loss": 0.9698923826217651, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10218765586614609, "rewards/margins": 0.04603850096464157, "rewards/rejected": -0.14822617173194885, "step": 9920 }, { "epoch": 1.79, "grad_norm": 0.9028427600860596, "learning_rate": 2.275731547532392e-06, "log_odds_chosen": 1.1438870429992676, "log_odds_ratio": -0.4906987249851227, "logits/chosen": -0.4426101744174957, "logits/rejected": -0.44273123145103455, "logps/chosen": -0.8546684384346008, "logps/rejected": -1.698992371559143, "loss": 0.9586, "nll_loss": 0.9095567464828491, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08546683937311172, "rewards/margins": 0.08443240821361542, "rewards/rejected": -0.16989924013614655, "step": 9930 }, { "epoch": 1.8, "grad_norm": 2.1396689414978027, "learning_rate": 2.269908283592954e-06, "log_odds_chosen": 1.3287298679351807, "log_odds_ratio": -0.4608958661556244, "logits/chosen": -0.41538411378860474, "logits/rejected": -0.4415758550167084, "logps/chosen": -0.8634954690933228, "logps/rejected": -1.8478952646255493, "loss": 0.9885, "nll_loss": 0.9423999786376953, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08634954690933228, "rewards/margins": 0.09843997657299042, "rewards/rejected": -0.1847895085811615, "step": 9940 }, { "epoch": 1.8, "grad_norm": 2.275242328643799, "learning_rate": 2.2640850196535156e-06, "log_odds_chosen": 0.6594809293746948, "log_odds_ratio": -0.5583489537239075, "logits/chosen": -0.48110976815223694, "logits/rejected": -0.5074446797370911, "logps/chosen": -0.9442492723464966, "logps/rejected": -1.3900192975997925, "loss": 0.9743, "nll_loss": 0.9184621572494507, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09442492574453354, "rewards/margins": 0.04457702115178108, "rewards/rejected": -0.13900193572044373, "step": 9950 }, { "epoch": 1.8, "grad_norm": 1.6967520713806152, "learning_rate": 2.2582617557140775e-06, "log_odds_chosen": 0.7493517398834229, "log_odds_ratio": -0.5595013499259949, "logits/chosen": -0.5545259714126587, "logits/rejected": -0.5433686375617981, "logps/chosen": -0.9461520314216614, "logps/rejected": -1.4703208208084106, "loss": 1.0238, "nll_loss": 0.967811107635498, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09461519867181778, "rewards/margins": 0.05241687223315239, "rewards/rejected": -0.14703206717967987, "step": 9960 }, { "epoch": 1.8, "grad_norm": 1.5574473142623901, "learning_rate": 2.25243849177464e-06, "log_odds_chosen": 1.2710860967636108, "log_odds_ratio": -0.4554689824581146, "logits/chosen": -0.45570430159568787, "logits/rejected": -0.4198875427246094, "logps/chosen": -0.9600180387496948, "logps/rejected": -1.9265406131744385, "loss": 0.9535, "nll_loss": 0.9079564213752747, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09600180387496948, "rewards/margins": 0.09665225446224213, "rewards/rejected": -0.1926540583372116, "step": 9970 }, { "epoch": 1.8, "grad_norm": 1.789555311203003, "learning_rate": 2.2466152278352017e-06, "log_odds_chosen": 1.1111174821853638, "log_odds_ratio": -0.49643293023109436, "logits/chosen": -0.4519789218902588, "logits/rejected": -0.4754433035850525, "logps/chosen": -0.8925978541374207, "logps/rejected": -1.6644165515899658, "loss": 0.9226, "nll_loss": 0.8729545474052429, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0892597883939743, "rewards/margins": 0.0771818682551384, "rewards/rejected": -0.1664416491985321, "step": 9980 }, { "epoch": 1.8, "grad_norm": 1.401259183883667, "learning_rate": 2.240791963895763e-06, "log_odds_chosen": 1.0764435529708862, "log_odds_ratio": -0.5070358514785767, "logits/chosen": -0.454425573348999, "logits/rejected": -0.4417805075645447, "logps/chosen": -0.9401386380195618, "logps/rejected": -1.649101972579956, "loss": 0.9432, "nll_loss": 0.8924501538276672, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09401386976242065, "rewards/margins": 0.07089634984731674, "rewards/rejected": -0.1649101972579956, "step": 9990 }, { "epoch": 1.81, "grad_norm": 0.8955624103546143, "learning_rate": 2.2349686999563254e-06, "log_odds_chosen": 0.9595357775688171, "log_odds_ratio": -0.5476449131965637, "logits/chosen": -0.5105107426643372, "logits/rejected": -0.513346254825592, "logps/chosen": -0.9034037590026855, "logps/rejected": -1.5487500429153442, "loss": 1.0059, "nll_loss": 0.9511575698852539, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09034038335084915, "rewards/margins": 0.06453461945056915, "rewards/rejected": -0.1548749953508377, "step": 10000 }, { "epoch": 1.81, "grad_norm": 2.2015881538391113, "learning_rate": 2.2291454360168873e-06, "log_odds_chosen": 1.0903244018554688, "log_odds_ratio": -0.4742640554904938, "logits/chosen": -0.44515347480773926, "logits/rejected": -0.48136234283447266, "logps/chosen": -0.9461368322372437, "logps/rejected": -1.7086235284805298, "loss": 0.9172, "nll_loss": 0.8697601556777954, "rewards/accuracies": 0.75, "rewards/chosen": -0.0946136862039566, "rewards/margins": 0.0762486606836319, "rewards/rejected": -0.1708623617887497, "step": 10010 }, { "epoch": 1.81, "grad_norm": 0.9485613107681274, "learning_rate": 2.2233221720774492e-06, "log_odds_chosen": 0.9951506853103638, "log_odds_ratio": -0.4970771372318268, "logits/chosen": -0.3801301419734955, "logits/rejected": -0.40891996026039124, "logps/chosen": -0.9870649576187134, "logps/rejected": -1.6817152500152588, "loss": 0.9403, "nll_loss": 0.890583872795105, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09870649874210358, "rewards/margins": 0.0694650262594223, "rewards/rejected": -0.16817152500152588, "step": 10020 }, { "epoch": 1.81, "grad_norm": 1.397936463356018, "learning_rate": 2.2174989081380115e-06, "log_odds_chosen": 1.0712759494781494, "log_odds_ratio": -0.516617476940155, "logits/chosen": -0.45938482880592346, "logits/rejected": -0.47290220856666565, "logps/chosen": -0.908774197101593, "logps/rejected": -1.6888080835342407, "loss": 0.9708, "nll_loss": 0.9191882014274597, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09087742865085602, "rewards/margins": 0.0780033990740776, "rewards/rejected": -0.16888082027435303, "step": 10030 }, { "epoch": 1.81, "grad_norm": 1.9884051084518433, "learning_rate": 2.211675644198573e-06, "log_odds_chosen": 0.7885714769363403, "log_odds_ratio": -0.62605881690979, "logits/chosen": -0.4488893151283264, "logits/rejected": -0.4411085247993469, "logps/chosen": -0.9576784372329712, "logps/rejected": -1.561320185661316, "loss": 0.9844, "nll_loss": 0.9217513799667358, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09576784074306488, "rewards/margins": 0.06036417931318283, "rewards/rejected": -0.1561320275068283, "step": 10040 }, { "epoch": 1.82, "grad_norm": 1.355200171470642, "learning_rate": 2.2058523802591353e-06, "log_odds_chosen": 1.0193064212799072, "log_odds_ratio": -0.5205415487289429, "logits/chosen": -0.47365063428878784, "logits/rejected": -0.4856860637664795, "logps/chosen": -0.882122814655304, "logps/rejected": -1.5590479373931885, "loss": 0.9558, "nll_loss": 0.9037929773330688, "rewards/accuracies": 0.75, "rewards/chosen": -0.08821228891611099, "rewards/margins": 0.06769250333309174, "rewards/rejected": -0.15590479969978333, "step": 10050 }, { "epoch": 1.82, "grad_norm": 0.8581579327583313, "learning_rate": 2.200029116319697e-06, "log_odds_chosen": 0.9242337942123413, "log_odds_ratio": -0.5349970459938049, "logits/chosen": -0.4497644901275635, "logits/rejected": -0.452747106552124, "logps/chosen": -0.8844796419143677, "logps/rejected": -1.5484631061553955, "loss": 1.0005, "nll_loss": 0.9469534754753113, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08844795823097229, "rewards/margins": 0.06639834493398666, "rewards/rejected": -0.15484629571437836, "step": 10060 }, { "epoch": 1.82, "grad_norm": 2.4767024517059326, "learning_rate": 2.194205852380259e-06, "log_odds_chosen": 1.2953739166259766, "log_odds_ratio": -0.42459726333618164, "logits/chosen": -0.43675118684768677, "logits/rejected": -0.46500349044799805, "logps/chosen": -0.8256725072860718, "logps/rejected": -1.7106800079345703, "loss": 0.9456, "nll_loss": 0.9031159281730652, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0825672596693039, "rewards/margins": 0.08850078284740448, "rewards/rejected": -0.17106802761554718, "step": 10070 }, { "epoch": 1.82, "grad_norm": 2.0872771739959717, "learning_rate": 2.188382588440821e-06, "log_odds_chosen": 0.7203958630561829, "log_odds_ratio": -0.5865238904953003, "logits/chosen": -0.4555412828922272, "logits/rejected": -0.43005886673927307, "logps/chosen": -0.9552356600761414, "logps/rejected": -1.4711189270019531, "loss": 0.9704, "nll_loss": 0.9117962718009949, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09552355855703354, "rewards/margins": 0.05158834531903267, "rewards/rejected": -0.1471119225025177, "step": 10080 }, { "epoch": 1.82, "grad_norm": 0.9507777690887451, "learning_rate": 2.182559324501383e-06, "log_odds_chosen": 1.4054168462753296, "log_odds_ratio": -0.49908047914505005, "logits/chosen": -0.4112454950809479, "logits/rejected": -0.392622709274292, "logps/chosen": -0.8882936239242554, "logps/rejected": -1.9450048208236694, "loss": 0.9121, "nll_loss": 0.862238883972168, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08882936090230942, "rewards/margins": 0.10567110776901245, "rewards/rejected": -0.19450047612190247, "step": 10090 }, { "epoch": 1.82, "grad_norm": 1.3266576528549194, "learning_rate": 2.1767360605619447e-06, "log_odds_chosen": 0.7194452285766602, "log_odds_ratio": -0.5657768249511719, "logits/chosen": -0.4552794396877289, "logits/rejected": -0.4327179491519928, "logps/chosen": -0.927895188331604, "logps/rejected": -1.453904390335083, "loss": 0.9989, "nll_loss": 0.9423456192016602, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.09278951585292816, "rewards/margins": 0.05260094255208969, "rewards/rejected": -0.14539046585559845, "step": 10100 }, { "epoch": 1.83, "grad_norm": 1.1379450559616089, "learning_rate": 2.170912796622507e-06, "log_odds_chosen": 0.949578583240509, "log_odds_ratio": -0.52561354637146, "logits/chosen": -0.45671501755714417, "logits/rejected": -0.478085994720459, "logps/chosen": -0.9839479327201843, "logps/rejected": -1.619179368019104, "loss": 0.9291, "nll_loss": 0.876539409160614, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09839479625225067, "rewards/margins": 0.06352315098047256, "rewards/rejected": -0.16191793978214264, "step": 10110 }, { "epoch": 1.83, "grad_norm": 0.87856125831604, "learning_rate": 2.1650895326830685e-06, "log_odds_chosen": 1.1006487607955933, "log_odds_ratio": -0.45974189043045044, "logits/chosen": -0.4218316674232483, "logits/rejected": -0.46339720487594604, "logps/chosen": -0.8847794532775879, "logps/rejected": -1.6512638330459595, "loss": 0.9331, "nll_loss": 0.887128472328186, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08847793191671371, "rewards/margins": 0.07664843648672104, "rewards/rejected": -0.16512638330459595, "step": 10120 }, { "epoch": 1.83, "grad_norm": 1.5756317377090454, "learning_rate": 2.1592662687436304e-06, "log_odds_chosen": 1.3205078840255737, "log_odds_ratio": -0.45531734824180603, "logits/chosen": -0.40888315439224243, "logits/rejected": -0.4066869616508484, "logps/chosen": -0.8146879076957703, "logps/rejected": -1.7089191675186157, "loss": 0.8601, "nll_loss": 0.8145501017570496, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08146879822015762, "rewards/margins": 0.08942312747240067, "rewards/rejected": -0.1708919107913971, "step": 10130 }, { "epoch": 1.83, "grad_norm": 2.2840380668640137, "learning_rate": 2.1534430048041927e-06, "log_odds_chosen": 0.9677292108535767, "log_odds_ratio": -0.5366533994674683, "logits/chosen": -0.4365871846675873, "logits/rejected": -0.45229673385620117, "logps/chosen": -0.9193583726882935, "logps/rejected": -1.5962176322937012, "loss": 0.8965, "nll_loss": 0.8428059816360474, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09193582832813263, "rewards/margins": 0.06768593192100525, "rewards/rejected": -0.15962176024913788, "step": 10140 }, { "epoch": 1.83, "grad_norm": 1.5510019063949585, "learning_rate": 2.1476197408647546e-06, "log_odds_chosen": 1.1609294414520264, "log_odds_ratio": -0.5104137063026428, "logits/chosen": -0.4274633824825287, "logits/rejected": -0.46502789855003357, "logps/chosen": -0.8433011770248413, "logps/rejected": -1.6716792583465576, "loss": 0.8893, "nll_loss": 0.8382207155227661, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08433012664318085, "rewards/margins": 0.08283781260251999, "rewards/rejected": -0.16716793179512024, "step": 10150 }, { "epoch": 1.84, "grad_norm": 1.7966018915176392, "learning_rate": 2.141796476925317e-06, "log_odds_chosen": 1.1001217365264893, "log_odds_ratio": -0.4984889030456543, "logits/chosen": -0.4312856197357178, "logits/rejected": -0.45621857047080994, "logps/chosen": -0.904300332069397, "logps/rejected": -1.708540678024292, "loss": 0.9235, "nll_loss": 0.8736575841903687, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09043003618717194, "rewards/margins": 0.08042405545711517, "rewards/rejected": -0.1708541065454483, "step": 10160 }, { "epoch": 1.84, "grad_norm": 1.6098682880401611, "learning_rate": 2.1359732129858784e-06, "log_odds_chosen": 0.7923783659934998, "log_odds_ratio": -0.5927404761314392, "logits/chosen": -0.4654027819633484, "logits/rejected": -0.44761067628860474, "logps/chosen": -0.9215261340141296, "logps/rejected": -1.476153016090393, "loss": 0.9836, "nll_loss": 0.9243131875991821, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09215261787176132, "rewards/margins": 0.05546267703175545, "rewards/rejected": -0.14761529862880707, "step": 10170 }, { "epoch": 1.84, "grad_norm": 1.7316303253173828, "learning_rate": 2.1301499490464402e-06, "log_odds_chosen": 0.8692628741264343, "log_odds_ratio": -0.5368847250938416, "logits/chosen": -0.5000066757202148, "logits/rejected": -0.4991425573825836, "logps/chosen": -1.024705410003662, "logps/rejected": -1.6390259265899658, "loss": 1.0576, "nll_loss": 1.0039422512054443, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.10247053951025009, "rewards/margins": 0.06143205612897873, "rewards/rejected": -0.16390261054039001, "step": 10180 }, { "epoch": 1.84, "grad_norm": 1.9501219987869263, "learning_rate": 2.1243266851070026e-06, "log_odds_chosen": 1.5202281475067139, "log_odds_ratio": -0.44537049531936646, "logits/chosen": -0.411649227142334, "logits/rejected": -0.43437641859054565, "logps/chosen": -0.9563344717025757, "logps/rejected": -2.076907157897949, "loss": 0.9448, "nll_loss": 0.9002774953842163, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09563344717025757, "rewards/margins": 0.11205726861953735, "rewards/rejected": -0.20769071578979492, "step": 10190 }, { "epoch": 1.84, "grad_norm": 1.2907829284667969, "learning_rate": 2.1185034211675644e-06, "log_odds_chosen": 0.8441012501716614, "log_odds_ratio": -0.563463568687439, "logits/chosen": -0.5322362184524536, "logits/rejected": -0.51605224609375, "logps/chosen": -1.0157923698425293, "logps/rejected": -1.6410290002822876, "loss": 1.0297, "nll_loss": 0.973351776599884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10157923400402069, "rewards/margins": 0.06252367049455643, "rewards/rejected": -0.16410291194915771, "step": 10200 }, { "epoch": 1.84, "grad_norm": 1.910621166229248, "learning_rate": 2.112680157228126e-06, "log_odds_chosen": 1.0539970397949219, "log_odds_ratio": -0.4770180583000183, "logits/chosen": -0.44424891471862793, "logits/rejected": -0.450103223323822, "logps/chosen": -0.9240690469741821, "logps/rejected": -1.6838849782943726, "loss": 0.9446, "nll_loss": 0.8969265818595886, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09240689128637314, "rewards/margins": 0.07598160207271576, "rewards/rejected": -0.1683885157108307, "step": 10210 }, { "epoch": 1.85, "grad_norm": 1.2610995769500732, "learning_rate": 2.1068568932886882e-06, "log_odds_chosen": 1.1630918979644775, "log_odds_ratio": -0.49245303869247437, "logits/chosen": -0.4346703588962555, "logits/rejected": -0.4480690360069275, "logps/chosen": -0.879808247089386, "logps/rejected": -1.7137651443481445, "loss": 0.8559, "nll_loss": 0.8066719770431519, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08798082172870636, "rewards/margins": 0.08339568227529526, "rewards/rejected": -0.17137651145458221, "step": 10220 }, { "epoch": 1.85, "grad_norm": 1.153172492980957, "learning_rate": 2.10103362934925e-06, "log_odds_chosen": 1.0892242193222046, "log_odds_ratio": -0.5428228378295898, "logits/chosen": -0.47800391912460327, "logits/rejected": -0.46864748001098633, "logps/chosen": -0.8633974194526672, "logps/rejected": -1.635337471961975, "loss": 0.9987, "nll_loss": 0.9444171786308289, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08633974194526672, "rewards/margins": 0.07719399780035019, "rewards/rejected": -0.1635337471961975, "step": 10230 }, { "epoch": 1.85, "grad_norm": 1.19821035861969, "learning_rate": 2.095210365409812e-06, "log_odds_chosen": 1.3325726985931396, "log_odds_ratio": -0.4372948706150055, "logits/chosen": -0.39473778009414673, "logits/rejected": -0.45915713906288147, "logps/chosen": -0.8078659772872925, "logps/rejected": -1.7254482507705688, "loss": 0.8942, "nll_loss": 0.8504945635795593, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08078660070896149, "rewards/margins": 0.09175822883844376, "rewards/rejected": -0.17254483699798584, "step": 10240 }, { "epoch": 1.85, "grad_norm": 0.8175916075706482, "learning_rate": 2.0893871014703743e-06, "log_odds_chosen": 0.9957863688468933, "log_odds_ratio": -0.4890173375606537, "logits/chosen": -0.4783898890018463, "logits/rejected": -0.44662246108055115, "logps/chosen": -0.8859121203422546, "logps/rejected": -1.575537919998169, "loss": 0.8932, "nll_loss": 0.8443046808242798, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08859121799468994, "rewards/margins": 0.06896258145570755, "rewards/rejected": -0.1575538069009781, "step": 10250 }, { "epoch": 1.85, "grad_norm": 1.2621811628341675, "learning_rate": 2.0835638375309358e-06, "log_odds_chosen": 0.5714842081069946, "log_odds_ratio": -0.6428765654563904, "logits/chosen": -0.4756339490413666, "logits/rejected": -0.4753071367740631, "logps/chosen": -0.949560821056366, "logps/rejected": -1.346477746963501, "loss": 0.9943, "nll_loss": 0.9299713373184204, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09495609253644943, "rewards/margins": 0.03969167545437813, "rewards/rejected": -0.13464777171611786, "step": 10260 }, { "epoch": 1.86, "grad_norm": 1.2808736562728882, "learning_rate": 2.077740573591498e-06, "log_odds_chosen": 1.1980714797973633, "log_odds_ratio": -0.4109151363372803, "logits/chosen": -0.4529429078102112, "logits/rejected": -0.46951961517333984, "logps/chosen": -0.8301106691360474, "logps/rejected": -1.5841398239135742, "loss": 0.923, "nll_loss": 0.8818821907043457, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.08301106840372086, "rewards/margins": 0.07540292292833328, "rewards/rejected": -0.15841399133205414, "step": 10270 }, { "epoch": 1.86, "grad_norm": 1.2657065391540527, "learning_rate": 2.07191730965206e-06, "log_odds_chosen": 0.738211989402771, "log_odds_ratio": -0.5358443856239319, "logits/chosen": -0.5280221700668335, "logits/rejected": -0.5341017246246338, "logps/chosen": -1.0342191457748413, "logps/rejected": -1.5212204456329346, "loss": 1.0247, "nll_loss": 0.9711573719978333, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10342191159725189, "rewards/margins": 0.04870011284947395, "rewards/rejected": -0.15212205052375793, "step": 10280 }, { "epoch": 1.86, "grad_norm": 1.4096204042434692, "learning_rate": 2.066094045712622e-06, "log_odds_chosen": 0.7922347784042358, "log_odds_ratio": -0.6058631539344788, "logits/chosen": -0.46681904792785645, "logits/rejected": -0.46833691000938416, "logps/chosen": -0.8852903246879578, "logps/rejected": -1.4500166177749634, "loss": 0.922, "nll_loss": 0.8614572286605835, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08852903544902802, "rewards/margins": 0.05647264048457146, "rewards/rejected": -0.14500167965888977, "step": 10290 }, { "epoch": 1.86, "grad_norm": 1.230337142944336, "learning_rate": 2.0602707817731837e-06, "log_odds_chosen": 0.8310950398445129, "log_odds_ratio": -0.537976861000061, "logits/chosen": -0.43221116065979004, "logits/rejected": -0.4261694550514221, "logps/chosen": -0.9257045984268188, "logps/rejected": -1.5209006071090698, "loss": 1.0114, "nll_loss": 0.9576155543327332, "rewards/accuracies": 0.625, "rewards/chosen": -0.092570461332798, "rewards/margins": 0.05951959639787674, "rewards/rejected": -0.15209007263183594, "step": 10300 }, { "epoch": 1.86, "grad_norm": 1.3708995580673218, "learning_rate": 2.0544475178337456e-06, "log_odds_chosen": 0.9308149218559265, "log_odds_ratio": -0.5380842685699463, "logits/chosen": -0.41678470373153687, "logits/rejected": -0.46170076727867126, "logps/chosen": -0.840059757232666, "logps/rejected": -1.5198965072631836, "loss": 0.9102, "nll_loss": 0.856410026550293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08400598913431168, "rewards/margins": 0.06798367202281952, "rewards/rejected": -0.1519896537065506, "step": 10310 }, { "epoch": 1.86, "grad_norm": 1.3757504224777222, "learning_rate": 2.0486242538943075e-06, "log_odds_chosen": 1.27957284450531, "log_odds_ratio": -0.444243848323822, "logits/chosen": -0.374275267124176, "logits/rejected": -0.4208962917327881, "logps/chosen": -0.8259752988815308, "logps/rejected": -1.6976467370986938, "loss": 0.8901, "nll_loss": 0.8456643223762512, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0825975313782692, "rewards/margins": 0.08716712892055511, "rewards/rejected": -0.1697646528482437, "step": 10320 }, { "epoch": 1.87, "grad_norm": 0.9839291572570801, "learning_rate": 2.04280098995487e-06, "log_odds_chosen": 1.0052300691604614, "log_odds_ratio": -0.49391070008277893, "logits/chosen": -0.4329233169555664, "logits/rejected": -0.4708273410797119, "logps/chosen": -0.9669458270072937, "logps/rejected": -1.6411478519439697, "loss": 0.9361, "nll_loss": 0.8866797685623169, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09669457376003265, "rewards/margins": 0.06742019951343536, "rewards/rejected": -0.1641147881746292, "step": 10330 }, { "epoch": 1.87, "grad_norm": 1.147086262702942, "learning_rate": 2.0369777260154317e-06, "log_odds_chosen": 0.954150378704071, "log_odds_ratio": -0.5189642310142517, "logits/chosen": -0.4370267391204834, "logits/rejected": -0.4634695053100586, "logps/chosen": -0.8270130157470703, "logps/rejected": -1.4731693267822266, "loss": 0.935, "nll_loss": 0.8830587267875671, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08270130306482315, "rewards/margins": 0.0646156296133995, "rewards/rejected": -0.14731693267822266, "step": 10340 }, { "epoch": 1.87, "grad_norm": 1.124394178390503, "learning_rate": 2.0311544620759936e-06, "log_odds_chosen": 1.1465202569961548, "log_odds_ratio": -0.5014799237251282, "logits/chosen": -0.43889516592025757, "logits/rejected": -0.45893678069114685, "logps/chosen": -0.9335900545120239, "logps/rejected": -1.7377182245254517, "loss": 0.9606, "nll_loss": 0.910497784614563, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09335900843143463, "rewards/margins": 0.08041281998157501, "rewards/rejected": -0.17377182841300964, "step": 10350 }, { "epoch": 1.87, "grad_norm": 1.9966627359390259, "learning_rate": 2.0253311981365555e-06, "log_odds_chosen": 1.1126954555511475, "log_odds_ratio": -0.5292860269546509, "logits/chosen": -0.46195143461227417, "logits/rejected": -0.48550620675086975, "logps/chosen": -0.8964789509773254, "logps/rejected": -1.733088731765747, "loss": 0.963, "nll_loss": 0.9100550413131714, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08964791148900986, "rewards/margins": 0.08366095274686813, "rewards/rejected": -0.17330887913703918, "step": 10360 }, { "epoch": 1.87, "grad_norm": 2.0089480876922607, "learning_rate": 2.0195079341971174e-06, "log_odds_chosen": 1.166711449623108, "log_odds_ratio": -0.5026243925094604, "logits/chosen": -0.44632425904273987, "logits/rejected": -0.43838948011398315, "logps/chosen": -0.8504183888435364, "logps/rejected": -1.643214225769043, "loss": 0.95, "nll_loss": 0.899712860584259, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0850418359041214, "rewards/margins": 0.0792795792222023, "rewards/rejected": -0.1643213927745819, "step": 10370 }, { "epoch": 1.88, "grad_norm": 1.4208773374557495, "learning_rate": 2.0136846702576797e-06, "log_odds_chosen": 1.0821831226348877, "log_odds_ratio": -0.5470158457756042, "logits/chosen": -0.4514341950416565, "logits/rejected": -0.439272403717041, "logps/chosen": -0.9203437566757202, "logps/rejected": -1.6602756977081299, "loss": 0.8832, "nll_loss": 0.8285048604011536, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09203437715768814, "rewards/margins": 0.07399321347475052, "rewards/rejected": -0.16602759063243866, "step": 10380 }, { "epoch": 1.88, "grad_norm": 1.332013726234436, "learning_rate": 2.007861406318241e-06, "log_odds_chosen": 1.0032024383544922, "log_odds_ratio": -0.49925652146339417, "logits/chosen": -0.41910386085510254, "logits/rejected": -0.44506892561912537, "logps/chosen": -0.7921913862228394, "logps/rejected": -1.494015097618103, "loss": 0.9336, "nll_loss": 0.8837070465087891, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07921913266181946, "rewards/margins": 0.07018236815929413, "rewards/rejected": -0.14940151572227478, "step": 10390 }, { "epoch": 1.88, "grad_norm": 1.406699299812317, "learning_rate": 2.002038142378803e-06, "log_odds_chosen": 1.0385334491729736, "log_odds_ratio": -0.5291948318481445, "logits/chosen": -0.4945642948150635, "logits/rejected": -0.502414345741272, "logps/chosen": -0.8367093801498413, "logps/rejected": -1.5903352499008179, "loss": 1.0034, "nll_loss": 0.9504679441452026, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08367092907428741, "rewards/margins": 0.07536258548498154, "rewards/rejected": -0.15903352200984955, "step": 10400 }, { "epoch": 1.88, "grad_norm": 1.2992877960205078, "learning_rate": 1.996214878439365e-06, "log_odds_chosen": 1.3700332641601562, "log_odds_ratio": -0.4885827898979187, "logits/chosen": -0.4483235478401184, "logits/rejected": -0.45449957251548767, "logps/chosen": -0.8136239051818848, "logps/rejected": -1.7881104946136475, "loss": 0.9034, "nll_loss": 0.8545898199081421, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08136239647865295, "rewards/margins": 0.09744866192340851, "rewards/rejected": -0.17881104350090027, "step": 10410 }, { "epoch": 1.88, "grad_norm": 1.1529314517974854, "learning_rate": 1.9903916144999272e-06, "log_odds_chosen": 1.0313104391098022, "log_odds_ratio": -0.5160423517227173, "logits/chosen": -0.4965154528617859, "logits/rejected": -0.4709865152835846, "logps/chosen": -0.9274126291275024, "logps/rejected": -1.6849533319473267, "loss": 0.962, "nll_loss": 0.910438060760498, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09274126589298248, "rewards/margins": 0.07575404644012451, "rewards/rejected": -0.168495312333107, "step": 10420 }, { "epoch": 1.88, "grad_norm": 2.287060499191284, "learning_rate": 1.984568350560489e-06, "log_odds_chosen": 1.3181803226470947, "log_odds_ratio": -0.4897107481956482, "logits/chosen": -0.4968651235103607, "logits/rejected": -0.5039435625076294, "logps/chosen": -0.8362882733345032, "logps/rejected": -1.8195197582244873, "loss": 0.9172, "nll_loss": 0.8682142496109009, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0836288332939148, "rewards/margins": 0.09832315146923065, "rewards/rejected": -0.18195198476314545, "step": 10430 }, { "epoch": 1.89, "grad_norm": 2.968153953552246, "learning_rate": 1.978745086621051e-06, "log_odds_chosen": 1.207155466079712, "log_odds_ratio": -0.4584660530090332, "logits/chosen": -0.449285089969635, "logits/rejected": -0.45987534523010254, "logps/chosen": -0.8811396360397339, "logps/rejected": -1.7007176876068115, "loss": 0.9449, "nll_loss": 0.899084746837616, "rewards/accuracies": 0.75, "rewards/chosen": -0.08811396360397339, "rewards/margins": 0.08195780217647552, "rewards/rejected": -0.1700717806816101, "step": 10440 }, { "epoch": 1.89, "grad_norm": 1.2522773742675781, "learning_rate": 1.972921822681613e-06, "log_odds_chosen": 1.0319015979766846, "log_odds_ratio": -0.5145862102508545, "logits/chosen": -0.4706307053565979, "logits/rejected": -0.45233672857284546, "logps/chosen": -0.8894746899604797, "logps/rejected": -1.6508815288543701, "loss": 0.935, "nll_loss": 0.8835735321044922, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08894746750593185, "rewards/margins": 0.07614068686962128, "rewards/rejected": -0.16508816182613373, "step": 10450 }, { "epoch": 1.89, "grad_norm": 0.9435124397277832, "learning_rate": 1.9670985587421748e-06, "log_odds_chosen": 1.142953634262085, "log_odds_ratio": -0.4884144365787506, "logits/chosen": -0.4347442090511322, "logits/rejected": -0.4512789249420166, "logps/chosen": -0.8941251039505005, "logps/rejected": -1.6644847393035889, "loss": 0.9366, "nll_loss": 0.887799859046936, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08941250294446945, "rewards/margins": 0.07703599333763123, "rewards/rejected": -0.16644850373268127, "step": 10460 }, { "epoch": 1.89, "grad_norm": 1.5761239528656006, "learning_rate": 1.961275294802737e-06, "log_odds_chosen": 1.017764687538147, "log_odds_ratio": -0.5214357376098633, "logits/chosen": -0.4196850657463074, "logits/rejected": -0.41368383169174194, "logps/chosen": -1.0039745569229126, "logps/rejected": -1.7455123662948608, "loss": 0.9837, "nll_loss": 0.931601881980896, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10039746761322021, "rewards/margins": 0.07415377348661423, "rewards/rejected": -0.17455123364925385, "step": 10470 }, { "epoch": 1.89, "grad_norm": 1.391938328742981, "learning_rate": 1.9554520308632985e-06, "log_odds_chosen": 0.8118370175361633, "log_odds_ratio": -0.5162337422370911, "logits/chosen": -0.4641450345516205, "logits/rejected": -0.46027374267578125, "logps/chosen": -0.9821346402168274, "logps/rejected": -1.5798102617263794, "loss": 0.9943, "nll_loss": 0.9427239298820496, "rewards/accuracies": 0.625, "rewards/chosen": -0.09821345657110214, "rewards/margins": 0.059767574071884155, "rewards/rejected": -0.1579810380935669, "step": 10480 }, { "epoch": 1.89, "grad_norm": 1.5339981317520142, "learning_rate": 1.949628766923861e-06, "log_odds_chosen": 1.3268300294876099, "log_odds_ratio": -0.43518322706222534, "logits/chosen": -0.40181851387023926, "logits/rejected": -0.4130808711051941, "logps/chosen": -0.8143070340156555, "logps/rejected": -1.6981405019760132, "loss": 0.8613, "nll_loss": 0.8178032040596008, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08143070340156555, "rewards/margins": 0.08838334679603577, "rewards/rejected": -0.1698140650987625, "step": 10490 }, { "epoch": 1.9, "grad_norm": 1.1120244264602661, "learning_rate": 1.9438055029844227e-06, "log_odds_chosen": 0.5836787223815918, "log_odds_ratio": -0.6393161416053772, "logits/chosen": -0.5042875409126282, "logits/rejected": -0.4676700532436371, "logps/chosen": -1.0195683240890503, "logps/rejected": -1.4169611930847168, "loss": 0.9633, "nll_loss": 0.8993996381759644, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10195682942867279, "rewards/margins": 0.03973928466439247, "rewards/rejected": -0.14169611036777496, "step": 10500 }, { "epoch": 1.9, "grad_norm": 3.777588367462158, "learning_rate": 1.9379822390449846e-06, "log_odds_chosen": 0.7282012701034546, "log_odds_ratio": -0.5987340807914734, "logits/chosen": -0.4834275245666504, "logits/rejected": -0.49766093492507935, "logps/chosen": -0.9485651254653931, "logps/rejected": -1.4880679845809937, "loss": 1.0014, "nll_loss": 0.9415055513381958, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.09485651552677155, "rewards/margins": 0.053950272500514984, "rewards/rejected": -0.14880679547786713, "step": 10510 }, { "epoch": 1.9, "grad_norm": 1.464909315109253, "learning_rate": 1.9321589751055465e-06, "log_odds_chosen": 0.9100486636161804, "log_odds_ratio": -0.5332632064819336, "logits/chosen": -0.4278945326805115, "logits/rejected": -0.4454631209373474, "logps/chosen": -0.8939846754074097, "logps/rejected": -1.5239169597625732, "loss": 0.9558, "nll_loss": 0.9025092124938965, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08939848095178604, "rewards/margins": 0.06299323588609695, "rewards/rejected": -0.1523916870355606, "step": 10520 }, { "epoch": 1.9, "grad_norm": 1.9130334854125977, "learning_rate": 1.9263357111661084e-06, "log_odds_chosen": 0.9289069175720215, "log_odds_ratio": -0.48663145303726196, "logits/chosen": -0.46611160039901733, "logits/rejected": -0.470900297164917, "logps/chosen": -0.9118865728378296, "logps/rejected": -1.5583826303482056, "loss": 0.9263, "nll_loss": 0.8776055574417114, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09118865430355072, "rewards/margins": 0.06464960426092148, "rewards/rejected": -0.155838280916214, "step": 10530 }, { "epoch": 1.9, "grad_norm": 2.1003193855285645, "learning_rate": 1.9205124472266707e-06, "log_odds_chosen": 0.6223214864730835, "log_odds_ratio": -0.5882120132446289, "logits/chosen": -0.5001975297927856, "logits/rejected": -0.4912651479244232, "logps/chosen": -0.9899564981460571, "logps/rejected": -1.4137847423553467, "loss": 0.9585, "nll_loss": 0.8996666073799133, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09899566322565079, "rewards/margins": 0.042382821440696716, "rewards/rejected": -0.1413784921169281, "step": 10540 }, { "epoch": 1.91, "grad_norm": 0.984575092792511, "learning_rate": 1.9146891832872326e-06, "log_odds_chosen": 0.868719220161438, "log_odds_ratio": -0.570231556892395, "logits/chosen": -0.46261295676231384, "logits/rejected": -0.4425339102745056, "logps/chosen": -1.009018898010254, "logps/rejected": -1.5990309715270996, "loss": 0.9591, "nll_loss": 0.9021209478378296, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10090188682079315, "rewards/margins": 0.05900119990110397, "rewards/rejected": -0.15990306437015533, "step": 10550 }, { "epoch": 1.91, "grad_norm": 1.5289955139160156, "learning_rate": 1.9088659193477945e-06, "log_odds_chosen": 0.7999789714813232, "log_odds_ratio": -0.5057905316352844, "logits/chosen": -0.49575895071029663, "logits/rejected": -0.5013529062271118, "logps/chosen": -0.8297192454338074, "logps/rejected": -1.3387572765350342, "loss": 0.9545, "nll_loss": 0.9039432406425476, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08297193050384521, "rewards/margins": 0.050903789699077606, "rewards/rejected": -0.13387572765350342, "step": 10560 }, { "epoch": 1.91, "grad_norm": 2.148205041885376, "learning_rate": 1.9030426554083564e-06, "log_odds_chosen": 1.1249816417694092, "log_odds_ratio": -0.4598170816898346, "logits/chosen": -0.4110940098762512, "logits/rejected": -0.4696727693080902, "logps/chosen": -0.8722484707832336, "logps/rejected": -1.6248779296875, "loss": 0.9154, "nll_loss": 0.869467556476593, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08722484111785889, "rewards/margins": 0.07526294887065887, "rewards/rejected": -0.16248780488967896, "step": 10570 }, { "epoch": 1.91, "grad_norm": 2.001585006713867, "learning_rate": 1.8972193914689182e-06, "log_odds_chosen": 0.7757295370101929, "log_odds_ratio": -0.5288358926773071, "logits/chosen": -0.43838778138160706, "logits/rejected": -0.4571276605129242, "logps/chosen": -0.8766340017318726, "logps/rejected": -1.429776668548584, "loss": 0.8802, "nll_loss": 0.8273234367370605, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08766339719295502, "rewards/margins": 0.05531426519155502, "rewards/rejected": -0.14297766983509064, "step": 10580 }, { "epoch": 1.91, "grad_norm": 1.1974306106567383, "learning_rate": 1.8913961275294801e-06, "log_odds_chosen": 1.1978665590286255, "log_odds_ratio": -0.46727222204208374, "logits/chosen": -0.4318181574344635, "logits/rejected": -0.43885666131973267, "logps/chosen": -0.9730944633483887, "logps/rejected": -1.8150783777236938, "loss": 0.9856, "nll_loss": 0.9389021992683411, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09730944782495499, "rewards/margins": 0.08419839292764664, "rewards/rejected": -0.18150784075260162, "step": 10590 }, { "epoch": 1.91, "grad_norm": 1.1426621675491333, "learning_rate": 1.885572863590042e-06, "log_odds_chosen": 0.7413898706436157, "log_odds_ratio": -0.5726367235183716, "logits/chosen": -0.4949052929878235, "logits/rejected": -0.4821873605251312, "logps/chosen": -0.9837077260017395, "logps/rejected": -1.532716989517212, "loss": 1.0599, "nll_loss": 1.0026252269744873, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09837077558040619, "rewards/margins": 0.05490092560648918, "rewards/rejected": -0.15327170491218567, "step": 10600 }, { "epoch": 1.92, "grad_norm": 0.9339642524719238, "learning_rate": 1.8797495996506041e-06, "log_odds_chosen": 1.1743080615997314, "log_odds_ratio": -0.4658436179161072, "logits/chosen": -0.4181239604949951, "logits/rejected": -0.3980239927768707, "logps/chosen": -0.8593562841415405, "logps/rejected": -1.6901576519012451, "loss": 0.8921, "nll_loss": 0.8454761505126953, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08593563735485077, "rewards/margins": 0.08308015763759613, "rewards/rejected": -0.1690157949924469, "step": 10610 }, { "epoch": 1.92, "grad_norm": 1.617745041847229, "learning_rate": 1.873926335711166e-06, "log_odds_chosen": 1.598487377166748, "log_odds_ratio": -0.43908342719078064, "logits/chosen": -0.3870371878147125, "logits/rejected": -0.44044798612594604, "logps/chosen": -0.788866400718689, "logps/rejected": -1.896519660949707, "loss": 0.9262, "nll_loss": 0.8823148608207703, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07888665050268173, "rewards/margins": 0.11076532304286957, "rewards/rejected": -0.1896519660949707, "step": 10620 }, { "epoch": 1.92, "grad_norm": 2.108999729156494, "learning_rate": 1.8681030717717279e-06, "log_odds_chosen": 0.9158283472061157, "log_odds_ratio": -0.5347623229026794, "logits/chosen": -0.47179970145225525, "logits/rejected": -0.4699479937553406, "logps/chosen": -0.9932149052619934, "logps/rejected": -1.637498140335083, "loss": 0.968, "nll_loss": 0.91449373960495, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09932147711515427, "rewards/margins": 0.06442831456661224, "rewards/rejected": -0.1637497991323471, "step": 10630 }, { "epoch": 1.92, "grad_norm": 1.5383033752441406, "learning_rate": 1.86227980783229e-06, "log_odds_chosen": 0.801388144493103, "log_odds_ratio": -0.5564557909965515, "logits/chosen": -0.4179549217224121, "logits/rejected": -0.4258841574192047, "logps/chosen": -0.8891839981079102, "logps/rejected": -1.4303535223007202, "loss": 0.9048, "nll_loss": 0.8491758108139038, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08891840279102325, "rewards/margins": 0.05411697179079056, "rewards/rejected": -0.1430353820323944, "step": 10640 }, { "epoch": 1.92, "grad_norm": 1.2563393115997314, "learning_rate": 1.8564565438928519e-06, "log_odds_chosen": 1.0633662939071655, "log_odds_ratio": -0.4974342882633209, "logits/chosen": -0.4390109181404114, "logits/rejected": -0.45791512727737427, "logps/chosen": -0.9185087084770203, "logps/rejected": -1.6706024408340454, "loss": 0.9623, "nll_loss": 0.9125840067863464, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0918508842587471, "rewards/margins": 0.0752093642950058, "rewards/rejected": -0.1670602411031723, "step": 10650 }, { "epoch": 1.93, "grad_norm": 1.8719691038131714, "learning_rate": 1.850633279953414e-06, "log_odds_chosen": 1.075117826461792, "log_odds_ratio": -0.5143997669219971, "logits/chosen": -0.42022705078125, "logits/rejected": -0.43766602873802185, "logps/chosen": -0.8672161102294922, "logps/rejected": -1.615086317062378, "loss": 0.9092, "nll_loss": 0.8577867746353149, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08672161400318146, "rewards/margins": 0.07478703558444977, "rewards/rejected": -0.16150864958763123, "step": 10660 }, { "epoch": 1.93, "grad_norm": 1.6300572156906128, "learning_rate": 1.8448100160139756e-06, "log_odds_chosen": 0.7079966068267822, "log_odds_ratio": -0.5975538492202759, "logits/chosen": -0.4701474606990814, "logits/rejected": -0.442574679851532, "logps/chosen": -0.8829503059387207, "logps/rejected": -1.3568874597549438, "loss": 0.9297, "nll_loss": 0.8699787855148315, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08829504251480103, "rewards/margins": 0.04739370197057724, "rewards/rejected": -0.13568873703479767, "step": 10670 }, { "epoch": 1.93, "grad_norm": 1.6964452266693115, "learning_rate": 1.8389867520745377e-06, "log_odds_chosen": 0.7021001577377319, "log_odds_ratio": -0.5906314849853516, "logits/chosen": -0.4793156683444977, "logits/rejected": -0.458412230014801, "logps/chosen": -1.0250890254974365, "logps/rejected": -1.5190303325653076, "loss": 1.0251, "nll_loss": 0.9660388231277466, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.10250891745090485, "rewards/margins": 0.04939410835504532, "rewards/rejected": -0.15190303325653076, "step": 10680 }, { "epoch": 1.93, "grad_norm": 1.7809092998504639, "learning_rate": 1.8331634881350996e-06, "log_odds_chosen": 0.7496052980422974, "log_odds_ratio": -0.5474902987480164, "logits/chosen": -0.46370047330856323, "logits/rejected": -0.47559723258018494, "logps/chosen": -0.945774257183075, "logps/rejected": -1.4582557678222656, "loss": 0.9561, "nll_loss": 0.9013026356697083, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09457743167877197, "rewards/margins": 0.05124815180897713, "rewards/rejected": -0.1458255797624588, "step": 10690 }, { "epoch": 1.93, "grad_norm": 0.7367926836013794, "learning_rate": 1.8273402241956617e-06, "log_odds_chosen": 0.6301822066307068, "log_odds_ratio": -0.6171268224716187, "logits/chosen": -0.4975239634513855, "logits/rejected": -0.47095784544944763, "logps/chosen": -0.9024847149848938, "logps/rejected": -1.346397876739502, "loss": 0.9553, "nll_loss": 0.8935791254043579, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0902484729886055, "rewards/margins": 0.04439132288098335, "rewards/rejected": -0.13463978469371796, "step": 10700 }, { "epoch": 1.93, "grad_norm": 1.028935432434082, "learning_rate": 1.8215169602562234e-06, "log_odds_chosen": 0.8751462697982788, "log_odds_ratio": -0.5305973887443542, "logits/chosen": -0.46825432777404785, "logits/rejected": -0.48088812828063965, "logps/chosen": -0.8856697082519531, "logps/rejected": -1.4735255241394043, "loss": 0.9443, "nll_loss": 0.8912407755851746, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08856697380542755, "rewards/margins": 0.058785580098629, "rewards/rejected": -0.14735254645347595, "step": 10710 }, { "epoch": 1.94, "grad_norm": 0.9565833210945129, "learning_rate": 1.8156936963167855e-06, "log_odds_chosen": 0.7781075239181519, "log_odds_ratio": -0.611039400100708, "logits/chosen": -0.436478853225708, "logits/rejected": -0.42543378472328186, "logps/chosen": -1.1170709133148193, "logps/rejected": -1.660090684890747, "loss": 1.0555, "nll_loss": 0.9943556785583496, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.11170710623264313, "rewards/margins": 0.05430195480585098, "rewards/rejected": -0.1660090684890747, "step": 10720 }, { "epoch": 1.94, "grad_norm": 1.5010349750518799, "learning_rate": 1.8098704323773476e-06, "log_odds_chosen": 1.050125002861023, "log_odds_ratio": -0.5018699169158936, "logits/chosen": -0.4801279902458191, "logits/rejected": -0.4455450177192688, "logps/chosen": -0.8765707015991211, "logps/rejected": -1.631679892539978, "loss": 0.942, "nll_loss": 0.8918358087539673, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08765707910060883, "rewards/margins": 0.07551092654466629, "rewards/rejected": -0.16316799819469452, "step": 10730 }, { "epoch": 1.94, "grad_norm": 2.181539535522461, "learning_rate": 1.8040471684379093e-06, "log_odds_chosen": 0.9742463827133179, "log_odds_ratio": -0.5732168555259705, "logits/chosen": -0.45570212602615356, "logits/rejected": -0.4601594805717468, "logps/chosen": -0.9455745816230774, "logps/rejected": -1.6646206378936768, "loss": 0.9501, "nll_loss": 0.8927611112594604, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09455744922161102, "rewards/margins": 0.07190461456775665, "rewards/rejected": -0.16646204888820648, "step": 10740 }, { "epoch": 1.94, "grad_norm": 2.1117494106292725, "learning_rate": 1.7982239044984714e-06, "log_odds_chosen": 0.8903090357780457, "log_odds_ratio": -0.5152918696403503, "logits/chosen": -0.43422913551330566, "logits/rejected": -0.46289482712745667, "logps/chosen": -0.8911846876144409, "logps/rejected": -1.4724773168563843, "loss": 0.9887, "nll_loss": 0.9372140169143677, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08911846578121185, "rewards/margins": 0.05812928080558777, "rewards/rejected": -0.14724776148796082, "step": 10750 }, { "epoch": 1.94, "grad_norm": 1.099900245666504, "learning_rate": 1.7924006405590332e-06, "log_odds_chosen": 1.094017505645752, "log_odds_ratio": -0.5704022645950317, "logits/chosen": -0.434635728597641, "logits/rejected": -0.43005552887916565, "logps/chosen": -0.8817561268806458, "logps/rejected": -1.7000316381454468, "loss": 0.9863, "nll_loss": 0.9292108416557312, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08817560225725174, "rewards/margins": 0.0818275660276413, "rewards/rejected": -0.17000316083431244, "step": 10760 }, { "epoch": 1.95, "grad_norm": 1.4841543436050415, "learning_rate": 1.7865773766195953e-06, "log_odds_chosen": 0.9462421536445618, "log_odds_ratio": -0.5376307368278503, "logits/chosen": -0.45013323426246643, "logits/rejected": -0.4466262757778168, "logps/chosen": -0.9163403511047363, "logps/rejected": -1.6084949970245361, "loss": 0.9662, "nll_loss": 0.9124782681465149, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09163403511047363, "rewards/margins": 0.06921547651290894, "rewards/rejected": -0.16084951162338257, "step": 10770 }, { "epoch": 1.95, "grad_norm": 0.9597242474555969, "learning_rate": 1.780754112680157e-06, "log_odds_chosen": 0.9573777914047241, "log_odds_ratio": -0.5530497431755066, "logits/chosen": -0.4183259606361389, "logits/rejected": -0.4384457468986511, "logps/chosen": -0.9744545817375183, "logps/rejected": -1.6454483270645142, "loss": 1.0157, "nll_loss": 0.9604204297065735, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.09744546562433243, "rewards/margins": 0.06709937006235123, "rewards/rejected": -0.16454483568668365, "step": 10780 }, { "epoch": 1.95, "grad_norm": 1.0203475952148438, "learning_rate": 1.7749308487407191e-06, "log_odds_chosen": 0.9382126927375793, "log_odds_ratio": -0.5153268575668335, "logits/chosen": -0.45768433809280396, "logits/rejected": -0.4407344460487366, "logps/chosen": -0.9273079633712769, "logps/rejected": -1.5609608888626099, "loss": 0.9625, "nll_loss": 0.9110045433044434, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09273079037666321, "rewards/margins": 0.06336529552936554, "rewards/rejected": -0.15609610080718994, "step": 10790 }, { "epoch": 1.95, "grad_norm": 1.6416630744934082, "learning_rate": 1.769107584801281e-06, "log_odds_chosen": 1.2006902694702148, "log_odds_ratio": -0.4718552529811859, "logits/chosen": -0.4566555619239807, "logits/rejected": -0.4525316655635834, "logps/chosen": -0.9319143295288086, "logps/rejected": -1.8008266687393188, "loss": 0.8939, "nll_loss": 0.8467473983764648, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09319143742322922, "rewards/margins": 0.08689123392105103, "rewards/rejected": -0.18008264899253845, "step": 10800 }, { "epoch": 1.95, "grad_norm": 2.494734764099121, "learning_rate": 1.763284320861843e-06, "log_odds_chosen": 0.9643856883049011, "log_odds_ratio": -0.5331692695617676, "logits/chosen": -0.47048425674438477, "logits/rejected": -0.46407657861709595, "logps/chosen": -0.9381911158561707, "logps/rejected": -1.6207780838012695, "loss": 0.9624, "nll_loss": 0.9090880155563354, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09381911903619766, "rewards/margins": 0.06825868785381317, "rewards/rejected": -0.16207781434059143, "step": 10810 }, { "epoch": 1.95, "grad_norm": 1.1414620876312256, "learning_rate": 1.7574610569224048e-06, "log_odds_chosen": 1.0904492139816284, "log_odds_ratio": -0.5350916385650635, "logits/chosen": -0.4038774371147156, "logits/rejected": -0.45020437240600586, "logps/chosen": -0.9018272161483765, "logps/rejected": -1.7020618915557861, "loss": 0.9503, "nll_loss": 0.8968209028244019, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09018273651599884, "rewards/margins": 0.08002346754074097, "rewards/rejected": -0.1702061891555786, "step": 10820 }, { "epoch": 1.96, "grad_norm": 1.8628263473510742, "learning_rate": 1.7516377929829669e-06, "log_odds_chosen": 1.0281225442886353, "log_odds_ratio": -0.4999011158943176, "logits/chosen": -0.4683711528778076, "logits/rejected": -0.475578635931015, "logps/chosen": -0.916462779045105, "logps/rejected": -1.6054890155792236, "loss": 0.9858, "nll_loss": 0.9357932806015015, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.09164627641439438, "rewards/margins": 0.06890259683132172, "rewards/rejected": -0.1605488806962967, "step": 10830 }, { "epoch": 1.96, "grad_norm": 1.0807766914367676, "learning_rate": 1.745814529043529e-06, "log_odds_chosen": 1.076616644859314, "log_odds_ratio": -0.482137531042099, "logits/chosen": -0.4159305989742279, "logits/rejected": -0.39468756318092346, "logps/chosen": -0.8382644653320312, "logps/rejected": -1.6001307964324951, "loss": 0.8777, "nll_loss": 0.8294404149055481, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.083826445043087, "rewards/margins": 0.0761866495013237, "rewards/rejected": -0.1600130945444107, "step": 10840 }, { "epoch": 1.96, "grad_norm": 1.4051988124847412, "learning_rate": 1.7399912651040906e-06, "log_odds_chosen": 0.8923505544662476, "log_odds_ratio": -0.5629561543464661, "logits/chosen": -0.4815370440483093, "logits/rejected": -0.4566279351711273, "logps/chosen": -0.8964195251464844, "logps/rejected": -1.5521047115325928, "loss": 1.0146, "nll_loss": 0.958348274230957, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08964196592569351, "rewards/margins": 0.06556852906942368, "rewards/rejected": -0.155210480093956, "step": 10850 }, { "epoch": 1.96, "grad_norm": 0.9403336048126221, "learning_rate": 1.7341680011646527e-06, "log_odds_chosen": 1.1863648891448975, "log_odds_ratio": -0.5220173001289368, "logits/chosen": -0.4481693208217621, "logits/rejected": -0.4287276268005371, "logps/chosen": -0.9415832757949829, "logps/rejected": -1.7910654544830322, "loss": 0.9736, "nll_loss": 0.9214186668395996, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09415832906961441, "rewards/margins": 0.08494821935892105, "rewards/rejected": -0.17910653352737427, "step": 10860 }, { "epoch": 1.96, "grad_norm": 1.2430269718170166, "learning_rate": 1.7283447372252146e-06, "log_odds_chosen": 1.073395013809204, "log_odds_ratio": -0.5384390950202942, "logits/chosen": -0.48438698053359985, "logits/rejected": -0.4872768521308899, "logps/chosen": -0.9116014242172241, "logps/rejected": -1.6653658151626587, "loss": 0.9836, "nll_loss": 0.9297466278076172, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09116014838218689, "rewards/margins": 0.07537643611431122, "rewards/rejected": -0.16653656959533691, "step": 10870 }, { "epoch": 1.97, "grad_norm": 1.2624175548553467, "learning_rate": 1.7225214732857767e-06, "log_odds_chosen": 1.233604073524475, "log_odds_ratio": -0.4971106946468353, "logits/chosen": -0.44337910413742065, "logits/rejected": -0.4556514620780945, "logps/chosen": -0.8173624277114868, "logps/rejected": -1.6833994388580322, "loss": 0.9029, "nll_loss": 0.8531962633132935, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0817362368106842, "rewards/margins": 0.08660370111465454, "rewards/rejected": -0.16833993792533875, "step": 10880 }, { "epoch": 1.97, "grad_norm": 1.0927963256835938, "learning_rate": 1.7166982093463384e-06, "log_odds_chosen": 0.8071399927139282, "log_odds_ratio": -0.5108460187911987, "logits/chosen": -0.4830823540687561, "logits/rejected": -0.4780608117580414, "logps/chosen": -0.9677250981330872, "logps/rejected": -1.5685020685195923, "loss": 0.9669, "nll_loss": 0.9157981872558594, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09677250683307648, "rewards/margins": 0.060077689588069916, "rewards/rejected": -0.15685021877288818, "step": 10890 }, { "epoch": 1.97, "grad_norm": 1.9250324964523315, "learning_rate": 1.7108749454069005e-06, "log_odds_chosen": 0.9409559369087219, "log_odds_ratio": -0.5403879880905151, "logits/chosen": -0.4660972058773041, "logits/rejected": -0.47896018624305725, "logps/chosen": -1.005990982055664, "logps/rejected": -1.6514829397201538, "loss": 0.9834, "nll_loss": 0.9293805360794067, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10059911012649536, "rewards/margins": 0.06454919278621674, "rewards/rejected": -0.1651482880115509, "step": 10900 }, { "epoch": 1.97, "grad_norm": 1.1880030632019043, "learning_rate": 1.7050516814674624e-06, "log_odds_chosen": 0.9781869053840637, "log_odds_ratio": -0.5084264874458313, "logits/chosen": -0.4466976225376129, "logits/rejected": -0.453413188457489, "logps/chosen": -1.0177123546600342, "logps/rejected": -1.7334365844726562, "loss": 0.9245, "nll_loss": 0.8736462593078613, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10177123546600342, "rewards/margins": 0.07157242298126221, "rewards/rejected": -0.17334364354610443, "step": 10910 }, { "epoch": 1.97, "grad_norm": 1.5584683418273926, "learning_rate": 1.6992284175280245e-06, "log_odds_chosen": 0.9679558873176575, "log_odds_ratio": -0.5330004692077637, "logits/chosen": -0.42195218801498413, "logits/rejected": -0.41038408875465393, "logps/chosen": -0.9459524154663086, "logps/rejected": -1.6363149881362915, "loss": 0.8991, "nll_loss": 0.8457754850387573, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09459523856639862, "rewards/margins": 0.06903626024723053, "rewards/rejected": -0.16363149881362915, "step": 10920 }, { "epoch": 1.97, "grad_norm": 0.8798287510871887, "learning_rate": 1.6934051535885862e-06, "log_odds_chosen": 0.7307524085044861, "log_odds_ratio": -0.5784265398979187, "logits/chosen": -0.49423471093177795, "logits/rejected": -0.4821283221244812, "logps/chosen": -0.9477846026420593, "logps/rejected": -1.4808070659637451, "loss": 0.96, "nll_loss": 0.9021958112716675, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09477847069501877, "rewards/margins": 0.05330223590135574, "rewards/rejected": -0.1480807065963745, "step": 10930 }, { "epoch": 1.98, "grad_norm": 1.3429219722747803, "learning_rate": 1.6875818896491483e-06, "log_odds_chosen": 1.1456918716430664, "log_odds_ratio": -0.4871048927307129, "logits/chosen": -0.41457852721214294, "logits/rejected": -0.42475467920303345, "logps/chosen": -0.8636356592178345, "logps/rejected": -1.6684157848358154, "loss": 0.856, "nll_loss": 0.8073083162307739, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08636356890201569, "rewards/margins": 0.08047802746295929, "rewards/rejected": -0.16684159636497498, "step": 10940 }, { "epoch": 1.98, "grad_norm": 2.187709331512451, "learning_rate": 1.6817586257097104e-06, "log_odds_chosen": 1.4078530073165894, "log_odds_ratio": -0.47329598665237427, "logits/chosen": -0.41357675194740295, "logits/rejected": -0.4157022535800934, "logps/chosen": -0.9161952137947083, "logps/rejected": -1.9065545797348022, "loss": 0.92, "nll_loss": 0.8726755380630493, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09161952137947083, "rewards/margins": 0.09903593361377716, "rewards/rejected": -0.19065546989440918, "step": 10950 }, { "epoch": 1.98, "grad_norm": 2.4422430992126465, "learning_rate": 1.675935361770272e-06, "log_odds_chosen": 0.7092650532722473, "log_odds_ratio": -0.5866946578025818, "logits/chosen": -0.44020456075668335, "logits/rejected": -0.4163680672645569, "logps/chosen": -0.9696685671806335, "logps/rejected": -1.4656903743743896, "loss": 0.9728, "nll_loss": 0.9141584634780884, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09696687757968903, "rewards/margins": 0.049602169543504715, "rewards/rejected": -0.14656902849674225, "step": 10960 }, { "epoch": 1.98, "grad_norm": 1.0880329608917236, "learning_rate": 1.6701120978308341e-06, "log_odds_chosen": 0.8620030283927917, "log_odds_ratio": -0.5120142698287964, "logits/chosen": -0.42742785811424255, "logits/rejected": -0.44241565465927124, "logps/chosen": -0.9114185571670532, "logps/rejected": -1.5257482528686523, "loss": 0.9209, "nll_loss": 0.8696750402450562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09114186465740204, "rewards/margins": 0.06143295019865036, "rewards/rejected": -0.1525748074054718, "step": 10970 }, { "epoch": 1.98, "grad_norm": 1.5797709226608276, "learning_rate": 1.664288833891396e-06, "log_odds_chosen": 1.3317102193832397, "log_odds_ratio": -0.4480084776878357, "logits/chosen": -0.4008447527885437, "logits/rejected": -0.39526838064193726, "logps/chosen": -0.9441932439804077, "logps/rejected": -1.8612372875213623, "loss": 0.9713, "nll_loss": 0.9264762997627258, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09441931545734406, "rewards/margins": 0.09170440584421158, "rewards/rejected": -0.18612372875213623, "step": 10980 }, { "epoch": 1.99, "grad_norm": 1.1879231929779053, "learning_rate": 1.6584655699519581e-06, "log_odds_chosen": 1.3444769382476807, "log_odds_ratio": -0.44155794382095337, "logits/chosen": -0.4258067011833191, "logits/rejected": -0.4344039559364319, "logps/chosen": -0.8423998951911926, "logps/rejected": -1.838492751121521, "loss": 1.0015, "nll_loss": 0.9573596715927124, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08423998206853867, "rewards/margins": 0.09960927069187164, "rewards/rejected": -0.1838492602109909, "step": 10990 }, { "epoch": 1.99, "grad_norm": 1.3635789155960083, "learning_rate": 1.6526423060125198e-06, "log_odds_chosen": 0.9028606414794922, "log_odds_ratio": -0.5877591967582703, "logits/chosen": -0.4743015170097351, "logits/rejected": -0.4625968337059021, "logps/chosen": -0.950115978717804, "logps/rejected": -1.6341667175292969, "loss": 1.0627, "nll_loss": 1.0039708614349365, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.09501160681247711, "rewards/margins": 0.06840505450963974, "rewards/rejected": -0.16341665387153625, "step": 11000 } ], "logging_steps": 10, "max_steps": 13838, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }