diff --git "a/checkpoint-11000/trainer_state.json" "b/checkpoint-11000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-11000/trainer_state.json" @@ -0,0 +1,19878 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9870390859620204, + "eval_steps": 2768, + "global_step": 11000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 3.333925485610962, + "learning_rate": 8e-07, + "log_odds_chosen": 0.5151928067207336, + "log_odds_ratio": -0.6412230730056763, + "logits/chosen": -0.491842657327652, + "logits/rejected": -0.7963203191757202, + "logps/chosen": -2.191591262817383, + "logps/rejected": -2.631917715072632, + "loss": 3.7982, + "nll_loss": 3.734078884124756, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21915917098522186, + "rewards/margins": 0.044032637029886246, + "rewards/rejected": -0.2631917893886566, + "step": 10 + }, + { + "epoch": 0.0, + "grad_norm": 4.156618118286133, + "learning_rate": 1.6e-06, + "log_odds_chosen": 0.23690485954284668, + "log_odds_ratio": -0.7374136447906494, + "logits/chosen": -0.5650675296783447, + "logits/rejected": -0.626733124256134, + "logps/chosen": -1.9588556289672852, + "logps/rejected": -2.1776576042175293, + "loss": 4.1747, + "nll_loss": 4.100991249084473, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.195885568857193, + "rewards/margins": 0.02188020572066307, + "rewards/rejected": -0.21776576340198517, + "step": 20 + }, + { + "epoch": 0.01, + "grad_norm": 3.653958320617676, + "learning_rate": 2.4e-06, + "log_odds_chosen": 0.2972280979156494, + "log_odds_ratio": -0.8111011385917664, + "logits/chosen": -0.57868492603302, + "logits/rejected": -0.8484483957290649, + "logps/chosen": -2.0790963172912598, + "logps/rejected": -2.3348782062530518, + "loss": 3.7657, + "nll_loss": 3.6846203804016113, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.20790961384773254, + "rewards/margins": 0.025578215718269348, + "rewards/rejected": -0.2334878146648407, + "step": 30 + }, + { + "epoch": 0.01, + "grad_norm": 3.8905930519104004, + "learning_rate": 3.2e-06, + "log_odds_chosen": 0.21622678637504578, + "log_odds_ratio": -0.8261001706123352, + "logits/chosen": -0.5219866037368774, + "logits/rejected": -0.6627193689346313, + "logps/chosen": -1.9454641342163086, + "logps/rejected": -2.1101784706115723, + "loss": 3.7382, + "nll_loss": 3.655561923980713, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1945464164018631, + "rewards/margins": 0.016471445560455322, + "rewards/rejected": -0.21101787686347961, + "step": 40 + }, + { + "epoch": 0.01, + "grad_norm": 2.2181079387664795, + "learning_rate": 4e-06, + "log_odds_chosen": 0.5430625081062317, + "log_odds_ratio": -0.6073707938194275, + "logits/chosen": -0.43996763229370117, + "logits/rejected": -0.6621267199516296, + "logps/chosen": -2.2849440574645996, + "logps/rejected": -2.7556464672088623, + "loss": 3.5221, + "nll_loss": 3.461381435394287, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22849440574645996, + "rewards/margins": 0.04707026481628418, + "rewards/rejected": -0.27556467056274414, + "step": 50 + }, + { + "epoch": 0.01, + "grad_norm": 1.3843708038330078, + "learning_rate": 4.8e-06, + "log_odds_chosen": 0.3988548815250397, + "log_odds_ratio": -0.6887394785881042, + "logits/chosen": -0.45927801728248596, + "logits/rejected": -0.6018295884132385, + "logps/chosen": -1.714123010635376, + "logps/rejected": -2.0624635219573975, + "loss": 2.8047, + "nll_loss": 2.7357983589172363, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.17141230404376984, + "rewards/margins": 0.034834057092666626, + "rewards/rejected": -0.20624634623527527, + "step": 60 + }, + { + "epoch": 0.01, + "grad_norm": 6.880599498748779, + "learning_rate": 5.6e-06, + "log_odds_chosen": 0.5788862705230713, + "log_odds_ratio": -0.6416047811508179, + "logits/chosen": -0.43928202986717224, + "logits/rejected": -0.46093741059303284, + "logps/chosen": -1.9180206060409546, + "logps/rejected": -2.4507076740264893, + "loss": 2.7151, + "nll_loss": 2.6509668827056885, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.19180205464363098, + "rewards/margins": 0.053268708288669586, + "rewards/rejected": -0.24507074058055878, + "step": 70 + }, + { + "epoch": 0.01, + "grad_norm": 1.88127601146698, + "learning_rate": 6.4e-06, + "log_odds_chosen": 0.4793362617492676, + "log_odds_ratio": -0.6489545106887817, + "logits/chosen": -0.3151110112667084, + "logits/rejected": -0.12425204366445541, + "logps/chosen": -1.9359104633331299, + "logps/rejected": -2.3397438526153564, + "loss": 2.3516, + "nll_loss": 2.286700487136841, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.19359104335308075, + "rewards/margins": 0.04038333147764206, + "rewards/rejected": -0.2339743673801422, + "step": 80 + }, + { + "epoch": 0.02, + "grad_norm": 1.657551646232605, + "learning_rate": 7.2e-06, + "log_odds_chosen": 0.2164599895477295, + "log_odds_ratio": -0.6870883703231812, + "logits/chosen": -0.34788548946380615, + "logits/rejected": -0.2957800328731537, + "logps/chosen": -1.727168321609497, + "logps/rejected": -1.917694091796875, + "loss": 2.1251, + "nll_loss": 2.056426525115967, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.17271684110164642, + "rewards/margins": 0.01905256323516369, + "rewards/rejected": -0.19176940619945526, + "step": 90 + }, + { + "epoch": 0.02, + "grad_norm": 0.9917482137680054, + "learning_rate": 8e-06, + "log_odds_chosen": 0.017276203259825706, + "log_odds_ratio": -0.8191194534301758, + "logits/chosen": -0.48032236099243164, + "logits/rejected": -0.4871880114078522, + "logps/chosen": -1.7194982767105103, + "logps/rejected": -1.7363418340682983, + "loss": 1.9265, + "nll_loss": 1.8445875644683838, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.1719498336315155, + "rewards/margins": 0.0016843515913933516, + "rewards/rejected": -0.17363418638706207, + "step": 100 + }, + { + "epoch": 0.02, + "grad_norm": 0.8336722254753113, + "learning_rate": 7.994176736060562e-06, + "log_odds_chosen": 0.23384077847003937, + "log_odds_ratio": -0.6849513053894043, + "logits/chosen": -0.4313598573207855, + "logits/rejected": -0.46657222509384155, + "logps/chosen": -1.4655210971832275, + "logps/rejected": -1.6593421697616577, + "loss": 1.7861, + "nll_loss": 1.717559576034546, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14655211567878723, + "rewards/margins": 0.01938212849199772, + "rewards/rejected": -0.1659342348575592, + "step": 110 + }, + { + "epoch": 0.02, + "grad_norm": 0.9542021155357361, + "learning_rate": 7.988353472121123e-06, + "log_odds_chosen": 0.30552104115486145, + "log_odds_ratio": -0.6328169703483582, + "logits/chosen": -0.4570741653442383, + "logits/rejected": -0.4447706341743469, + "logps/chosen": -1.468922734260559, + "logps/rejected": -1.714000940322876, + "loss": 1.7333, + "nll_loss": 1.6700189113616943, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.14689227938652039, + "rewards/margins": 0.024507839232683182, + "rewards/rejected": -0.17140009999275208, + "step": 120 + }, + { + "epoch": 0.02, + "grad_norm": 0.7623200416564941, + "learning_rate": 7.982530208181685e-06, + "log_odds_chosen": 0.26080387830734253, + "log_odds_ratio": -0.701958954334259, + "logits/chosen": -0.38970547914505005, + "logits/rejected": -0.4110942780971527, + "logps/chosen": -1.3950055837631226, + "logps/rejected": -1.5955398082733154, + "loss": 1.6744, + "nll_loss": 1.6042404174804688, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13950055837631226, + "rewards/margins": 0.020053423941135406, + "rewards/rejected": -0.15955397486686707, + "step": 130 + }, + { + "epoch": 0.03, + "grad_norm": 0.7866131663322449, + "learning_rate": 7.976706944242247e-06, + "log_odds_chosen": 0.366484135389328, + "log_odds_ratio": -0.6378077268600464, + "logits/chosen": -0.40181222558021545, + "logits/rejected": -0.4499889314174652, + "logps/chosen": -1.3115607500076294, + "logps/rejected": -1.6199172735214233, + "loss": 1.5161, + "nll_loss": 1.4522874355316162, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1311560571193695, + "rewards/margins": 0.030835667625069618, + "rewards/rejected": -0.16199173033237457, + "step": 140 + }, + { + "epoch": 0.03, + "grad_norm": 1.6389062404632568, + "learning_rate": 7.97088368030281e-06, + "log_odds_chosen": 0.24471768736839294, + "log_odds_ratio": -0.6693255305290222, + "logits/chosen": -0.4537169933319092, + "logits/rejected": -0.48922696709632874, + "logps/chosen": -1.465497612953186, + "logps/rejected": -1.6574163436889648, + "loss": 1.5532, + "nll_loss": 1.4862229824066162, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1465497761964798, + "rewards/margins": 0.019191861152648926, + "rewards/rejected": -0.16574163734912872, + "step": 150 + }, + { + "epoch": 0.03, + "grad_norm": 1.1604678630828857, + "learning_rate": 7.965060416363372e-06, + "log_odds_chosen": 0.35011082887649536, + "log_odds_ratio": -0.6021308898925781, + "logits/chosen": -0.4541899263858795, + "logits/rejected": -0.4512806832790375, + "logps/chosen": -1.4302852153778076, + "logps/rejected": -1.7131397724151611, + "loss": 1.5223, + "nll_loss": 1.462041974067688, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14302849769592285, + "rewards/margins": 0.028285473585128784, + "rewards/rejected": -0.17131397128105164, + "step": 160 + }, + { + "epoch": 0.03, + "grad_norm": 1.774051547050476, + "learning_rate": 7.959237152423934e-06, + "log_odds_chosen": 0.21281366050243378, + "log_odds_ratio": -0.662868082523346, + "logits/chosen": -0.4206954836845398, + "logits/rejected": -0.4131297171115875, + "logps/chosen": -1.4108431339263916, + "logps/rejected": -1.5811221599578857, + "loss": 1.4785, + "nll_loss": 1.4122225046157837, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.14108431339263916, + "rewards/margins": 0.017027903348207474, + "rewards/rejected": -0.15811221301555634, + "step": 170 + }, + { + "epoch": 0.03, + "grad_norm": 1.3976486921310425, + "learning_rate": 7.953413888484495e-06, + "log_odds_chosen": 0.3351721167564392, + "log_odds_ratio": -0.6082215309143066, + "logits/chosen": -0.4032515585422516, + "logits/rejected": -0.3882203996181488, + "logps/chosen": -1.3810927867889404, + "logps/rejected": -1.6092808246612549, + "loss": 1.2855, + "nll_loss": 1.2246606349945068, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.13810928165912628, + "rewards/margins": 0.022818809375166893, + "rewards/rejected": -0.16092810034751892, + "step": 180 + }, + { + "epoch": 0.03, + "grad_norm": 0.935833215713501, + "learning_rate": 7.947590624545057e-06, + "log_odds_chosen": 0.3752503991127014, + "log_odds_ratio": -0.5996502041816711, + "logits/chosen": -0.39611080288887024, + "logits/rejected": -0.4244672656059265, + "logps/chosen": -1.145514726638794, + "logps/rejected": -1.4192689657211304, + "loss": 1.3198, + "nll_loss": 1.2598202228546143, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11455146223306656, + "rewards/margins": 0.027375441044569016, + "rewards/rejected": -0.14192691445350647, + "step": 190 + }, + { + "epoch": 0.04, + "grad_norm": 2.048588752746582, + "learning_rate": 7.94176736060562e-06, + "log_odds_chosen": 0.14159588515758514, + "log_odds_ratio": -0.6899443864822388, + "logits/chosen": -0.42449721693992615, + "logits/rejected": -0.44452494382858276, + "logps/chosen": -1.3461925983428955, + "logps/rejected": -1.454458236694336, + "loss": 1.3718, + "nll_loss": 1.3028428554534912, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.13461926579475403, + "rewards/margins": 0.01082657091319561, + "rewards/rejected": -0.1454458385705948, + "step": 200 + }, + { + "epoch": 0.04, + "grad_norm": 1.715198278427124, + "learning_rate": 7.935944096666182e-06, + "log_odds_chosen": 0.39089664816856384, + "log_odds_ratio": -0.5823230743408203, + "logits/chosen": -0.4162190556526184, + "logits/rejected": -0.3965161442756653, + "logps/chosen": -1.270228624343872, + "logps/rejected": -1.5800155401229858, + "loss": 1.3012, + "nll_loss": 1.2429354190826416, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1270228922367096, + "rewards/margins": 0.030978679656982422, + "rewards/rejected": -0.15800157189369202, + "step": 210 + }, + { + "epoch": 0.04, + "grad_norm": 0.9870737791061401, + "learning_rate": 7.930120832726742e-06, + "log_odds_chosen": 0.2738127112388611, + "log_odds_ratio": -0.6368094086647034, + "logits/chosen": -0.3498522639274597, + "logits/rejected": -0.3681924343109131, + "logps/chosen": -1.1670268774032593, + "logps/rejected": -1.351577877998352, + "loss": 1.2731, + "nll_loss": 1.2094577550888062, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11670269817113876, + "rewards/margins": 0.01845509558916092, + "rewards/rejected": -0.13515779376029968, + "step": 220 + }, + { + "epoch": 0.04, + "grad_norm": 4.029344081878662, + "learning_rate": 7.924297568787305e-06, + "log_odds_chosen": 0.22043490409851074, + "log_odds_ratio": -0.6865091919898987, + "logits/chosen": -0.37322139739990234, + "logits/rejected": -0.3585105836391449, + "logps/chosen": -1.3887712955474854, + "logps/rejected": -1.5607054233551025, + "loss": 1.3353, + "nll_loss": 1.2666887044906616, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.13887712359428406, + "rewards/margins": 0.017193417996168137, + "rewards/rejected": -0.1560705453157425, + "step": 230 + }, + { + "epoch": 0.04, + "grad_norm": 0.7768957614898682, + "learning_rate": 7.918474304847867e-06, + "log_odds_chosen": 0.19368572533130646, + "log_odds_ratio": -0.6984459757804871, + "logits/chosen": -0.36836880445480347, + "logits/rejected": -0.3484199643135071, + "logps/chosen": -1.3645991086959839, + "logps/rejected": -1.5094786882400513, + "loss": 1.2396, + "nll_loss": 1.1697386503219604, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.13645990192890167, + "rewards/margins": 0.014487968757748604, + "rewards/rejected": -0.15094786882400513, + "step": 240 + }, + { + "epoch": 0.05, + "grad_norm": 0.8741191029548645, + "learning_rate": 7.91265104090843e-06, + "log_odds_chosen": 0.3740343749523163, + "log_odds_ratio": -0.5765000581741333, + "logits/chosen": -0.36863988637924194, + "logits/rejected": -0.3575906753540039, + "logps/chosen": -1.0825830698013306, + "logps/rejected": -1.3362675905227661, + "loss": 1.2257, + "nll_loss": 1.1680984497070312, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10825830698013306, + "rewards/margins": 0.0253684613853693, + "rewards/rejected": -0.1336267590522766, + "step": 250 + }, + { + "epoch": 0.05, + "grad_norm": 0.968717634677887, + "learning_rate": 7.906827776968992e-06, + "log_odds_chosen": 0.24148449301719666, + "log_odds_ratio": -0.6510211229324341, + "logits/chosen": -0.36772042512893677, + "logits/rejected": -0.3647049367427826, + "logps/chosen": -1.219939947128296, + "logps/rejected": -1.3963510990142822, + "loss": 1.2792, + "nll_loss": 1.214083194732666, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.12199399620294571, + "rewards/margins": 0.017641115933656693, + "rewards/rejected": -0.1396351158618927, + "step": 260 + }, + { + "epoch": 0.05, + "grad_norm": 0.8676772117614746, + "learning_rate": 7.901004513029554e-06, + "log_odds_chosen": 0.2663150727748871, + "log_odds_ratio": -0.6641027331352234, + "logits/chosen": -0.3884763717651367, + "logits/rejected": -0.38966697454452515, + "logps/chosen": -1.3800342082977295, + "logps/rejected": -1.598384141921997, + "loss": 1.3198, + "nll_loss": 1.253383994102478, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.138003408908844, + "rewards/margins": 0.021835003048181534, + "rewards/rejected": -0.15983840823173523, + "step": 270 + }, + { + "epoch": 0.05, + "grad_norm": 1.793342113494873, + "learning_rate": 7.895181249090114e-06, + "log_odds_chosen": 0.22694933414459229, + "log_odds_ratio": -0.6462079286575317, + "logits/chosen": -0.3369145393371582, + "logits/rejected": -0.34541624784469604, + "logps/chosen": -1.1592384576797485, + "logps/rejected": -1.309792399406433, + "loss": 1.2404, + "nll_loss": 1.175813913345337, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11592384427785873, + "rewards/margins": 0.015055393800139427, + "rewards/rejected": -0.1309792548418045, + "step": 280 + }, + { + "epoch": 0.05, + "grad_norm": 0.962464451789856, + "learning_rate": 7.889357985150677e-06, + "log_odds_chosen": 0.2656427323818207, + "log_odds_ratio": -0.6383371949195862, + "logits/chosen": -0.28534650802612305, + "logits/rejected": -0.2924650311470032, + "logps/chosen": -1.1702284812927246, + "logps/rejected": -1.358701229095459, + "loss": 1.2384, + "nll_loss": 1.1746160984039307, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.11702284961938858, + "rewards/margins": 0.018847281113266945, + "rewards/rejected": -0.13587012887001038, + "step": 290 + }, + { + "epoch": 0.05, + "grad_norm": 0.5939451456069946, + "learning_rate": 7.883534721211239e-06, + "log_odds_chosen": 0.38102689385414124, + "log_odds_ratio": -0.6059800386428833, + "logits/chosen": -0.3350544273853302, + "logits/rejected": -0.312080442905426, + "logps/chosen": -1.1126482486724854, + "logps/rejected": -1.4021085500717163, + "loss": 1.1435, + "nll_loss": 1.0828807353973389, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.11126482486724854, + "rewards/margins": 0.02894604206085205, + "rewards/rejected": -0.14021086692810059, + "step": 300 + }, + { + "epoch": 0.06, + "grad_norm": 0.7120574712753296, + "learning_rate": 7.8777114572718e-06, + "log_odds_chosen": 0.2518892288208008, + "log_odds_ratio": -0.6437832713127136, + "logits/chosen": -0.42240291833877563, + "logits/rejected": -0.41587719321250916, + "logps/chosen": -1.2639614343643188, + "logps/rejected": -1.4360151290893555, + "loss": 1.3117, + "nll_loss": 1.2472755908966064, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12639614939689636, + "rewards/margins": 0.0172053761780262, + "rewards/rejected": -0.14360150694847107, + "step": 310 + }, + { + "epoch": 0.06, + "grad_norm": 1.4585373401641846, + "learning_rate": 7.871888193332362e-06, + "log_odds_chosen": 0.36516430974006653, + "log_odds_ratio": -0.5913447141647339, + "logits/chosen": -0.3302076756954193, + "logits/rejected": -0.3184494376182556, + "logps/chosen": -1.1804471015930176, + "logps/rejected": -1.4232193231582642, + "loss": 1.1885, + "nll_loss": 1.1293482780456543, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.11804471164941788, + "rewards/margins": 0.024277225136756897, + "rewards/rejected": -0.14232194423675537, + "step": 320 + }, + { + "epoch": 0.06, + "grad_norm": 0.9858699440956116, + "learning_rate": 7.866064929392924e-06, + "log_odds_chosen": 0.21814580261707306, + "log_odds_ratio": -0.6653276681900024, + "logits/chosen": -0.3807678818702698, + "logits/rejected": -0.3492942452430725, + "logps/chosen": -1.3254904747009277, + "logps/rejected": -1.4859329462051392, + "loss": 1.2597, + "nll_loss": 1.1931736469268799, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.13254904747009277, + "rewards/margins": 0.01604425348341465, + "rewards/rejected": -0.14859329164028168, + "step": 330 + }, + { + "epoch": 0.06, + "grad_norm": 1.0847293138504028, + "learning_rate": 7.860241665453487e-06, + "log_odds_chosen": 0.27122896909713745, + "log_odds_ratio": -0.6263772249221802, + "logits/chosen": -0.3252618610858917, + "logits/rejected": -0.3240824341773987, + "logps/chosen": -1.2787262201309204, + "logps/rejected": -1.471504807472229, + "loss": 1.2253, + "nll_loss": 1.1626585721969604, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.12787261605262756, + "rewards/margins": 0.019277850165963173, + "rewards/rejected": -0.14715047180652618, + "step": 340 + }, + { + "epoch": 0.06, + "grad_norm": 0.9820899963378906, + "learning_rate": 7.854418401514049e-06, + "log_odds_chosen": 0.3788866698741913, + "log_odds_ratio": -0.6247309446334839, + "logits/chosen": -0.3561337888240814, + "logits/rejected": -0.35025572776794434, + "logps/chosen": -1.1670924425125122, + "logps/rejected": -1.4341050386428833, + "loss": 1.2023, + "nll_loss": 1.1397864818572998, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11670924723148346, + "rewards/margins": 0.026701247319579124, + "rewards/rejected": -0.14341048896312714, + "step": 350 + }, + { + "epoch": 0.07, + "grad_norm": 2.0867435932159424, + "learning_rate": 7.848595137574611e-06, + "log_odds_chosen": 0.23402830958366394, + "log_odds_ratio": -0.6430379748344421, + "logits/chosen": -0.341842383146286, + "logits/rejected": -0.3549098074436188, + "logps/chosen": -1.2200753688812256, + "logps/rejected": -1.378993034362793, + "loss": 1.2236, + "nll_loss": 1.159274697303772, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.12200756371021271, + "rewards/margins": 0.015891747549176216, + "rewards/rejected": -0.13789930939674377, + "step": 360 + }, + { + "epoch": 0.07, + "grad_norm": 0.8319967985153198, + "learning_rate": 7.842771873635172e-06, + "log_odds_chosen": 0.3978227376937866, + "log_odds_ratio": -0.5828840136528015, + "logits/chosen": -0.3248536288738251, + "logits/rejected": -0.3298476040363312, + "logps/chosen": -1.1620112657546997, + "logps/rejected": -1.4452251195907593, + "loss": 1.2021, + "nll_loss": 1.1437865495681763, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.11620111763477325, + "rewards/margins": 0.02832140401005745, + "rewards/rejected": -0.1445225179195404, + "step": 370 + }, + { + "epoch": 0.07, + "grad_norm": 0.7204629778862, + "learning_rate": 7.836948609695734e-06, + "log_odds_chosen": 0.3162148594856262, + "log_odds_ratio": -0.5983591079711914, + "logits/chosen": -0.23571312427520752, + "logits/rejected": -0.26293158531188965, + "logps/chosen": -1.100656509399414, + "logps/rejected": -1.3071973323822021, + "loss": 1.1738, + "nll_loss": 1.1139934062957764, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11006565392017365, + "rewards/margins": 0.020654071122407913, + "rewards/rejected": -0.13071972131729126, + "step": 380 + }, + { + "epoch": 0.07, + "grad_norm": 1.169068455696106, + "learning_rate": 7.831125345756296e-06, + "log_odds_chosen": 0.19375436007976532, + "log_odds_ratio": -0.6605286598205566, + "logits/chosen": -0.25121766328811646, + "logits/rejected": -0.2934662997722626, + "logps/chosen": -1.0589444637298584, + "logps/rejected": -1.1631048917770386, + "loss": 1.1408, + "nll_loss": 1.0747069120407104, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10589446127414703, + "rewards/margins": 0.010416034609079361, + "rewards/rejected": -0.1163104772567749, + "step": 390 + }, + { + "epoch": 0.07, + "grad_norm": 0.5295692682266235, + "learning_rate": 7.825302081816857e-06, + "log_odds_chosen": 0.22412052750587463, + "log_odds_ratio": -0.6809746623039246, + "logits/chosen": -0.29396966099739075, + "logits/rejected": -0.299424946308136, + "logps/chosen": -1.1099778413772583, + "logps/rejected": -1.2693623304367065, + "loss": 1.1806, + "nll_loss": 1.1125379800796509, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.110997773706913, + "rewards/margins": 0.015938464552164078, + "rewards/rejected": -0.12693624198436737, + "step": 400 + }, + { + "epoch": 0.07, + "grad_norm": 1.4080840349197388, + "learning_rate": 7.81947881787742e-06, + "log_odds_chosen": 0.42054247856140137, + "log_odds_ratio": -0.5622063875198364, + "logits/chosen": -0.30937671661376953, + "logits/rejected": -0.3293423056602478, + "logps/chosen": -1.262446641921997, + "logps/rejected": -1.5442469120025635, + "loss": 1.2273, + "nll_loss": 1.1710504293441772, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.1262446492910385, + "rewards/margins": 0.028180036693811417, + "rewards/rejected": -0.15442468225955963, + "step": 410 + }, + { + "epoch": 0.08, + "grad_norm": 2.086719274520874, + "learning_rate": 7.813655553937982e-06, + "log_odds_chosen": 0.4324397146701813, + "log_odds_ratio": -0.5911771059036255, + "logits/chosen": -0.2944505512714386, + "logits/rejected": -0.330447793006897, + "logps/chosen": -1.2822068929672241, + "logps/rejected": -1.585065484046936, + "loss": 1.2142, + "nll_loss": 1.1550967693328857, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.12822069227695465, + "rewards/margins": 0.03028585948050022, + "rewards/rejected": -0.15850654244422913, + "step": 420 + }, + { + "epoch": 0.08, + "grad_norm": 0.6857010722160339, + "learning_rate": 7.807832289998544e-06, + "log_odds_chosen": 0.3362474739551544, + "log_odds_ratio": -0.5960213541984558, + "logits/chosen": -0.24630114436149597, + "logits/rejected": -0.24887843430042267, + "logps/chosen": -1.0708539485931396, + "logps/rejected": -1.299224615097046, + "loss": 1.1308, + "nll_loss": 1.0712475776672363, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10708538442850113, + "rewards/margins": 0.02283708192408085, + "rewards/rejected": -0.12992244958877563, + "step": 430 + }, + { + "epoch": 0.08, + "grad_norm": 0.9802748560905457, + "learning_rate": 7.802009026059106e-06, + "log_odds_chosen": 0.20174989104270935, + "log_odds_ratio": -0.6817139387130737, + "logits/chosen": -0.3603675663471222, + "logits/rejected": -0.33373549580574036, + "logps/chosen": -1.2318315505981445, + "logps/rejected": -1.3654711246490479, + "loss": 1.2213, + "nll_loss": 1.153092384338379, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.12318315356969833, + "rewards/margins": 0.013363957405090332, + "rewards/rejected": -0.13654711842536926, + "step": 440 + }, + { + "epoch": 0.08, + "grad_norm": 0.811257004737854, + "learning_rate": 7.796185762119669e-06, + "log_odds_chosen": 0.308106929063797, + "log_odds_ratio": -0.6701749563217163, + "logits/chosen": -0.3041822612285614, + "logits/rejected": -0.32910075783729553, + "logps/chosen": -1.2259390354156494, + "logps/rejected": -1.4316823482513428, + "loss": 1.2089, + "nll_loss": 1.1418609619140625, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12259390205144882, + "rewards/margins": 0.02057434618473053, + "rewards/rejected": -0.14316824078559875, + "step": 450 + }, + { + "epoch": 0.08, + "grad_norm": 1.186498761177063, + "learning_rate": 7.79036249818023e-06, + "log_odds_chosen": 0.3135170340538025, + "log_odds_ratio": -0.6137998700141907, + "logits/chosen": -0.2752595543861389, + "logits/rejected": -0.31168991327285767, + "logps/chosen": -1.1841676235198975, + "logps/rejected": -1.3871896266937256, + "loss": 1.2211, + "nll_loss": 1.1597373485565186, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.11841676384210587, + "rewards/margins": 0.02030220814049244, + "rewards/rejected": -0.13871899247169495, + "step": 460 + }, + { + "epoch": 0.08, + "grad_norm": 0.4524146020412445, + "learning_rate": 7.784539234240792e-06, + "log_odds_chosen": 0.5339788198471069, + "log_odds_ratio": -0.55875563621521, + "logits/chosen": -0.31525543332099915, + "logits/rejected": -0.3321399390697479, + "logps/chosen": -1.1199116706848145, + "logps/rejected": -1.5095632076263428, + "loss": 1.1568, + "nll_loss": 1.1008961200714111, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11199116706848145, + "rewards/margins": 0.03896515816450119, + "rewards/rejected": -0.15095631778240204, + "step": 470 + }, + { + "epoch": 0.09, + "grad_norm": 1.2673509120941162, + "learning_rate": 7.778715970301354e-06, + "log_odds_chosen": 0.4090334475040436, + "log_odds_ratio": -0.6181383728981018, + "logits/chosen": -0.3052888512611389, + "logits/rejected": -0.32204440236091614, + "logps/chosen": -1.1738417148590088, + "logps/rejected": -1.483721137046814, + "loss": 1.1545, + "nll_loss": 1.092673420906067, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.1173841580748558, + "rewards/margins": 0.03098795935511589, + "rewards/rejected": -0.1483720988035202, + "step": 480 + }, + { + "epoch": 0.09, + "grad_norm": 1.0262770652770996, + "learning_rate": 7.772892706361916e-06, + "log_odds_chosen": 0.3209136426448822, + "log_odds_ratio": -0.6104674339294434, + "logits/chosen": -0.30723345279693604, + "logits/rejected": -0.3061657249927521, + "logps/chosen": -1.203070044517517, + "logps/rejected": -1.403160572052002, + "loss": 1.1315, + "nll_loss": 1.0704646110534668, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.12030700594186783, + "rewards/margins": 0.02000904455780983, + "rewards/rejected": -0.14031605422496796, + "step": 490 + }, + { + "epoch": 0.09, + "grad_norm": 0.8104180097579956, + "learning_rate": 7.767069442422477e-06, + "log_odds_chosen": 0.3832394480705261, + "log_odds_ratio": -0.5944157838821411, + "logits/chosen": -0.29042813181877136, + "logits/rejected": -0.3143666684627533, + "logps/chosen": -1.1808339357376099, + "logps/rejected": -1.447609543800354, + "loss": 1.1704, + "nll_loss": 1.110947608947754, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1180833950638771, + "rewards/margins": 0.026677558198571205, + "rewards/rejected": -0.14476095139980316, + "step": 500 + }, + { + "epoch": 0.09, + "grad_norm": 0.7807692885398865, + "learning_rate": 7.761246178483039e-06, + "log_odds_chosen": 0.2518574893474579, + "log_odds_ratio": -0.6599031090736389, + "logits/chosen": -0.21843962371349335, + "logits/rejected": -0.2486545592546463, + "logps/chosen": -1.1892979145050049, + "logps/rejected": -1.3361047506332397, + "loss": 1.1173, + "nll_loss": 1.0513516664505005, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.11892978847026825, + "rewards/margins": 0.014680701307952404, + "rewards/rejected": -0.13361048698425293, + "step": 510 + }, + { + "epoch": 0.09, + "grad_norm": 0.7994462847709656, + "learning_rate": 7.755422914543601e-06, + "log_odds_chosen": 0.3900667428970337, + "log_odds_ratio": -0.5982797741889954, + "logits/chosen": -0.19965076446533203, + "logits/rejected": -0.2556760013103485, + "logps/chosen": -1.0754297971725464, + "logps/rejected": -1.3169327974319458, + "loss": 1.1191, + "nll_loss": 1.0592612028121948, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1075429767370224, + "rewards/margins": 0.024150308221578598, + "rewards/rejected": -0.1316932737827301, + "step": 520 + }, + { + "epoch": 0.1, + "grad_norm": 1.0434073209762573, + "learning_rate": 7.749599650604164e-06, + "log_odds_chosen": 0.30178144574165344, + "log_odds_ratio": -0.6079939007759094, + "logits/chosen": -0.23979106545448303, + "logits/rejected": -0.2712782025337219, + "logps/chosen": -1.1630055904388428, + "logps/rejected": -1.3505280017852783, + "loss": 1.1351, + "nll_loss": 1.0743471384048462, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1163005605340004, + "rewards/margins": 0.01875222660601139, + "rewards/rejected": -0.13505280017852783, + "step": 530 + }, + { + "epoch": 0.1, + "grad_norm": 0.9403761029243469, + "learning_rate": 7.743776386664726e-06, + "log_odds_chosen": 0.3626457452774048, + "log_odds_ratio": -0.6064115762710571, + "logits/chosen": -0.2964823842048645, + "logits/rejected": -0.3309454917907715, + "logps/chosen": -1.1040284633636475, + "logps/rejected": -1.3232171535491943, + "loss": 1.0933, + "nll_loss": 1.0326130390167236, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.11040283739566803, + "rewards/margins": 0.021918874233961105, + "rewards/rejected": -0.13232171535491943, + "step": 540 + }, + { + "epoch": 0.1, + "grad_norm": 1.0006709098815918, + "learning_rate": 7.737953122725287e-06, + "log_odds_chosen": 0.38954734802246094, + "log_odds_ratio": -0.5961380004882812, + "logits/chosen": -0.26212555170059204, + "logits/rejected": -0.25596773624420166, + "logps/chosen": -1.1498854160308838, + "logps/rejected": -1.4123402833938599, + "loss": 1.1369, + "nll_loss": 1.0772807598114014, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.1149885281920433, + "rewards/margins": 0.0262454841285944, + "rewards/rejected": -0.14123402535915375, + "step": 550 + }, + { + "epoch": 0.1, + "grad_norm": 1.4980974197387695, + "learning_rate": 7.732129858785849e-06, + "log_odds_chosen": 0.5412741899490356, + "log_odds_ratio": -0.5501648187637329, + "logits/chosen": -0.26225027441978455, + "logits/rejected": -0.2663401961326599, + "logps/chosen": -1.0512454509735107, + "logps/rejected": -1.4261060953140259, + "loss": 1.1554, + "nll_loss": 1.1004068851470947, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10512454807758331, + "rewards/margins": 0.037486057728528976, + "rewards/rejected": -0.14261062443256378, + "step": 560 + }, + { + "epoch": 0.1, + "grad_norm": 1.3576551675796509, + "learning_rate": 7.726306594846411e-06, + "log_odds_chosen": 0.43464937806129456, + "log_odds_ratio": -0.6025624871253967, + "logits/chosen": -0.20509573817253113, + "logits/rejected": -0.25421127676963806, + "logps/chosen": -1.104231357574463, + "logps/rejected": -1.3879539966583252, + "loss": 1.1005, + "nll_loss": 1.0402498245239258, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.11042313277721405, + "rewards/margins": 0.028372278437018394, + "rewards/rejected": -0.1387954205274582, + "step": 570 + }, + { + "epoch": 0.1, + "grad_norm": 1.533570408821106, + "learning_rate": 7.720483330906974e-06, + "log_odds_chosen": 0.399638831615448, + "log_odds_ratio": -0.586500883102417, + "logits/chosen": -0.3640395402908325, + "logits/rejected": -0.3354605734348297, + "logps/chosen": -1.087601900100708, + "logps/rejected": -1.341512680053711, + "loss": 1.1592, + "nll_loss": 1.100534200668335, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10876019299030304, + "rewards/margins": 0.025391090661287308, + "rewards/rejected": -0.13415126502513885, + "step": 580 + }, + { + "epoch": 0.11, + "grad_norm": 0.598721981048584, + "learning_rate": 7.714660066967534e-06, + "log_odds_chosen": 0.477273166179657, + "log_odds_ratio": -0.5699985027313232, + "logits/chosen": -0.2669471800327301, + "logits/rejected": -0.2903423011302948, + "logps/chosen": -1.0827006101608276, + "logps/rejected": -1.398123025894165, + "loss": 1.1075, + "nll_loss": 1.0505017042160034, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1082700714468956, + "rewards/margins": 0.031542230397462845, + "rewards/rejected": -0.13981230556964874, + "step": 590 + }, + { + "epoch": 0.11, + "grad_norm": 0.9932595491409302, + "learning_rate": 7.708836803028096e-06, + "log_odds_chosen": 0.3443313539028168, + "log_odds_ratio": -0.61859130859375, + "logits/chosen": -0.2856084704399109, + "logits/rejected": -0.3268492817878723, + "logps/chosen": -1.112980604171753, + "logps/rejected": -1.3373619318008423, + "loss": 1.1029, + "nll_loss": 1.0410178899765015, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.11129806190729141, + "rewards/margins": 0.02243814989924431, + "rewards/rejected": -0.13373620808124542, + "step": 600 + }, + { + "epoch": 0.11, + "grad_norm": 1.115981936454773, + "learning_rate": 7.703013539088659e-06, + "log_odds_chosen": 0.46272382140159607, + "log_odds_ratio": -0.5593470931053162, + "logits/chosen": -0.24541564285755157, + "logits/rejected": -0.26886048913002014, + "logps/chosen": -1.2253077030181885, + "logps/rejected": -1.542391061782837, + "loss": 1.0993, + "nll_loss": 1.0433661937713623, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12253077328205109, + "rewards/margins": 0.031708355993032455, + "rewards/rejected": -0.15423911809921265, + "step": 610 + }, + { + "epoch": 0.11, + "grad_norm": 1.0807980298995972, + "learning_rate": 7.697190275149221e-06, + "log_odds_chosen": 0.40578898787498474, + "log_odds_ratio": -0.5844072103500366, + "logits/chosen": -0.26643887162208557, + "logits/rejected": -0.2956678867340088, + "logps/chosen": -1.0739221572875977, + "logps/rejected": -1.3498246669769287, + "loss": 1.0983, + "nll_loss": 1.0398545265197754, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10739222913980484, + "rewards/margins": 0.027590245008468628, + "rewards/rejected": -0.13498248159885406, + "step": 620 + }, + { + "epoch": 0.11, + "grad_norm": 1.0824393033981323, + "learning_rate": 7.691367011209783e-06, + "log_odds_chosen": 0.2399287223815918, + "log_odds_ratio": -0.6483467817306519, + "logits/chosen": -0.23387715220451355, + "logits/rejected": -0.2537880837917328, + "logps/chosen": -1.0878570079803467, + "logps/rejected": -1.2441601753234863, + "loss": 1.1431, + "nll_loss": 1.0782992839813232, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.1087857261300087, + "rewards/margins": 0.015630314126610756, + "rewards/rejected": -0.12441603094339371, + "step": 630 + }, + { + "epoch": 0.12, + "grad_norm": 1.312685489654541, + "learning_rate": 7.685543747270344e-06, + "log_odds_chosen": 0.2941173017024994, + "log_odds_ratio": -0.6299890279769897, + "logits/chosen": -0.23893220722675323, + "logits/rejected": -0.26458939909935, + "logps/chosen": -1.0712026357650757, + "logps/rejected": -1.277134656906128, + "loss": 1.0244, + "nll_loss": 0.9613849520683289, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10712026059627533, + "rewards/margins": 0.020593199878931046, + "rewards/rejected": -0.12771347165107727, + "step": 640 + }, + { + "epoch": 0.12, + "grad_norm": 0.5005902051925659, + "learning_rate": 7.679720483330906e-06, + "log_odds_chosen": 0.28637003898620605, + "log_odds_ratio": -0.6562920808792114, + "logits/chosen": -0.32920941710472107, + "logits/rejected": -0.3482641875743866, + "logps/chosen": -1.2317559719085693, + "logps/rejected": -1.4107776880264282, + "loss": 1.1082, + "nll_loss": 1.0425524711608887, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.12317559868097305, + "rewards/margins": 0.01790216937661171, + "rewards/rejected": -0.14107775688171387, + "step": 650 + }, + { + "epoch": 0.12, + "grad_norm": 0.7764565348625183, + "learning_rate": 7.673897219391469e-06, + "log_odds_chosen": 0.5228947997093201, + "log_odds_ratio": -0.5362275838851929, + "logits/chosen": -0.25872281193733215, + "logits/rejected": -0.3187440037727356, + "logps/chosen": -1.0902519226074219, + "logps/rejected": -1.4310792684555054, + "loss": 1.0802, + "nll_loss": 1.0265672206878662, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10902519524097443, + "rewards/margins": 0.03408272564411163, + "rewards/rejected": -0.14310793578624725, + "step": 660 + }, + { + "epoch": 0.12, + "grad_norm": 1.2186826467514038, + "learning_rate": 7.668073955452031e-06, + "log_odds_chosen": 0.24523350596427917, + "log_odds_ratio": -0.6810539960861206, + "logits/chosen": -0.3111647963523865, + "logits/rejected": -0.3507189452648163, + "logps/chosen": -1.2376306056976318, + "logps/rejected": -1.3808842897415161, + "loss": 1.1803, + "nll_loss": 1.112222671508789, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.12376304715871811, + "rewards/margins": 0.014325378462672234, + "rewards/rejected": -0.1380884349346161, + "step": 670 + }, + { + "epoch": 0.12, + "grad_norm": 1.0902864933013916, + "learning_rate": 7.662250691512593e-06, + "log_odds_chosen": 0.4521639347076416, + "log_odds_ratio": -0.604234516620636, + "logits/chosen": -0.315155029296875, + "logits/rejected": -0.34290218353271484, + "logps/chosen": -1.1858965158462524, + "logps/rejected": -1.4938578605651855, + "loss": 1.1126, + "nll_loss": 1.0521882772445679, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.11858963966369629, + "rewards/margins": 0.030796144157648087, + "rewards/rejected": -0.14938578009605408, + "step": 680 + }, + { + "epoch": 0.12, + "grad_norm": 1.0857912302017212, + "learning_rate": 7.656427427573154e-06, + "log_odds_chosen": 0.35988926887512207, + "log_odds_ratio": -0.6070387959480286, + "logits/chosen": -0.30350548028945923, + "logits/rejected": -0.3145049810409546, + "logps/chosen": -1.1082563400268555, + "logps/rejected": -1.3548405170440674, + "loss": 1.1194, + "nll_loss": 1.058680772781372, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.11082563549280167, + "rewards/margins": 0.024658426642417908, + "rewards/rejected": -0.13548406958580017, + "step": 690 + }, + { + "epoch": 0.13, + "grad_norm": 1.2661492824554443, + "learning_rate": 7.650604163633716e-06, + "log_odds_chosen": 0.3374677300453186, + "log_odds_ratio": -0.6508690714836121, + "logits/chosen": -0.3213859498500824, + "logits/rejected": -0.31398090720176697, + "logps/chosen": -1.2048908472061157, + "logps/rejected": -1.44016695022583, + "loss": 1.2272, + "nll_loss": 1.162106990814209, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.12048908323049545, + "rewards/margins": 0.023527618497610092, + "rewards/rejected": -0.14401671290397644, + "step": 700 + }, + { + "epoch": 0.13, + "grad_norm": 0.8489270210266113, + "learning_rate": 7.644780899694279e-06, + "log_odds_chosen": 0.4868837893009186, + "log_odds_ratio": -0.569444477558136, + "logits/chosen": -0.2966112196445465, + "logits/rejected": -0.30416035652160645, + "logps/chosen": -1.1295454502105713, + "logps/rejected": -1.4607059955596924, + "loss": 1.1246, + "nll_loss": 1.067684531211853, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.11295454204082489, + "rewards/margins": 0.03311605006456375, + "rewards/rejected": -0.14607058465480804, + "step": 710 + }, + { + "epoch": 0.13, + "grad_norm": 1.3026355504989624, + "learning_rate": 7.63895763575484e-06, + "log_odds_chosen": 0.344705194234848, + "log_odds_ratio": -0.6008533239364624, + "logits/chosen": -0.32720333337783813, + "logits/rejected": -0.34578755497932434, + "logps/chosen": -1.1115270853042603, + "logps/rejected": -1.3362066745758057, + "loss": 1.1358, + "nll_loss": 1.0757068395614624, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.11115269362926483, + "rewards/margins": 0.02246798202395439, + "rewards/rejected": -0.13362067937850952, + "step": 720 + }, + { + "epoch": 0.13, + "grad_norm": 1.5339288711547852, + "learning_rate": 7.633134371815401e-06, + "log_odds_chosen": 0.3415408134460449, + "log_odds_ratio": -0.6154005527496338, + "logits/chosen": -0.3425058424472809, + "logits/rejected": -0.33901968598365784, + "logps/chosen": -1.138835072517395, + "logps/rejected": -1.3690025806427002, + "loss": 1.1943, + "nll_loss": 1.1327401399612427, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11388351023197174, + "rewards/margins": 0.02301674149930477, + "rewards/rejected": -0.13690023124217987, + "step": 730 + }, + { + "epoch": 0.13, + "grad_norm": 1.1949843168258667, + "learning_rate": 7.627311107875965e-06, + "log_odds_chosen": 0.2753564715385437, + "log_odds_ratio": -0.6625062227249146, + "logits/chosen": -0.29811739921569824, + "logits/rejected": -0.3414981961250305, + "logps/chosen": -1.081146001815796, + "logps/rejected": -1.2424064874649048, + "loss": 1.1431, + "nll_loss": 1.0768808126449585, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10811461508274078, + "rewards/margins": 0.016126038506627083, + "rewards/rejected": -0.12424063682556152, + "step": 740 + }, + { + "epoch": 0.14, + "grad_norm": 3.6740400791168213, + "learning_rate": 7.621487843936526e-06, + "log_odds_chosen": 0.516070544719696, + "log_odds_ratio": -0.5642696619033813, + "logits/chosen": -0.29928848147392273, + "logits/rejected": -0.35982462763786316, + "logps/chosen": -1.093741774559021, + "logps/rejected": -1.4399880170822144, + "loss": 1.075, + "nll_loss": 1.0185630321502686, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10937418788671494, + "rewards/margins": 0.034624624997377396, + "rewards/rejected": -0.14399881660938263, + "step": 750 + }, + { + "epoch": 0.14, + "grad_norm": 1.1976426839828491, + "learning_rate": 7.615664579997088e-06, + "log_odds_chosen": 0.15615001320838928, + "log_odds_ratio": -0.7201071977615356, + "logits/chosen": -0.34353601932525635, + "logits/rejected": -0.3323616683483124, + "logps/chosen": -1.1542483568191528, + "logps/rejected": -1.2572157382965088, + "loss": 1.0689, + "nll_loss": 0.9969244003295898, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11542483419179916, + "rewards/margins": 0.010296729393303394, + "rewards/rejected": -0.12572155892848969, + "step": 760 + }, + { + "epoch": 0.14, + "grad_norm": 1.4891656637191772, + "learning_rate": 7.60984131605765e-06, + "log_odds_chosen": 0.42357462644577026, + "log_odds_ratio": -0.6011026501655579, + "logits/chosen": -0.30633580684661865, + "logits/rejected": -0.2977932095527649, + "logps/chosen": -0.9875582456588745, + "logps/rejected": -1.237544298171997, + "loss": 1.0333, + "nll_loss": 0.9731782078742981, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09875582903623581, + "rewards/margins": 0.024998605251312256, + "rewards/rejected": -0.12375444173812866, + "step": 770 + }, + { + "epoch": 0.14, + "grad_norm": 0.7760726809501648, + "learning_rate": 7.604018052118211e-06, + "log_odds_chosen": 0.41998594999313354, + "log_odds_ratio": -0.592202365398407, + "logits/chosen": -0.32606226205825806, + "logits/rejected": -0.29085078835487366, + "logps/chosen": -1.0062568187713623, + "logps/rejected": -1.238694190979004, + "loss": 1.0405, + "nll_loss": 0.9813073873519897, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10062569379806519, + "rewards/margins": 0.023243743926286697, + "rewards/rejected": -0.12386943399906158, + "step": 780 + }, + { + "epoch": 0.14, + "grad_norm": 1.3088407516479492, + "learning_rate": 7.598194788178774e-06, + "log_odds_chosen": 0.3967740535736084, + "log_odds_ratio": -0.5978984832763672, + "logits/chosen": -0.3288530707359314, + "logits/rejected": -0.3492429852485657, + "logps/chosen": -1.018615961074829, + "logps/rejected": -1.2579083442687988, + "loss": 1.0464, + "nll_loss": 0.9865927696228027, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10186159610748291, + "rewards/margins": 0.023929251357913017, + "rewards/rejected": -0.12579084932804108, + "step": 790 + }, + { + "epoch": 0.14, + "grad_norm": 0.9534358382225037, + "learning_rate": 7.592371524239336e-06, + "log_odds_chosen": 0.5231834650039673, + "log_odds_ratio": -0.5512691140174866, + "logits/chosen": -0.3196202218532562, + "logits/rejected": -0.3314352035522461, + "logps/chosen": -1.0690181255340576, + "logps/rejected": -1.3845093250274658, + "loss": 1.0595, + "nll_loss": 1.0043781995773315, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.10690182447433472, + "rewards/margins": 0.0315491184592247, + "rewards/rejected": -0.13845095038414001, + "step": 800 + }, + { + "epoch": 0.15, + "grad_norm": 0.8771949410438538, + "learning_rate": 7.586548260299897e-06, + "log_odds_chosen": 0.18916703760623932, + "log_odds_ratio": -0.6785593032836914, + "logits/chosen": -0.3995281755924225, + "logits/rejected": -0.3811416029930115, + "logps/chosen": -1.0976070165634155, + "logps/rejected": -1.21306574344635, + "loss": 1.0863, + "nll_loss": 1.0184518098831177, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10976070165634155, + "rewards/margins": 0.01154586672782898, + "rewards/rejected": -0.12130657583475113, + "step": 810 + }, + { + "epoch": 0.15, + "grad_norm": 0.9495704770088196, + "learning_rate": 7.58072499636046e-06, + "log_odds_chosen": 0.3516364097595215, + "log_odds_ratio": -0.6316573619842529, + "logits/chosen": -0.3535796105861664, + "logits/rejected": -0.344553679227829, + "logps/chosen": -1.112302541732788, + "logps/rejected": -1.2996736764907837, + "loss": 1.1134, + "nll_loss": 1.050210952758789, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.11123025417327881, + "rewards/margins": 0.01873708888888359, + "rewards/rejected": -0.1299673616886139, + "step": 820 + }, + { + "epoch": 0.15, + "grad_norm": 0.8986438512802124, + "learning_rate": 7.574901732421022e-06, + "log_odds_chosen": 0.5606812238693237, + "log_odds_ratio": -0.5470336675643921, + "logits/chosen": -0.37523120641708374, + "logits/rejected": -0.37770146131515503, + "logps/chosen": -0.9329134225845337, + "logps/rejected": -1.25912606716156, + "loss": 1.0575, + "nll_loss": 1.0027515888214111, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09329134225845337, + "rewards/margins": 0.03262128308415413, + "rewards/rejected": -0.1259126365184784, + "step": 830 + }, + { + "epoch": 0.15, + "grad_norm": 0.5794965624809265, + "learning_rate": 7.5690784684815835e-06, + "log_odds_chosen": 0.4418957233428955, + "log_odds_ratio": -0.5914565324783325, + "logits/chosen": -0.3764459490776062, + "logits/rejected": -0.4029974043369293, + "logps/chosen": -0.9287961721420288, + "logps/rejected": -1.1817586421966553, + "loss": 1.1, + "nll_loss": 1.0408680438995361, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09287962317466736, + "rewards/margins": 0.02529624104499817, + "rewards/rejected": -0.11817586421966553, + "step": 840 + }, + { + "epoch": 0.15, + "grad_norm": 1.5248697996139526, + "learning_rate": 7.563255204542146e-06, + "log_odds_chosen": 0.34692031145095825, + "log_odds_ratio": -0.6192424893379211, + "logits/chosen": -0.4035261273384094, + "logits/rejected": -0.38099542260169983, + "logps/chosen": -0.9760900735855103, + "logps/rejected": -1.1962649822235107, + "loss": 1.0806, + "nll_loss": 1.0186859369277954, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09760899841785431, + "rewards/margins": 0.022017499431967735, + "rewards/rejected": -0.11962650716304779, + "step": 850 + }, + { + "epoch": 0.16, + "grad_norm": 0.8907870054244995, + "learning_rate": 7.557431940602707e-06, + "log_odds_chosen": 0.466513454914093, + "log_odds_ratio": -0.6052361130714417, + "logits/chosen": -0.4223414361476898, + "logits/rejected": -0.42823654413223267, + "logps/chosen": -0.9593068361282349, + "logps/rejected": -1.228334665298462, + "loss": 1.038, + "nll_loss": 0.9774872064590454, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09593068063259125, + "rewards/margins": 0.026902783662080765, + "rewards/rejected": -0.12283346801996231, + "step": 860 + }, + { + "epoch": 0.16, + "grad_norm": 0.7226473689079285, + "learning_rate": 7.5516086766632695e-06, + "log_odds_chosen": 0.553143322467804, + "log_odds_ratio": -0.5825859308242798, + "logits/chosen": -0.3536778390407562, + "logits/rejected": -0.37367385625839233, + "logps/chosen": -1.0212783813476562, + "logps/rejected": -1.3241362571716309, + "loss": 1.0562, + "nll_loss": 0.9979656934738159, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10212783515453339, + "rewards/margins": 0.030285779386758804, + "rewards/rejected": -0.13241362571716309, + "step": 870 + }, + { + "epoch": 0.16, + "grad_norm": 1.348470687866211, + "learning_rate": 7.545785412723831e-06, + "log_odds_chosen": 0.48230600357055664, + "log_odds_ratio": -0.5734744668006897, + "logits/chosen": -0.3653802275657654, + "logits/rejected": -0.40828999876976013, + "logps/chosen": -1.0022938251495361, + "logps/rejected": -1.2667487859725952, + "loss": 1.0923, + "nll_loss": 1.0349771976470947, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10022939741611481, + "rewards/margins": 0.026445496827363968, + "rewards/rejected": -0.12667489051818848, + "step": 880 + }, + { + "epoch": 0.16, + "grad_norm": 1.4232394695281982, + "learning_rate": 7.539962148784393e-06, + "log_odds_chosen": 0.4975571036338806, + "log_odds_ratio": -0.5937734842300415, + "logits/chosen": -0.37488216161727905, + "logits/rejected": -0.4145907461643219, + "logps/chosen": -0.9429531097412109, + "logps/rejected": -1.2314273118972778, + "loss": 1.0408, + "nll_loss": 0.9814382791519165, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09429532289505005, + "rewards/margins": 0.028847401961684227, + "rewards/rejected": -0.12314271926879883, + "step": 890 + }, + { + "epoch": 0.16, + "grad_norm": 0.7326558828353882, + "learning_rate": 7.534138884844956e-06, + "log_odds_chosen": 0.35102158784866333, + "log_odds_ratio": -0.6554730534553528, + "logits/chosen": -0.4667816162109375, + "logits/rejected": -0.4587486684322357, + "logps/chosen": -1.1637868881225586, + "logps/rejected": -1.3463655710220337, + "loss": 1.1636, + "nll_loss": 1.098016381263733, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.11637868732213974, + "rewards/margins": 0.018257874995470047, + "rewards/rejected": -0.13463656604290009, + "step": 900 + }, + { + "epoch": 0.16, + "grad_norm": 0.9966881275177002, + "learning_rate": 7.528315620905517e-06, + "log_odds_chosen": 0.5971187353134155, + "log_odds_ratio": -0.5196677446365356, + "logits/chosen": -0.35535866022109985, + "logits/rejected": -0.38316792249679565, + "logps/chosen": -0.9760394096374512, + "logps/rejected": -1.33073091506958, + "loss": 0.9965, + "nll_loss": 0.9444907903671265, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0976039469242096, + "rewards/margins": 0.03546914458274841, + "rewards/rejected": -0.133073091506958, + "step": 910 + }, + { + "epoch": 0.17, + "grad_norm": 1.4429069757461548, + "learning_rate": 7.522492356966079e-06, + "log_odds_chosen": 0.30998191237449646, + "log_odds_ratio": -0.6428655385971069, + "logits/chosen": -0.38161906599998474, + "logits/rejected": -0.39548197388648987, + "logps/chosen": -0.9770835638046265, + "logps/rejected": -1.1524428129196167, + "loss": 1.071, + "nll_loss": 1.0066882371902466, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09770835936069489, + "rewards/margins": 0.01753593422472477, + "rewards/rejected": -0.11524428427219391, + "step": 920 + }, + { + "epoch": 0.17, + "grad_norm": 0.8039138317108154, + "learning_rate": 7.516669093026642e-06, + "log_odds_chosen": 0.4522746503353119, + "log_odds_ratio": -0.5700639486312866, + "logits/chosen": -0.3611920475959778, + "logits/rejected": -0.3778507113456726, + "logps/chosen": -0.9865026473999023, + "logps/rejected": -1.2545497417449951, + "loss": 0.9676, + "nll_loss": 0.910589337348938, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.098650261759758, + "rewards/margins": 0.02680472657084465, + "rewards/rejected": -0.12545499205589294, + "step": 930 + }, + { + "epoch": 0.17, + "grad_norm": 1.1389050483703613, + "learning_rate": 7.510845829087203e-06, + "log_odds_chosen": 0.5116861462593079, + "log_odds_ratio": -0.5802291035652161, + "logits/chosen": -0.3478499948978424, + "logits/rejected": -0.36575666069984436, + "logps/chosen": -1.0669629573822021, + "logps/rejected": -1.360779047012329, + "loss": 1.0806, + "nll_loss": 1.0225512981414795, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10669630765914917, + "rewards/margins": 0.02938159741461277, + "rewards/rejected": -0.1360778957605362, + "step": 940 + }, + { + "epoch": 0.17, + "grad_norm": 1.0509241819381714, + "learning_rate": 7.505022565147765e-06, + "log_odds_chosen": 0.6026821136474609, + "log_odds_ratio": -0.5167630314826965, + "logits/chosen": -0.3945934772491455, + "logits/rejected": -0.40920257568359375, + "logps/chosen": -0.9981874227523804, + "logps/rejected": -1.37343430519104, + "loss": 1.0228, + "nll_loss": 0.9711018800735474, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09981875121593475, + "rewards/margins": 0.03752467781305313, + "rewards/rejected": -0.13734343647956848, + "step": 950 + }, + { + "epoch": 0.17, + "grad_norm": 0.6846367716789246, + "learning_rate": 7.499199301208327e-06, + "log_odds_chosen": 0.59569251537323, + "log_odds_ratio": -0.5624656081199646, + "logits/chosen": -0.3632737994194031, + "logits/rejected": -0.4208325445652008, + "logps/chosen": -0.9958592653274536, + "logps/rejected": -1.3909313678741455, + "loss": 1.034, + "nll_loss": 0.977750301361084, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09958592802286148, + "rewards/margins": 0.03950721025466919, + "rewards/rejected": -0.13909313082695007, + "step": 960 + }, + { + "epoch": 0.18, + "grad_norm": 1.6020110845565796, + "learning_rate": 7.493376037268888e-06, + "log_odds_chosen": 0.3030172884464264, + "log_odds_ratio": -0.6338340044021606, + "logits/chosen": -0.41403812170028687, + "logits/rejected": -0.417144238948822, + "logps/chosen": -1.0414340496063232, + "logps/rejected": -1.2572132349014282, + "loss": 1.0919, + "nll_loss": 1.028564691543579, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10414339601993561, + "rewards/margins": 0.021577920764684677, + "rewards/rejected": -0.12572133541107178, + "step": 970 + }, + { + "epoch": 0.18, + "grad_norm": 0.6520597338676453, + "learning_rate": 7.487552773329451e-06, + "log_odds_chosen": 0.4331362843513489, + "log_odds_ratio": -0.5906507968902588, + "logits/chosen": -0.37529271841049194, + "logits/rejected": -0.3968786597251892, + "logps/chosen": -1.0254571437835693, + "logps/rejected": -1.3230538368225098, + "loss": 1.0314, + "nll_loss": 0.972353458404541, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10254571586847305, + "rewards/margins": 0.029759686440229416, + "rewards/rejected": -0.13230539858341217, + "step": 980 + }, + { + "epoch": 0.18, + "grad_norm": 1.175098180770874, + "learning_rate": 7.481729509390013e-06, + "log_odds_chosen": 0.7574179768562317, + "log_odds_ratio": -0.4986720681190491, + "logits/chosen": -0.280872642993927, + "logits/rejected": -0.3123845160007477, + "logps/chosen": -0.8410719037055969, + "logps/rejected": -1.3167189359664917, + "loss": 0.8723, + "nll_loss": 0.8224380612373352, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.0841071829199791, + "rewards/margins": 0.04756471887230873, + "rewards/rejected": -0.13167190551757812, + "step": 990 + }, + { + "epoch": 0.18, + "grad_norm": 0.8352462649345398, + "learning_rate": 7.4759062454505745e-06, + "log_odds_chosen": 0.3969859778881073, + "log_odds_ratio": -0.5995320081710815, + "logits/chosen": -0.3335706293582916, + "logits/rejected": -0.3678201735019684, + "logps/chosen": -0.9420549273490906, + "logps/rejected": -1.1878598928451538, + "loss": 0.9524, + "nll_loss": 0.8924533724784851, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09420549869537354, + "rewards/margins": 0.02458050288259983, + "rewards/rejected": -0.11878599971532822, + "step": 1000 + }, + { + "epoch": 0.18, + "grad_norm": 0.8329207301139832, + "learning_rate": 7.470082981511137e-06, + "log_odds_chosen": 0.4053170084953308, + "log_odds_ratio": -0.6159269213676453, + "logits/chosen": -0.28889894485473633, + "logits/rejected": -0.30135685205459595, + "logps/chosen": -1.0672948360443115, + "logps/rejected": -1.2995731830596924, + "loss": 1.0576, + "nll_loss": 0.9960120320320129, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10672948509454727, + "rewards/margins": 0.023227838799357414, + "rewards/rejected": -0.12995730340480804, + "step": 1010 + }, + { + "epoch": 0.18, + "grad_norm": 1.6608293056488037, + "learning_rate": 7.464259717571699e-06, + "log_odds_chosen": 0.2776438593864441, + "log_odds_ratio": -0.6814143061637878, + "logits/chosen": -0.3903682231903076, + "logits/rejected": -0.4070429801940918, + "logps/chosen": -1.2205824851989746, + "logps/rejected": -1.3949607610702515, + "loss": 1.1705, + "nll_loss": 1.1023309230804443, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.12205825746059418, + "rewards/margins": 0.01743781939148903, + "rewards/rejected": -0.1394960731267929, + "step": 1020 + }, + { + "epoch": 0.19, + "grad_norm": 1.8429490327835083, + "learning_rate": 7.4584364536322606e-06, + "log_odds_chosen": 0.4468922019004822, + "log_odds_ratio": -0.6024754047393799, + "logits/chosen": -0.3262963891029358, + "logits/rejected": -0.30852293968200684, + "logps/chosen": -1.005517601966858, + "logps/rejected": -1.2748384475708008, + "loss": 1.0482, + "nll_loss": 0.9879406094551086, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10055176913738251, + "rewards/margins": 0.02693208120763302, + "rewards/rejected": -0.12748384475708008, + "step": 1030 + }, + { + "epoch": 0.19, + "grad_norm": 1.0084201097488403, + "learning_rate": 7.452613189692822e-06, + "log_odds_chosen": 0.3204442858695984, + "log_odds_ratio": -0.6441566944122314, + "logits/chosen": -0.3281877934932709, + "logits/rejected": -0.34244847297668457, + "logps/chosen": -1.0889025926589966, + "logps/rejected": -1.27254056930542, + "loss": 1.0895, + "nll_loss": 1.0251226425170898, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10889027267694473, + "rewards/margins": 0.01836378499865532, + "rewards/rejected": -0.12725405395030975, + "step": 1040 + }, + { + "epoch": 0.19, + "grad_norm": 2.1175448894500732, + "learning_rate": 7.446789925753384e-06, + "log_odds_chosen": 0.49451178312301636, + "log_odds_ratio": -0.5911440849304199, + "logits/chosen": -0.392509400844574, + "logits/rejected": -0.41400307416915894, + "logps/chosen": -1.0346556901931763, + "logps/rejected": -1.3819650411605835, + "loss": 1.0346, + "nll_loss": 0.9754676818847656, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10346555709838867, + "rewards/margins": 0.0347309373319149, + "rewards/rejected": -0.13819649815559387, + "step": 1050 + }, + { + "epoch": 0.19, + "grad_norm": 1.266845464706421, + "learning_rate": 7.440966661813946e-06, + "log_odds_chosen": 0.5281954407691956, + "log_odds_ratio": -0.559390664100647, + "logits/chosen": -0.3589046001434326, + "logits/rejected": -0.36148151755332947, + "logps/chosen": -0.9146555066108704, + "logps/rejected": -1.2053717374801636, + "loss": 1.0095, + "nll_loss": 0.9535647630691528, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0914655476808548, + "rewards/margins": 0.029071617871522903, + "rewards/rejected": -0.1205371618270874, + "step": 1060 + }, + { + "epoch": 0.19, + "grad_norm": 0.6916521191596985, + "learning_rate": 7.435143397874508e-06, + "log_odds_chosen": 0.562498927116394, + "log_odds_ratio": -0.5646517872810364, + "logits/chosen": -0.3899080455303192, + "logits/rejected": -0.3930276334285736, + "logps/chosen": -0.9596776962280273, + "logps/rejected": -1.2929902076721191, + "loss": 1.0957, + "nll_loss": 1.0391947031021118, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09596777707338333, + "rewards/margins": 0.0333312451839447, + "rewards/rejected": -0.12929901480674744, + "step": 1070 + }, + { + "epoch": 0.2, + "grad_norm": 1.1792149543762207, + "learning_rate": 7.42932013393507e-06, + "log_odds_chosen": 0.551052451133728, + "log_odds_ratio": -0.582817554473877, + "logits/chosen": -0.38785818219184875, + "logits/rejected": -0.4266396462917328, + "logps/chosen": -1.1055101156234741, + "logps/rejected": -1.4765472412109375, + "loss": 1.0829, + "nll_loss": 1.0245933532714844, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.11055102199316025, + "rewards/margins": 0.03710371255874634, + "rewards/rejected": -0.147654727101326, + "step": 1080 + }, + { + "epoch": 0.2, + "grad_norm": 0.9841828942298889, + "learning_rate": 7.423496869995633e-06, + "log_odds_chosen": 0.3436262011528015, + "log_odds_ratio": -0.6445078253746033, + "logits/chosen": -0.39780935645103455, + "logits/rejected": -0.39955899119377136, + "logps/chosen": -1.16604483127594, + "logps/rejected": -1.4091861248016357, + "loss": 1.1291, + "nll_loss": 1.0646467208862305, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11660448461771011, + "rewards/margins": 0.024314161390066147, + "rewards/rejected": -0.14091864228248596, + "step": 1090 + }, + { + "epoch": 0.2, + "grad_norm": 1.4496309757232666, + "learning_rate": 7.417673606056194e-06, + "log_odds_chosen": 0.5248538255691528, + "log_odds_ratio": -0.5791778564453125, + "logits/chosen": -0.3379751443862915, + "logits/rejected": -0.3429611921310425, + "logps/chosen": -1.0097376108169556, + "logps/rejected": -1.3185153007507324, + "loss": 1.0299, + "nll_loss": 0.971939742565155, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10097376257181168, + "rewards/margins": 0.03087778389453888, + "rewards/rejected": -0.13185153901576996, + "step": 1100 + }, + { + "epoch": 0.2, + "grad_norm": 0.9468082785606384, + "learning_rate": 7.4118503421167565e-06, + "log_odds_chosen": 0.3713390827178955, + "log_odds_ratio": -0.6597203612327576, + "logits/chosen": -0.33122798800468445, + "logits/rejected": -0.308432936668396, + "logps/chosen": -1.116742491722107, + "logps/rejected": -1.3301223516464233, + "loss": 1.0757, + "nll_loss": 1.00968599319458, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.11167426407337189, + "rewards/margins": 0.02133798785507679, + "rewards/rejected": -0.13301225006580353, + "step": 1110 + }, + { + "epoch": 0.2, + "grad_norm": 1.1661351919174194, + "learning_rate": 7.406027078177319e-06, + "log_odds_chosen": 0.23989257216453552, + "log_odds_ratio": -0.6858891248703003, + "logits/chosen": -0.399729460477829, + "logits/rejected": -0.4175766110420227, + "logps/chosen": -1.0951465368270874, + "logps/rejected": -1.2085391283035278, + "loss": 1.0652, + "nll_loss": 0.9965718388557434, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.10951465368270874, + "rewards/margins": 0.011339271441102028, + "rewards/rejected": -0.12085392326116562, + "step": 1120 + }, + { + "epoch": 0.2, + "grad_norm": 1.182506799697876, + "learning_rate": 7.4002038142378794e-06, + "log_odds_chosen": 0.4975626468658447, + "log_odds_ratio": -0.5809773206710815, + "logits/chosen": -0.3122422993183136, + "logits/rejected": -0.3628779947757721, + "logps/chosen": -1.0512737035751343, + "logps/rejected": -1.3611750602722168, + "loss": 1.0589, + "nll_loss": 1.0008268356323242, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10512737929821014, + "rewards/margins": 0.030990120023489, + "rewards/rejected": -0.13611750304698944, + "step": 1130 + }, + { + "epoch": 0.21, + "grad_norm": 1.0794907808303833, + "learning_rate": 7.394380550298442e-06, + "log_odds_chosen": 0.44226646423339844, + "log_odds_ratio": -0.5803853273391724, + "logits/chosen": -0.34379130601882935, + "logits/rejected": -0.35066670179367065, + "logps/chosen": -1.0103256702423096, + "logps/rejected": -1.2785089015960693, + "loss": 1.0025, + "nll_loss": 0.9445074200630188, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10103257745504379, + "rewards/margins": 0.02681831642985344, + "rewards/rejected": -0.12785090506076813, + "step": 1140 + }, + { + "epoch": 0.21, + "grad_norm": 1.5841662883758545, + "learning_rate": 7.388557286359004e-06, + "log_odds_chosen": 0.5097657442092896, + "log_odds_ratio": -0.5746845006942749, + "logits/chosen": -0.28022605180740356, + "logits/rejected": -0.3266647458076477, + "logps/chosen": -0.9454906582832336, + "logps/rejected": -1.2473700046539307, + "loss": 0.997, + "nll_loss": 0.9395227432250977, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09454905986785889, + "rewards/margins": 0.03018794022500515, + "rewards/rejected": -0.12473700195550919, + "step": 1150 + }, + { + "epoch": 0.21, + "grad_norm": 1.1745624542236328, + "learning_rate": 7.3827340224195655e-06, + "log_odds_chosen": 0.44944530725479126, + "log_odds_ratio": -0.6229075193405151, + "logits/chosen": -0.3116758167743683, + "logits/rejected": -0.352629691362381, + "logps/chosen": -0.9129802584648132, + "logps/rejected": -1.1999928951263428, + "loss": 0.9836, + "nll_loss": 0.9212974309921265, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09129802882671356, + "rewards/margins": 0.028701260685920715, + "rewards/rejected": -0.11999928951263428, + "step": 1160 + }, + { + "epoch": 0.21, + "grad_norm": 1.03397798538208, + "learning_rate": 7.376910758480128e-06, + "log_odds_chosen": 0.5379278063774109, + "log_odds_ratio": -0.5735832452774048, + "logits/chosen": -0.31208473443984985, + "logits/rejected": -0.3373704254627228, + "logps/chosen": -0.9682260751724243, + "logps/rejected": -1.293874740600586, + "loss": 1.0553, + "nll_loss": 0.9979804158210754, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09682260453701019, + "rewards/margins": 0.03256487101316452, + "rewards/rejected": -0.12938746809959412, + "step": 1170 + }, + { + "epoch": 0.21, + "grad_norm": 0.9198788404464722, + "learning_rate": 7.37108749454069e-06, + "log_odds_chosen": 0.5509835481643677, + "log_odds_ratio": -0.5716922283172607, + "logits/chosen": -0.35105282068252563, + "logits/rejected": -0.38916003704071045, + "logps/chosen": -0.970038115978241, + "logps/rejected": -1.3079442977905273, + "loss": 1.0032, + "nll_loss": 0.9459899663925171, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09700380265712738, + "rewards/margins": 0.033790625631809235, + "rewards/rejected": -0.1307944506406784, + "step": 1180 + }, + { + "epoch": 0.21, + "grad_norm": 0.8531373143196106, + "learning_rate": 7.365264230601252e-06, + "log_odds_chosen": 0.4643592834472656, + "log_odds_ratio": -0.5688709020614624, + "logits/chosen": -0.3445436358451843, + "logits/rejected": -0.3675573170185089, + "logps/chosen": -0.9282194972038269, + "logps/rejected": -1.1918764114379883, + "loss": 0.9728, + "nll_loss": 0.9158981442451477, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09282194823026657, + "rewards/margins": 0.02636568807065487, + "rewards/rejected": -0.11918763816356659, + "step": 1190 + }, + { + "epoch": 0.22, + "grad_norm": 1.4299198389053345, + "learning_rate": 7.359440966661814e-06, + "log_odds_chosen": 0.6456435918807983, + "log_odds_ratio": -0.5181793570518494, + "logits/chosen": -0.3154214322566986, + "logits/rejected": -0.37061676383018494, + "logps/chosen": -0.9311151504516602, + "logps/rejected": -1.331710934638977, + "loss": 1.0204, + "nll_loss": 0.9685548543930054, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09311151504516602, + "rewards/margins": 0.04005958512425423, + "rewards/rejected": -0.13317111134529114, + "step": 1200 + }, + { + "epoch": 0.22, + "grad_norm": 1.1542985439300537, + "learning_rate": 7.353617702722376e-06, + "log_odds_chosen": 0.8618080019950867, + "log_odds_ratio": -0.47413817048072815, + "logits/chosen": -0.3236393332481384, + "logits/rejected": -0.3237884044647217, + "logps/chosen": -0.8999403119087219, + "logps/rejected": -1.4406185150146484, + "loss": 0.9189, + "nll_loss": 0.8714414834976196, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08999402821063995, + "rewards/margins": 0.054067812860012054, + "rewards/rejected": -0.1440618485212326, + "step": 1210 + }, + { + "epoch": 0.22, + "grad_norm": 1.0737377405166626, + "learning_rate": 7.347794438782937e-06, + "log_odds_chosen": 0.6115435361862183, + "log_odds_ratio": -0.573743462562561, + "logits/chosen": -0.34152212738990784, + "logits/rejected": -0.35158854722976685, + "logps/chosen": -0.9282305836677551, + "logps/rejected": -1.25412917137146, + "loss": 1.0161, + "nll_loss": 0.958741307258606, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09282305091619492, + "rewards/margins": 0.0325898714363575, + "rewards/rejected": -0.1254129260778427, + "step": 1220 + }, + { + "epoch": 0.22, + "grad_norm": 1.0002702474594116, + "learning_rate": 7.341971174843499e-06, + "log_odds_chosen": 0.4594632685184479, + "log_odds_ratio": -0.605194628238678, + "logits/chosen": -0.3332158923149109, + "logits/rejected": -0.3719615936279297, + "logps/chosen": -0.9105242490768433, + "logps/rejected": -1.1700128316879272, + "loss": 0.9943, + "nll_loss": 0.9337489008903503, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09105244278907776, + "rewards/margins": 0.025948846712708473, + "rewards/rejected": -0.11700127273797989, + "step": 1230 + }, + { + "epoch": 0.22, + "grad_norm": 0.6423687934875488, + "learning_rate": 7.3361479109040614e-06, + "log_odds_chosen": 0.5466501712799072, + "log_odds_ratio": -0.5491333603858948, + "logits/chosen": -0.38570788502693176, + "logits/rejected": -0.41777318716049194, + "logps/chosen": -1.0705764293670654, + "logps/rejected": -1.4159528017044067, + "loss": 1.1157, + "nll_loss": 1.0607718229293823, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10705764591693878, + "rewards/margins": 0.03453763201832771, + "rewards/rejected": -0.1415952742099762, + "step": 1240 + }, + { + "epoch": 0.23, + "grad_norm": 0.9487566351890564, + "learning_rate": 7.330324646964623e-06, + "log_odds_chosen": 0.6835122108459473, + "log_odds_ratio": -0.541060209274292, + "logits/chosen": -0.3431823253631592, + "logits/rejected": -0.3847135305404663, + "logps/chosen": -0.8870918154716492, + "logps/rejected": -1.319023847579956, + "loss": 0.9598, + "nll_loss": 0.9056830406188965, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08870919793844223, + "rewards/margins": 0.043193183839321136, + "rewards/rejected": -0.13190238177776337, + "step": 1250 + }, + { + "epoch": 0.23, + "grad_norm": 0.7351201176643372, + "learning_rate": 7.324501383025185e-06, + "log_odds_chosen": 0.5293228030204773, + "log_odds_ratio": -0.6223096251487732, + "logits/chosen": -0.41755110025405884, + "logits/rejected": -0.40872058272361755, + "logps/chosen": -0.9203858375549316, + "logps/rejected": -1.2283319234848022, + "loss": 1.0607, + "nll_loss": 0.9984228014945984, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.0920385867357254, + "rewards/margins": 0.030794614925980568, + "rewards/rejected": -0.12283320724964142, + "step": 1260 + }, + { + "epoch": 0.23, + "grad_norm": 0.7665841579437256, + "learning_rate": 7.3186781190857475e-06, + "log_odds_chosen": 0.4791649878025055, + "log_odds_ratio": -0.5774362683296204, + "logits/chosen": -0.37003204226493835, + "logits/rejected": -0.40496787428855896, + "logps/chosen": -0.9530180096626282, + "logps/rejected": -1.2359671592712402, + "loss": 1.0133, + "nll_loss": 0.955549418926239, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0953017920255661, + "rewards/margins": 0.028294924646615982, + "rewards/rejected": -0.12359671294689178, + "step": 1270 + }, + { + "epoch": 0.23, + "grad_norm": 0.449677050113678, + "learning_rate": 7.312854855146309e-06, + "log_odds_chosen": 0.5322305560112, + "log_odds_ratio": -0.5769363641738892, + "logits/chosen": -0.40565329790115356, + "logits/rejected": -0.43567219376564026, + "logps/chosen": -0.9717713594436646, + "logps/rejected": -1.263688087463379, + "loss": 1.0753, + "nll_loss": 1.0175769329071045, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09717713296413422, + "rewards/margins": 0.029191669076681137, + "rewards/rejected": -0.12636880576610565, + "step": 1280 + }, + { + "epoch": 0.23, + "grad_norm": 2.150800943374634, + "learning_rate": 7.307031591206871e-06, + "log_odds_chosen": 0.6120613813400269, + "log_odds_ratio": -0.5586797595024109, + "logits/chosen": -0.3177061080932617, + "logits/rejected": -0.3986894190311432, + "logps/chosen": -0.9389210939407349, + "logps/rejected": -1.3461209535598755, + "loss": 1.0326, + "nll_loss": 0.976696789264679, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09389211237430573, + "rewards/margins": 0.04071998968720436, + "rewards/rejected": -0.13461211323738098, + "step": 1290 + }, + { + "epoch": 0.23, + "grad_norm": 0.8204323649406433, + "learning_rate": 7.301208327267434e-06, + "log_odds_chosen": 0.48294463753700256, + "log_odds_ratio": -0.5884346961975098, + "logits/chosen": -0.38646456599235535, + "logits/rejected": -0.36271700263023376, + "logps/chosen": -0.8860207796096802, + "logps/rejected": -1.1833808422088623, + "loss": 0.9879, + "nll_loss": 0.9290531277656555, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08860208094120026, + "rewards/margins": 0.029736008495092392, + "rewards/rejected": -0.11833808571100235, + "step": 1300 + }, + { + "epoch": 0.24, + "grad_norm": 0.8067964911460876, + "learning_rate": 7.295385063327995e-06, + "log_odds_chosen": 0.45374226570129395, + "log_odds_ratio": -0.6168845295906067, + "logits/chosen": -0.3121108412742615, + "logits/rejected": -0.3362121284008026, + "logps/chosen": -1.0480873584747314, + "logps/rejected": -1.3348156213760376, + "loss": 0.9928, + "nll_loss": 0.9311412572860718, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10480872541666031, + "rewards/margins": 0.028672825545072556, + "rewards/rejected": -0.13348154723644257, + "step": 1310 + }, + { + "epoch": 0.24, + "grad_norm": 1.132856845855713, + "learning_rate": 7.2895617993885565e-06, + "log_odds_chosen": 0.6111911535263062, + "log_odds_ratio": -0.5393105745315552, + "logits/chosen": -0.3297664523124695, + "logits/rejected": -0.3743050992488861, + "logps/chosen": -0.9645237922668457, + "logps/rejected": -1.344177007675171, + "loss": 1.0082, + "nll_loss": 0.9542475938796997, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09645237773656845, + "rewards/margins": 0.037965334951877594, + "rewards/rejected": -0.13441771268844604, + "step": 1320 + }, + { + "epoch": 0.24, + "grad_norm": 0.9139919281005859, + "learning_rate": 7.283738535449119e-06, + "log_odds_chosen": 0.4854803681373596, + "log_odds_ratio": -0.5775014162063599, + "logits/chosen": -0.2660491466522217, + "logits/rejected": -0.33125635981559753, + "logps/chosen": -0.9659830927848816, + "logps/rejected": -1.2355717420578003, + "loss": 0.9855, + "nll_loss": 0.9277377128601074, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0965983122587204, + "rewards/margins": 0.026958853006362915, + "rewards/rejected": -0.12355717271566391, + "step": 1330 + }, + { + "epoch": 0.24, + "grad_norm": 0.8374517560005188, + "learning_rate": 7.277915271509681e-06, + "log_odds_chosen": 0.28127503395080566, + "log_odds_ratio": -0.6820573806762695, + "logits/chosen": -0.36101454496383667, + "logits/rejected": -0.38573795557022095, + "logps/chosen": -0.9789048433303833, + "logps/rejected": -1.1818983554840088, + "loss": 1.0525, + "nll_loss": 0.9843059778213501, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09789048135280609, + "rewards/margins": 0.020299362018704414, + "rewards/rejected": -0.11818984895944595, + "step": 1340 + }, + { + "epoch": 0.24, + "grad_norm": 0.6741836071014404, + "learning_rate": 7.272092007570243e-06, + "log_odds_chosen": 0.5547982454299927, + "log_odds_ratio": -0.5649451017379761, + "logits/chosen": -0.35805225372314453, + "logits/rejected": -0.381672203540802, + "logps/chosen": -0.9512104988098145, + "logps/rejected": -1.2871150970458984, + "loss": 1.0333, + "nll_loss": 0.9767767190933228, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09512104839086533, + "rewards/margins": 0.03359045460820198, + "rewards/rejected": -0.1287115067243576, + "step": 1350 + }, + { + "epoch": 0.25, + "grad_norm": 1.025458574295044, + "learning_rate": 7.266268743630805e-06, + "log_odds_chosen": 0.49380841851234436, + "log_odds_ratio": -0.5802966356277466, + "logits/chosen": -0.248256117105484, + "logits/rejected": -0.3065466284751892, + "logps/chosen": -0.8993231058120728, + "logps/rejected": -1.1698864698410034, + "loss": 0.9771, + "nll_loss": 0.9190645217895508, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08993230760097504, + "rewards/margins": 0.027056332677602768, + "rewards/rejected": -0.1169886365532875, + "step": 1360 + }, + { + "epoch": 0.25, + "grad_norm": 0.9658330678939819, + "learning_rate": 7.260445479691367e-06, + "log_odds_chosen": 0.5135058164596558, + "log_odds_ratio": -0.5862656831741333, + "logits/chosen": -0.3769971430301666, + "logits/rejected": -0.3671988546848297, + "logps/chosen": -0.9479128122329712, + "logps/rejected": -1.2691059112548828, + "loss": 1.0174, + "nll_loss": 0.9587678909301758, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09479127824306488, + "rewards/margins": 0.03211931139230728, + "rewards/rejected": -0.12691059708595276, + "step": 1370 + }, + { + "epoch": 0.25, + "grad_norm": 0.6349258422851562, + "learning_rate": 7.254622215751929e-06, + "log_odds_chosen": 0.599760890007019, + "log_odds_ratio": -0.576224684715271, + "logits/chosen": -0.3141711354255676, + "logits/rejected": -0.363479346036911, + "logps/chosen": -1.0505956411361694, + "logps/rejected": -1.4687645435333252, + "loss": 1.083, + "nll_loss": 1.0253790616989136, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10505956411361694, + "rewards/margins": 0.041816871613264084, + "rewards/rejected": -0.14687643945217133, + "step": 1380 + }, + { + "epoch": 0.25, + "grad_norm": 0.806663990020752, + "learning_rate": 7.24879895181249e-06, + "log_odds_chosen": 0.5944348573684692, + "log_odds_ratio": -0.5526763200759888, + "logits/chosen": -0.2973032593727112, + "logits/rejected": -0.3449651300907135, + "logps/chosen": -0.910653293132782, + "logps/rejected": -1.2252440452575684, + "loss": 0.9812, + "nll_loss": 0.9258831143379211, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09106533229351044, + "rewards/margins": 0.03145906329154968, + "rewards/rejected": -0.12252439558506012, + "step": 1390 + }, + { + "epoch": 0.25, + "grad_norm": 1.9585217237472534, + "learning_rate": 7.2429756878730525e-06, + "log_odds_chosen": 0.5735687017440796, + "log_odds_ratio": -0.5608910322189331, + "logits/chosen": -0.3576315641403198, + "logits/rejected": -0.387492835521698, + "logps/chosen": -0.9962593913078308, + "logps/rejected": -1.3501719236373901, + "loss": 1.0814, + "nll_loss": 1.0253442525863647, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09962593764066696, + "rewards/margins": 0.03539125993847847, + "rewards/rejected": -0.13501721620559692, + "step": 1400 + }, + { + "epoch": 0.25, + "grad_norm": 0.9405146837234497, + "learning_rate": 7.237152423933614e-06, + "log_odds_chosen": 0.4652928411960602, + "log_odds_ratio": -0.6031362414360046, + "logits/chosen": -0.34576138854026794, + "logits/rejected": -0.34349748492240906, + "logps/chosen": -1.018971562385559, + "logps/rejected": -1.3038069009780884, + "loss": 1.0472, + "nll_loss": 0.9869211316108704, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10189716517925262, + "rewards/margins": 0.028483539819717407, + "rewards/rejected": -0.13038070499897003, + "step": 1410 + }, + { + "epoch": 0.26, + "grad_norm": 1.1048870086669922, + "learning_rate": 7.231329159994176e-06, + "log_odds_chosen": 0.6356562972068787, + "log_odds_ratio": -0.5308060646057129, + "logits/chosen": -0.3209785521030426, + "logits/rejected": -0.3367080092430115, + "logps/chosen": -0.9682799577713013, + "logps/rejected": -1.3677337169647217, + "loss": 0.9475, + "nll_loss": 0.8944019079208374, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09682799875736237, + "rewards/margins": 0.039945363998413086, + "rewards/rejected": -0.13677337765693665, + "step": 1420 + }, + { + "epoch": 0.26, + "grad_norm": 1.5478053092956543, + "learning_rate": 7.2255058960547386e-06, + "log_odds_chosen": 0.2101658582687378, + "log_odds_ratio": -0.692489504814148, + "logits/chosen": -0.34666886925697327, + "logits/rejected": -0.3526953160762787, + "logps/chosen": -0.9766533970832825, + "logps/rejected": -1.099202036857605, + "loss": 1.0478, + "nll_loss": 0.9785119891166687, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.09766535460948944, + "rewards/margins": 0.012254852801561356, + "rewards/rejected": -0.1099202036857605, + "step": 1430 + }, + { + "epoch": 0.26, + "grad_norm": 1.153688669204712, + "learning_rate": 7.2196826321153e-06, + "log_odds_chosen": 0.3514810800552368, + "log_odds_ratio": -0.6305993795394897, + "logits/chosen": -0.34288614988327026, + "logits/rejected": -0.3488093912601471, + "logps/chosen": -1.0037890672683716, + "logps/rejected": -1.2000821828842163, + "loss": 1.0715, + "nll_loss": 1.0084068775177002, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10037890821695328, + "rewards/margins": 0.019629308953881264, + "rewards/rejected": -0.12000821530818939, + "step": 1440 + }, + { + "epoch": 0.26, + "grad_norm": 1.224960446357727, + "learning_rate": 7.213859368175862e-06, + "log_odds_chosen": 0.3187094032764435, + "log_odds_ratio": -0.6215401887893677, + "logits/chosen": -0.31062254309654236, + "logits/rejected": -0.3334897756576538, + "logps/chosen": -0.8957698941230774, + "logps/rejected": -1.0763921737670898, + "loss": 1.0235, + "nll_loss": 0.9613516926765442, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08957698941230774, + "rewards/margins": 0.01806223951280117, + "rewards/rejected": -0.10763921588659286, + "step": 1450 + }, + { + "epoch": 0.26, + "grad_norm": 0.8180124163627625, + "learning_rate": 7.208036104236425e-06, + "log_odds_chosen": 0.46529459953308105, + "log_odds_ratio": -0.5676907896995544, + "logits/chosen": -0.3247535824775696, + "logits/rejected": -0.39403122663497925, + "logps/chosen": -0.9336856603622437, + "logps/rejected": -1.2000067234039307, + "loss": 1.0012, + "nll_loss": 0.9444776773452759, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09336856752634048, + "rewards/margins": 0.026632100343704224, + "rewards/rejected": -0.1200006753206253, + "step": 1460 + }, + { + "epoch": 0.27, + "grad_norm": 1.2947418689727783, + "learning_rate": 7.202212840296986e-06, + "log_odds_chosen": 0.24858565628528595, + "log_odds_ratio": -0.7275146245956421, + "logits/chosen": -0.34301838278770447, + "logits/rejected": -0.3836689889431, + "logps/chosen": -1.0797077417373657, + "logps/rejected": -1.202086091041565, + "loss": 1.08, + "nll_loss": 1.007287621498108, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10797077417373657, + "rewards/margins": 0.012237833812832832, + "rewards/rejected": -0.12020860612392426, + "step": 1470 + }, + { + "epoch": 0.27, + "grad_norm": 0.8348037600517273, + "learning_rate": 7.1963895763575476e-06, + "log_odds_chosen": 0.4048139452934265, + "log_odds_ratio": -0.6023651957511902, + "logits/chosen": -0.2860475182533264, + "logits/rejected": -0.31518831849098206, + "logps/chosen": -0.8968518972396851, + "logps/rejected": -1.1367640495300293, + "loss": 1.0037, + "nll_loss": 0.9434369802474976, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08968518674373627, + "rewards/margins": 0.023991206660866737, + "rewards/rejected": -0.11367639154195786, + "step": 1480 + }, + { + "epoch": 0.27, + "grad_norm": 1.1033504009246826, + "learning_rate": 7.19056631241811e-06, + "log_odds_chosen": 0.3861461281776428, + "log_odds_ratio": -0.6319631338119507, + "logits/chosen": -0.329324334859848, + "logits/rejected": -0.3265681266784668, + "logps/chosen": -0.9853197932243347, + "logps/rejected": -1.2213842868804932, + "loss": 1.0146, + "nll_loss": 0.9514220356941223, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09853197634220123, + "rewards/margins": 0.02360645867884159, + "rewards/rejected": -0.12213845551013947, + "step": 1490 + }, + { + "epoch": 0.27, + "grad_norm": 1.4328731298446655, + "learning_rate": 7.184743048478672e-06, + "log_odds_chosen": 0.566982090473175, + "log_odds_ratio": -0.5453047156333923, + "logits/chosen": -0.2849673628807068, + "logits/rejected": -0.31650617718696594, + "logps/chosen": -0.9042051434516907, + "logps/rejected": -1.2444576025009155, + "loss": 0.9611, + "nll_loss": 0.9066120386123657, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09042052179574966, + "rewards/margins": 0.03402525186538696, + "rewards/rejected": -0.12444577366113663, + "step": 1500 + }, + { + "epoch": 0.27, + "grad_norm": 0.9411538243293762, + "learning_rate": 7.178919784539234e-06, + "log_odds_chosen": 0.6574187278747559, + "log_odds_ratio": -0.5834145545959473, + "logits/chosen": -0.28295382857322693, + "logits/rejected": -0.3845768868923187, + "logps/chosen": -0.8513942956924438, + "logps/rejected": -1.3180423974990845, + "loss": 1.0373, + "nll_loss": 0.9789282083511353, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0851394310593605, + "rewards/margins": 0.04666482284665108, + "rewards/rejected": -0.13180424273014069, + "step": 1510 + }, + { + "epoch": 0.27, + "grad_norm": 0.6901792883872986, + "learning_rate": 7.173096520599796e-06, + "log_odds_chosen": 0.4914397597312927, + "log_odds_ratio": -0.6235078573226929, + "logits/chosen": -0.2798933982849121, + "logits/rejected": -0.3196839690208435, + "logps/chosen": -0.9667059779167175, + "logps/rejected": -1.2527544498443604, + "loss": 0.9792, + "nll_loss": 0.9168528318405151, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09667058289051056, + "rewards/margins": 0.02860487625002861, + "rewards/rejected": -0.12527546286582947, + "step": 1520 + }, + { + "epoch": 0.28, + "grad_norm": 0.9170409440994263, + "learning_rate": 7.167273256660358e-06, + "log_odds_chosen": 0.46514517068862915, + "log_odds_ratio": -0.5963708162307739, + "logits/chosen": -0.3535362780094147, + "logits/rejected": -0.3404978811740875, + "logps/chosen": -0.9499415159225464, + "logps/rejected": -1.2293248176574707, + "loss": 0.9763, + "nll_loss": 0.9166848063468933, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09499415010213852, + "rewards/margins": 0.02793833613395691, + "rewards/rejected": -0.12293247878551483, + "step": 1530 + }, + { + "epoch": 0.28, + "grad_norm": 1.1653376817703247, + "learning_rate": 7.16144999272092e-06, + "log_odds_chosen": 0.5939033627510071, + "log_odds_ratio": -0.5412915945053101, + "logits/chosen": -0.34380143880844116, + "logits/rejected": -0.40813955664634705, + "logps/chosen": -0.9544633030891418, + "logps/rejected": -1.2961674928665161, + "loss": 1.0981, + "nll_loss": 1.0439279079437256, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09544633328914642, + "rewards/margins": 0.034170426428318024, + "rewards/rejected": -0.12961676716804504, + "step": 1540 + }, + { + "epoch": 0.28, + "grad_norm": 1.4316825866699219, + "learning_rate": 7.155626728781482e-06, + "log_odds_chosen": 0.4217056334018707, + "log_odds_ratio": -0.6345925331115723, + "logits/chosen": -0.3553635776042938, + "logits/rejected": -0.3846450448036194, + "logps/chosen": -1.0682843923568726, + "logps/rejected": -1.3270084857940674, + "loss": 1.0435, + "nll_loss": 0.9799984097480774, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10682845115661621, + "rewards/margins": 0.025872424244880676, + "rewards/rejected": -0.1327008754014969, + "step": 1550 + }, + { + "epoch": 0.28, + "grad_norm": 0.8913134336471558, + "learning_rate": 7.149803464842044e-06, + "log_odds_chosen": 0.8833622932434082, + "log_odds_ratio": -0.4926014840602875, + "logits/chosen": -0.2923971116542816, + "logits/rejected": -0.35514816641807556, + "logps/chosen": -0.8891391754150391, + "logps/rejected": -1.430625319480896, + "loss": 1.0246, + "nll_loss": 0.9753875732421875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08891390264034271, + "rewards/margins": 0.05414862558245659, + "rewards/rejected": -0.1430625319480896, + "step": 1560 + }, + { + "epoch": 0.28, + "grad_norm": 1.0268759727478027, + "learning_rate": 7.143980200902605e-06, + "log_odds_chosen": 0.32187631726264954, + "log_odds_ratio": -0.642335057258606, + "logits/chosen": -0.3550707697868347, + "logits/rejected": -0.35717231035232544, + "logps/chosen": -1.087780237197876, + "logps/rejected": -1.2959010601043701, + "loss": 1.0671, + "nll_loss": 1.0029114484786987, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10877802222967148, + "rewards/margins": 0.02081209048628807, + "rewards/rejected": -0.12959010899066925, + "step": 1570 + }, + { + "epoch": 0.29, + "grad_norm": 0.764402449131012, + "learning_rate": 7.138156936963167e-06, + "log_odds_chosen": 0.5447977185249329, + "log_odds_ratio": -0.5645043253898621, + "logits/chosen": -0.27879756689071655, + "logits/rejected": -0.340719610452652, + "logps/chosen": -0.88838130235672, + "logps/rejected": -1.2113538980484009, + "loss": 0.9794, + "nll_loss": 0.9229621887207031, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0888381376862526, + "rewards/margins": 0.03229725360870361, + "rewards/rejected": -0.12113537639379501, + "step": 1580 + }, + { + "epoch": 0.29, + "grad_norm": 1.1508593559265137, + "learning_rate": 7.13233367302373e-06, + "log_odds_chosen": 0.24824786186218262, + "log_odds_ratio": -0.6948032975196838, + "logits/chosen": -0.40458765625953674, + "logits/rejected": -0.4210253655910492, + "logps/chosen": -0.9826368093490601, + "logps/rejected": -1.1311357021331787, + "loss": 1.0984, + "nll_loss": 1.0289161205291748, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.098263680934906, + "rewards/margins": 0.0148498909547925, + "rewards/rejected": -0.11311358213424683, + "step": 1590 + }, + { + "epoch": 0.29, + "grad_norm": 0.8140817880630493, + "learning_rate": 7.126510409084291e-06, + "log_odds_chosen": 0.44691014289855957, + "log_odds_ratio": -0.5997231602668762, + "logits/chosen": -0.36742717027664185, + "logits/rejected": -0.41214919090270996, + "logps/chosen": -1.0089651346206665, + "logps/rejected": -1.2913377285003662, + "loss": 1.0365, + "nll_loss": 0.9765187501907349, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10089650005102158, + "rewards/margins": 0.02823726274073124, + "rewards/rejected": -0.12913377583026886, + "step": 1600 + }, + { + "epoch": 0.29, + "grad_norm": 0.9169033765792847, + "learning_rate": 7.120687145144853e-06, + "log_odds_chosen": 0.6452026963233948, + "log_odds_ratio": -0.5194689035415649, + "logits/chosen": -0.3512391746044159, + "logits/rejected": -0.37762579321861267, + "logps/chosen": -0.8982778787612915, + "logps/rejected": -1.300342082977295, + "loss": 1.0038, + "nll_loss": 0.9518443942070007, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08982778340578079, + "rewards/margins": 0.0402064248919487, + "rewards/rejected": -0.1300342082977295, + "step": 1610 + }, + { + "epoch": 0.29, + "grad_norm": 1.4523009061813354, + "learning_rate": 7.114863881205416e-06, + "log_odds_chosen": 0.4951610565185547, + "log_odds_ratio": -0.5961709022521973, + "logits/chosen": -0.33776336908340454, + "logits/rejected": -0.37025144696235657, + "logps/chosen": -1.0465290546417236, + "logps/rejected": -1.3635728359222412, + "loss": 1.0154, + "nll_loss": 0.9557603597640991, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10465290397405624, + "rewards/margins": 0.031704384833574295, + "rewards/rejected": -0.13635727763175964, + "step": 1620 + }, + { + "epoch": 0.29, + "grad_norm": 1.2989190816879272, + "learning_rate": 7.109040617265977e-06, + "log_odds_chosen": 0.535646915435791, + "log_odds_ratio": -0.6097526550292969, + "logits/chosen": -0.3525809347629547, + "logits/rejected": -0.3878706991672516, + "logps/chosen": -0.9263569116592407, + "logps/rejected": -1.2173206806182861, + "loss": 0.9963, + "nll_loss": 0.9353706240653992, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09263569116592407, + "rewards/margins": 0.029096385464072227, + "rewards/rejected": -0.12173207849264145, + "step": 1630 + }, + { + "epoch": 0.3, + "grad_norm": 1.1236441135406494, + "learning_rate": 7.1032173533265394e-06, + "log_odds_chosen": 0.7328363656997681, + "log_odds_ratio": -0.5077277421951294, + "logits/chosen": -0.3232725262641907, + "logits/rejected": -0.3601072430610657, + "logps/chosen": -0.8846950531005859, + "logps/rejected": -1.3101729154586792, + "loss": 0.9562, + "nll_loss": 0.9054625630378723, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0884695053100586, + "rewards/margins": 0.0425477959215641, + "rewards/rejected": -0.1310172975063324, + "step": 1640 + }, + { + "epoch": 0.3, + "grad_norm": 1.1808315515518188, + "learning_rate": 7.097394089387102e-06, + "log_odds_chosen": 0.8097518086433411, + "log_odds_ratio": -0.4847545623779297, + "logits/chosen": -0.32802054286003113, + "logits/rejected": -0.3918638825416565, + "logps/chosen": -0.9471396207809448, + "logps/rejected": -1.4627947807312012, + "loss": 0.9561, + "nll_loss": 0.9076499938964844, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0947139710187912, + "rewards/margins": 0.0515655092895031, + "rewards/rejected": -0.1462794840335846, + "step": 1650 + }, + { + "epoch": 0.3, + "grad_norm": 0.774590015411377, + "learning_rate": 7.091570825447662e-06, + "log_odds_chosen": 0.5359390377998352, + "log_odds_ratio": -0.6051704287528992, + "logits/chosen": -0.35709959268569946, + "logits/rejected": -0.3970513939857483, + "logps/chosen": -0.94941246509552, + "logps/rejected": -1.2806012630462646, + "loss": 1.1352, + "nll_loss": 1.0746341943740845, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09494125843048096, + "rewards/margins": 0.033118873834609985, + "rewards/rejected": -0.12806013226509094, + "step": 1660 + }, + { + "epoch": 0.3, + "grad_norm": 0.907768726348877, + "learning_rate": 7.085747561508225e-06, + "log_odds_chosen": 0.4143344461917877, + "log_odds_ratio": -0.6266435384750366, + "logits/chosen": -0.36355313658714294, + "logits/rejected": -0.4045397639274597, + "logps/chosen": -1.0518794059753418, + "logps/rejected": -1.331181287765503, + "loss": 1.0651, + "nll_loss": 1.0024408102035522, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10518793016672134, + "rewards/margins": 0.02793019451200962, + "rewards/rejected": -0.13311812281608582, + "step": 1670 + }, + { + "epoch": 0.3, + "grad_norm": 0.6284062266349792, + "learning_rate": 7.079924297568787e-06, + "log_odds_chosen": 0.3497571647167206, + "log_odds_ratio": -0.6255709528923035, + "logits/chosen": -0.348887175321579, + "logits/rejected": -0.3884449005126953, + "logps/chosen": -1.0277200937271118, + "logps/rejected": -1.226231575012207, + "loss": 1.0394, + "nll_loss": 0.976884663105011, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.10277201235294342, + "rewards/margins": 0.019851163029670715, + "rewards/rejected": -0.12262316793203354, + "step": 1680 + }, + { + "epoch": 0.31, + "grad_norm": 1.5012038946151733, + "learning_rate": 7.0741010336293484e-06, + "log_odds_chosen": 0.5618478655815125, + "log_odds_ratio": -0.5806422233581543, + "logits/chosen": -0.34077805280685425, + "logits/rejected": -0.38193902373313904, + "logps/chosen": -0.9076236486434937, + "logps/rejected": -1.2798479795455933, + "loss": 0.9895, + "nll_loss": 0.9314451217651367, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09076236188411713, + "rewards/margins": 0.03722243756055832, + "rewards/rejected": -0.12798479199409485, + "step": 1690 + }, + { + "epoch": 0.31, + "grad_norm": 1.182563304901123, + "learning_rate": 7.068277769689911e-06, + "log_odds_chosen": 0.5676418542861938, + "log_odds_ratio": -0.5794765949249268, + "logits/chosen": -0.33637866377830505, + "logits/rejected": -0.40183839201927185, + "logps/chosen": -0.913240909576416, + "logps/rejected": -1.2547906637191772, + "loss": 1.0457, + "nll_loss": 0.9877544641494751, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0913240909576416, + "rewards/margins": 0.0341549888253212, + "rewards/rejected": -0.1254790723323822, + "step": 1700 + }, + { + "epoch": 0.31, + "grad_norm": 0.5410019159317017, + "learning_rate": 7.062454505750473e-06, + "log_odds_chosen": 0.615912139415741, + "log_odds_ratio": -0.5161094665527344, + "logits/chosen": -0.3636830449104309, + "logits/rejected": -0.4102718234062195, + "logps/chosen": -0.9589177966117859, + "logps/rejected": -1.329530954360962, + "loss": 1.0449, + "nll_loss": 0.9932928085327148, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09589177370071411, + "rewards/margins": 0.03706132620573044, + "rewards/rejected": -0.13295309245586395, + "step": 1710 + }, + { + "epoch": 0.31, + "grad_norm": 0.8718307018280029, + "learning_rate": 7.056631241811035e-06, + "log_odds_chosen": 0.3784236013889313, + "log_odds_ratio": -0.6245784163475037, + "logits/chosen": -0.4157410264015198, + "logits/rejected": -0.42662128806114197, + "logps/chosen": -1.0432878732681274, + "logps/rejected": -1.2611668109893799, + "loss": 1.1002, + "nll_loss": 1.0377851724624634, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10432878881692886, + "rewards/margins": 0.021787891164422035, + "rewards/rejected": -0.12611667811870575, + "step": 1720 + }, + { + "epoch": 0.31, + "grad_norm": 0.904116690158844, + "learning_rate": 7.050807977871597e-06, + "log_odds_chosen": 0.5137162804603577, + "log_odds_ratio": -0.5803641080856323, + "logits/chosen": -0.41569024324417114, + "logits/rejected": -0.4513755440711975, + "logps/chosen": -0.9598332643508911, + "logps/rejected": -1.294640064239502, + "loss": 1.0163, + "nll_loss": 0.9582997560501099, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09598332643508911, + "rewards/margins": 0.0334806889295578, + "rewards/rejected": -0.1294640153646469, + "step": 1730 + }, + { + "epoch": 0.31, + "grad_norm": 1.1222513914108276, + "learning_rate": 7.044984713932159e-06, + "log_odds_chosen": 0.5173107385635376, + "log_odds_ratio": -0.5620402693748474, + "logits/chosen": -0.3197651505470276, + "logits/rejected": -0.3535226583480835, + "logps/chosen": -1.0193755626678467, + "logps/rejected": -1.3410420417785645, + "loss": 0.9179, + "nll_loss": 0.8617299199104309, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10193755477666855, + "rewards/margins": 0.03216664493083954, + "rewards/rejected": -0.13410422205924988, + "step": 1740 + }, + { + "epoch": 0.32, + "grad_norm": 0.8727396130561829, + "learning_rate": 7.039161449992721e-06, + "log_odds_chosen": 0.5304024815559387, + "log_odds_ratio": -0.5949326753616333, + "logits/chosen": -0.4052054286003113, + "logits/rejected": -0.4512867331504822, + "logps/chosen": -1.019547462463379, + "logps/rejected": -1.3655933141708374, + "loss": 1.0608, + "nll_loss": 1.0013364553451538, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10195475816726685, + "rewards/margins": 0.03460458293557167, + "rewards/rejected": -0.13655933737754822, + "step": 1750 + }, + { + "epoch": 0.32, + "grad_norm": 1.6699724197387695, + "learning_rate": 7.033338186053282e-06, + "log_odds_chosen": 0.4551050066947937, + "log_odds_ratio": -0.6072874665260315, + "logits/chosen": -0.33777788281440735, + "logits/rejected": -0.39305973052978516, + "logps/chosen": -1.0166957378387451, + "logps/rejected": -1.3022456169128418, + "loss": 1.0423, + "nll_loss": 0.9816068410873413, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.10166957229375839, + "rewards/margins": 0.028554990887641907, + "rewards/rejected": -0.1302245706319809, + "step": 1760 + }, + { + "epoch": 0.32, + "grad_norm": 1.614592432975769, + "learning_rate": 7.027514922113844e-06, + "log_odds_chosen": 0.39455699920654297, + "log_odds_ratio": -0.6552709341049194, + "logits/chosen": -0.3162342607975006, + "logits/rejected": -0.41145092248916626, + "logps/chosen": -0.913953423500061, + "logps/rejected": -1.165477991104126, + "loss": 0.96, + "nll_loss": 0.8944529294967651, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09139533340930939, + "rewards/margins": 0.025152459740638733, + "rewards/rejected": -0.11654778569936752, + "step": 1770 + }, + { + "epoch": 0.32, + "grad_norm": 0.8290024399757385, + "learning_rate": 7.021691658174407e-06, + "log_odds_chosen": 0.6237296462059021, + "log_odds_ratio": -0.5952363014221191, + "logits/chosen": -0.3944126069545746, + "logits/rejected": -0.43316787481307983, + "logps/chosen": -0.8954101800918579, + "logps/rejected": -1.290221929550171, + "loss": 1.0224, + "nll_loss": 0.9628337621688843, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08954103291034698, + "rewards/margins": 0.03948115557432175, + "rewards/rejected": -0.12902218103408813, + "step": 1780 + }, + { + "epoch": 0.32, + "grad_norm": 1.0954138040542603, + "learning_rate": 7.015868394234968e-06, + "log_odds_chosen": 0.5047041177749634, + "log_odds_ratio": -0.5463986396789551, + "logits/chosen": -0.39047911763191223, + "logits/rejected": -0.40275081992149353, + "logps/chosen": -1.0550428628921509, + "logps/rejected": -1.3856205940246582, + "loss": 1.0086, + "nll_loss": 0.9539523124694824, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10550429672002792, + "rewards/margins": 0.033057767897844315, + "rewards/rejected": -0.13856205344200134, + "step": 1790 + }, + { + "epoch": 0.33, + "grad_norm": 1.0443518161773682, + "learning_rate": 7.0100451302955305e-06, + "log_odds_chosen": 0.5747898817062378, + "log_odds_ratio": -0.5704335570335388, + "logits/chosen": -0.3137153685092926, + "logits/rejected": -0.3747533857822418, + "logps/chosen": -0.8636377453804016, + "logps/rejected": -1.2245159149169922, + "loss": 1.0042, + "nll_loss": 0.9471246004104614, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0863637775182724, + "rewards/margins": 0.03608780354261398, + "rewards/rejected": -0.12245158851146698, + "step": 1800 + }, + { + "epoch": 0.33, + "grad_norm": 1.2862238883972168, + "learning_rate": 7.004221866356093e-06, + "log_odds_chosen": 0.5296390652656555, + "log_odds_ratio": -0.5726187825202942, + "logits/chosen": -0.3899995684623718, + "logits/rejected": -0.4204530715942383, + "logps/chosen": -0.9171603918075562, + "logps/rejected": -1.1902835369110107, + "loss": 1.0626, + "nll_loss": 1.0053044557571411, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09171605110168457, + "rewards/margins": 0.027312302961945534, + "rewards/rejected": -0.11902834475040436, + "step": 1810 + }, + { + "epoch": 0.33, + "grad_norm": 1.6314342021942139, + "learning_rate": 6.998398602416654e-06, + "log_odds_chosen": 0.5884126424789429, + "log_odds_ratio": -0.5797205567359924, + "logits/chosen": -0.36631208658218384, + "logits/rejected": -0.4214417338371277, + "logps/chosen": -1.0176228284835815, + "logps/rejected": -1.3633677959442139, + "loss": 1.0146, + "nll_loss": 0.9566680192947388, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10176227241754532, + "rewards/margins": 0.03457449749112129, + "rewards/rejected": -0.1363367736339569, + "step": 1820 + }, + { + "epoch": 0.33, + "grad_norm": 0.8122896552085876, + "learning_rate": 6.9925753384772165e-06, + "log_odds_chosen": 0.6310805082321167, + "log_odds_ratio": -0.6001628637313843, + "logits/chosen": -0.274971067905426, + "logits/rejected": -0.295291006565094, + "logps/chosen": -0.9514021873474121, + "logps/rejected": -1.3572217226028442, + "loss": 0.9502, + "nll_loss": 0.8902056813240051, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09514021128416061, + "rewards/margins": 0.040581949055194855, + "rewards/rejected": -0.13572217524051666, + "step": 1830 + }, + { + "epoch": 0.33, + "grad_norm": 1.1364367008209229, + "learning_rate": 6.986752074537778e-06, + "log_odds_chosen": 0.6302961111068726, + "log_odds_ratio": -0.5267154574394226, + "logits/chosen": -0.3086879849433899, + "logits/rejected": -0.3633294999599457, + "logps/chosen": -0.9853225946426392, + "logps/rejected": -1.3981595039367676, + "loss": 0.9728, + "nll_loss": 0.9201301336288452, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.09853225946426392, + "rewards/margins": 0.041283704340457916, + "rewards/rejected": -0.13981595635414124, + "step": 1840 + }, + { + "epoch": 0.33, + "grad_norm": 0.7904521822929382, + "learning_rate": 6.9809288105983395e-06, + "log_odds_chosen": 0.5368858575820923, + "log_odds_ratio": -0.5833784937858582, + "logits/chosen": -0.27477526664733887, + "logits/rejected": -0.3175152838230133, + "logps/chosen": -0.9110499620437622, + "logps/rejected": -1.2280490398406982, + "loss": 0.9398, + "nll_loss": 0.8814668655395508, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09110499918460846, + "rewards/margins": 0.03169991075992584, + "rewards/rejected": -0.1228049024939537, + "step": 1850 + }, + { + "epoch": 0.34, + "grad_norm": 0.8617063164710999, + "learning_rate": 6.975105546658902e-06, + "log_odds_chosen": 0.47567349672317505, + "log_odds_ratio": -0.6010562777519226, + "logits/chosen": -0.3454052805900574, + "logits/rejected": -0.4101662039756775, + "logps/chosen": -0.9865895509719849, + "logps/rejected": -1.253812551498413, + "loss": 0.9747, + "nll_loss": 0.9145520329475403, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0986589565873146, + "rewards/margins": 0.02672230266034603, + "rewards/rejected": -0.1253812611103058, + "step": 1860 + }, + { + "epoch": 0.34, + "grad_norm": 0.9157552123069763, + "learning_rate": 6.969282282719464e-06, + "log_odds_chosen": 0.4372219145298004, + "log_odds_ratio": -0.6045929789543152, + "logits/chosen": -0.44653192162513733, + "logits/rejected": -0.4505546987056732, + "logps/chosen": -1.074901819229126, + "logps/rejected": -1.350311517715454, + "loss": 1.063, + "nll_loss": 1.002518892288208, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.1074901819229126, + "rewards/margins": 0.02754097245633602, + "rewards/rejected": -0.13503116369247437, + "step": 1870 + }, + { + "epoch": 0.34, + "grad_norm": 1.502583622932434, + "learning_rate": 6.9634590187800256e-06, + "log_odds_chosen": 0.5613323450088501, + "log_odds_ratio": -0.5771964192390442, + "logits/chosen": -0.3930050730705261, + "logits/rejected": -0.44371461868286133, + "logps/chosen": -0.9873424768447876, + "logps/rejected": -1.3379671573638916, + "loss": 1.0228, + "nll_loss": 0.9650462865829468, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09873425960540771, + "rewards/margins": 0.03506246581673622, + "rewards/rejected": -0.13379672169685364, + "step": 1880 + }, + { + "epoch": 0.34, + "grad_norm": 0.5780860781669617, + "learning_rate": 6.957635754840588e-06, + "log_odds_chosen": 0.6220310926437378, + "log_odds_ratio": -0.5668379664421082, + "logits/chosen": -0.3505100607872009, + "logits/rejected": -0.4041958749294281, + "logps/chosen": -0.9813801646232605, + "logps/rejected": -1.343157410621643, + "loss": 1.0134, + "nll_loss": 0.9566676020622253, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09813802689313889, + "rewards/margins": 0.03617771714925766, + "rewards/rejected": -0.13431574404239655, + "step": 1890 + }, + { + "epoch": 0.34, + "grad_norm": 0.4777265787124634, + "learning_rate": 6.95181249090115e-06, + "log_odds_chosen": 0.42654770612716675, + "log_odds_ratio": -0.5968258380889893, + "logits/chosen": -0.37639492750167847, + "logits/rejected": -0.4339643120765686, + "logps/chosen": -1.0687663555145264, + "logps/rejected": -1.3647792339324951, + "loss": 1.067, + "nll_loss": 1.0073009729385376, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10687664896249771, + "rewards/margins": 0.029601257294416428, + "rewards/rejected": -0.13647790253162384, + "step": 1900 + }, + { + "epoch": 0.35, + "grad_norm": 0.6649044156074524, + "learning_rate": 6.945989226961712e-06, + "log_odds_chosen": 0.43329086899757385, + "log_odds_ratio": -0.6196489930152893, + "logits/chosen": -0.3298007547855377, + "logits/rejected": -0.401068776845932, + "logps/chosen": -0.990477442741394, + "logps/rejected": -1.2473641633987427, + "loss": 0.9843, + "nll_loss": 0.9223047494888306, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09904775023460388, + "rewards/margins": 0.025688668712973595, + "rewards/rejected": -0.12473641335964203, + "step": 1910 + }, + { + "epoch": 0.35, + "grad_norm": 1.315058946609497, + "learning_rate": 6.940165963022274e-06, + "log_odds_chosen": 0.530105710029602, + "log_odds_ratio": -0.5921862125396729, + "logits/chosen": -0.39285919070243835, + "logits/rejected": -0.4322722852230072, + "logps/chosen": -1.0640658140182495, + "logps/rejected": -1.410599946975708, + "loss": 1.0939, + "nll_loss": 1.0346629619598389, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10640659183263779, + "rewards/margins": 0.034653399139642715, + "rewards/rejected": -0.1410599797964096, + "step": 1920 + }, + { + "epoch": 0.35, + "grad_norm": 0.7587740421295166, + "learning_rate": 6.934342699082835e-06, + "log_odds_chosen": 0.751360297203064, + "log_odds_ratio": -0.5236696004867554, + "logits/chosen": -0.35968920588493347, + "logits/rejected": -0.41237396001815796, + "logps/chosen": -0.9788786172866821, + "logps/rejected": -1.4456140995025635, + "loss": 1.0208, + "nll_loss": 0.9684196710586548, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09788785129785538, + "rewards/margins": 0.04667355865240097, + "rewards/rejected": -0.14456140995025635, + "step": 1930 + }, + { + "epoch": 0.35, + "grad_norm": 0.6302559971809387, + "learning_rate": 6.928519435143398e-06, + "log_odds_chosen": 0.4367288649082184, + "log_odds_ratio": -0.6356965899467468, + "logits/chosen": -0.3377440273761749, + "logits/rejected": -0.3957875370979309, + "logps/chosen": -1.087546944618225, + "logps/rejected": -1.3826909065246582, + "loss": 1.0962, + "nll_loss": 1.0325984954833984, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1087547093629837, + "rewards/margins": 0.02951439656317234, + "rewards/rejected": -0.1382690966129303, + "step": 1940 + }, + { + "epoch": 0.35, + "grad_norm": 0.6672381162643433, + "learning_rate": 6.922696171203959e-06, + "log_odds_chosen": 0.6472089886665344, + "log_odds_ratio": -0.5463239550590515, + "logits/chosen": -0.33907216787338257, + "logits/rejected": -0.3891569972038269, + "logps/chosen": -0.8778939247131348, + "logps/rejected": -1.2696306705474854, + "loss": 0.9847, + "nll_loss": 0.9300362467765808, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08778940141201019, + "rewards/margins": 0.0391736701130867, + "rewards/rejected": -0.1269630640745163, + "step": 1950 + }, + { + "epoch": 0.35, + "grad_norm": 0.9312640428543091, + "learning_rate": 6.9168729072645215e-06, + "log_odds_chosen": 0.6855367422103882, + "log_odds_ratio": -0.5426241159439087, + "logits/chosen": -0.3388255536556244, + "logits/rejected": -0.39959508180618286, + "logps/chosen": -0.8858163952827454, + "logps/rejected": -1.3462940454483032, + "loss": 0.9277, + "nll_loss": 0.8734596967697144, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0885816365480423, + "rewards/margins": 0.04604776203632355, + "rewards/rejected": -0.13462939858436584, + "step": 1960 + }, + { + "epoch": 0.36, + "grad_norm": 2.3137943744659424, + "learning_rate": 6.911049643325084e-06, + "log_odds_chosen": 0.5328444242477417, + "log_odds_ratio": -0.5675147175788879, + "logits/chosen": -0.33348965644836426, + "logits/rejected": -0.36870360374450684, + "logps/chosen": -0.9644654393196106, + "logps/rejected": -1.2675182819366455, + "loss": 1.0454, + "nll_loss": 0.9886082410812378, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09644654393196106, + "rewards/margins": 0.03030528500676155, + "rewards/rejected": -0.1267518252134323, + "step": 1970 + }, + { + "epoch": 0.36, + "grad_norm": 0.7342625856399536, + "learning_rate": 6.905226379385645e-06, + "log_odds_chosen": 0.3813175857067108, + "log_odds_ratio": -0.6699342727661133, + "logits/chosen": -0.3162277340888977, + "logits/rejected": -0.35589399933815, + "logps/chosen": -1.0159353017807007, + "logps/rejected": -1.252074122428894, + "loss": 1.041, + "nll_loss": 0.9740489721298218, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10159353166818619, + "rewards/margins": 0.023613888770341873, + "rewards/rejected": -0.12520742416381836, + "step": 1980 + }, + { + "epoch": 0.36, + "grad_norm": 0.9021235704421997, + "learning_rate": 6.8994031154462076e-06, + "log_odds_chosen": 0.7601310014724731, + "log_odds_ratio": -0.4945410192012787, + "logits/chosen": -0.35523557662963867, + "logits/rejected": -0.37828975915908813, + "logps/chosen": -0.9633037447929382, + "logps/rejected": -1.4405834674835205, + "loss": 0.99, + "nll_loss": 0.940527617931366, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09633038938045502, + "rewards/margins": 0.047727979719638824, + "rewards/rejected": -0.14405836164951324, + "step": 1990 + }, + { + "epoch": 0.36, + "grad_norm": 0.4736451506614685, + "learning_rate": 6.89357985150677e-06, + "log_odds_chosen": 0.4646480977535248, + "log_odds_ratio": -0.6559361219406128, + "logits/chosen": -0.30533161759376526, + "logits/rejected": -0.37452375888824463, + "logps/chosen": -0.9401071667671204, + "logps/rejected": -1.2080457210540771, + "loss": 0.9819, + "nll_loss": 0.9162567853927612, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09401071071624756, + "rewards/margins": 0.026793863624334335, + "rewards/rejected": -0.12080458551645279, + "step": 2000 + }, + { + "epoch": 0.36, + "grad_norm": 0.7754179835319519, + "learning_rate": 6.8877565875673305e-06, + "log_odds_chosen": 0.6256696581840515, + "log_odds_ratio": -0.5275606513023376, + "logits/chosen": -0.3516564965248108, + "logits/rejected": -0.406394898891449, + "logps/chosen": -0.9741449356079102, + "logps/rejected": -1.3891098499298096, + "loss": 0.9558, + "nll_loss": 0.9030143618583679, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09741449356079102, + "rewards/margins": 0.04149649664759636, + "rewards/rejected": -0.13891097903251648, + "step": 2010 + }, + { + "epoch": 0.36, + "grad_norm": 0.7154907584190369, + "learning_rate": 6.881933323627893e-06, + "log_odds_chosen": 0.4335847496986389, + "log_odds_ratio": -0.6152317523956299, + "logits/chosen": -0.3850800395011902, + "logits/rejected": -0.38393911719322205, + "logps/chosen": -1.0410282611846924, + "logps/rejected": -1.3170279264450073, + "loss": 1.0869, + "nll_loss": 1.025334119796753, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10410281270742416, + "rewards/margins": 0.027599969878792763, + "rewards/rejected": -0.13170279562473297, + "step": 2020 + }, + { + "epoch": 0.37, + "grad_norm": 0.8986865878105164, + "learning_rate": 6.876110059688455e-06, + "log_odds_chosen": 0.5948430299758911, + "log_odds_ratio": -0.5587864518165588, + "logits/chosen": -0.349942147731781, + "logits/rejected": -0.41519179940223694, + "logps/chosen": -0.9230004549026489, + "logps/rejected": -1.3495103120803833, + "loss": 1.0273, + "nll_loss": 0.9713996648788452, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09230004251003265, + "rewards/margins": 0.0426509864628315, + "rewards/rejected": -0.13495102524757385, + "step": 2030 + }, + { + "epoch": 0.37, + "grad_norm": 1.152334213256836, + "learning_rate": 6.870286795749017e-06, + "log_odds_chosen": 0.34722867608070374, + "log_odds_ratio": -0.6710313558578491, + "logits/chosen": -0.3837626278400421, + "logits/rejected": -0.3969534635543823, + "logps/chosen": -1.0239957571029663, + "logps/rejected": -1.237477421760559, + "loss": 1.0862, + "nll_loss": 1.0191189050674438, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10239957273006439, + "rewards/margins": 0.021348167210817337, + "rewards/rejected": -0.12374775111675262, + "step": 2040 + }, + { + "epoch": 0.37, + "grad_norm": 0.756015419960022, + "learning_rate": 6.864463531809579e-06, + "log_odds_chosen": 0.48199620842933655, + "log_odds_ratio": -0.5751310586929321, + "logits/chosen": -0.3649478852748871, + "logits/rejected": -0.40983134508132935, + "logps/chosen": -1.0215938091278076, + "logps/rejected": -1.3313084840774536, + "loss": 1.0334, + "nll_loss": 0.975843071937561, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10215938091278076, + "rewards/margins": 0.0309714674949646, + "rewards/rejected": -0.13313084840774536, + "step": 2050 + }, + { + "epoch": 0.37, + "grad_norm": 0.9733452796936035, + "learning_rate": 6.858640267870141e-06, + "log_odds_chosen": 0.665124773979187, + "log_odds_ratio": -0.5681694746017456, + "logits/chosen": -0.39392274618148804, + "logits/rejected": -0.43787112832069397, + "logps/chosen": -0.9010134935379028, + "logps/rejected": -1.3361163139343262, + "loss": 0.9935, + "nll_loss": 0.9366915822029114, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09010134637355804, + "rewards/margins": 0.04351028427481651, + "rewards/rejected": -0.13361163437366486, + "step": 2060 + }, + { + "epoch": 0.37, + "grad_norm": 1.235131859779358, + "learning_rate": 6.852817003930703e-06, + "log_odds_chosen": 0.600813627243042, + "log_odds_ratio": -0.5714324712753296, + "logits/chosen": -0.29560619592666626, + "logits/rejected": -0.3389769196510315, + "logps/chosen": -0.9260609745979309, + "logps/rejected": -1.3035285472869873, + "loss": 1.0465, + "nll_loss": 0.9893512725830078, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09260609745979309, + "rewards/margins": 0.03774676471948624, + "rewards/rejected": -0.13035285472869873, + "step": 2070 + }, + { + "epoch": 0.38, + "grad_norm": 0.881033182144165, + "learning_rate": 6.846993739991265e-06, + "log_odds_chosen": 0.552410900592804, + "log_odds_ratio": -0.5525115132331848, + "logits/chosen": -0.34073910117149353, + "logits/rejected": -0.36035147309303284, + "logps/chosen": -0.9047195315361023, + "logps/rejected": -1.2465412616729736, + "loss": 0.906, + "nll_loss": 0.8507863283157349, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09047196060419083, + "rewards/margins": 0.03418216481804848, + "rewards/rejected": -0.1246541291475296, + "step": 2080 + }, + { + "epoch": 0.38, + "grad_norm": 1.3036489486694336, + "learning_rate": 6.841170476051827e-06, + "log_odds_chosen": 0.3467450737953186, + "log_odds_ratio": -0.6337326765060425, + "logits/chosen": -0.4385066032409668, + "logits/rejected": -0.461122989654541, + "logps/chosen": -1.1040109395980835, + "logps/rejected": -1.33207368850708, + "loss": 1.1146, + "nll_loss": 1.0512287616729736, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11040109395980835, + "rewards/margins": 0.02280627377331257, + "rewards/rejected": -0.13320736587047577, + "step": 2090 + }, + { + "epoch": 0.38, + "grad_norm": 0.663144052028656, + "learning_rate": 6.835347212112388e-06, + "log_odds_chosen": 0.5738186836242676, + "log_odds_ratio": -0.5866661071777344, + "logits/chosen": -0.3205263614654541, + "logits/rejected": -0.39872634410858154, + "logps/chosen": -0.9723861813545227, + "logps/rejected": -1.3176357746124268, + "loss": 1.0031, + "nll_loss": 0.9444729089736938, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09723862260580063, + "rewards/margins": 0.03452496603131294, + "rewards/rejected": -0.13176357746124268, + "step": 2100 + }, + { + "epoch": 0.38, + "grad_norm": 1.065043330192566, + "learning_rate": 6.82952394817295e-06, + "log_odds_chosen": 0.7342604398727417, + "log_odds_ratio": -0.519620954990387, + "logits/chosen": -0.33630579710006714, + "logits/rejected": -0.41530171036720276, + "logps/chosen": -0.9176281094551086, + "logps/rejected": -1.3705017566680908, + "loss": 0.9411, + "nll_loss": 0.8891534805297852, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09176281839609146, + "rewards/margins": 0.0452873595058918, + "rewards/rejected": -0.13705015182495117, + "step": 2110 + }, + { + "epoch": 0.38, + "grad_norm": 1.0710101127624512, + "learning_rate": 6.8237006842335125e-06, + "log_odds_chosen": 0.7710472345352173, + "log_odds_ratio": -0.5303458571434021, + "logits/chosen": -0.3510195016860962, + "logits/rejected": -0.4198974668979645, + "logps/chosen": -1.0233778953552246, + "logps/rejected": -1.4768149852752686, + "loss": 1.0737, + "nll_loss": 1.020646572113037, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1023377999663353, + "rewards/margins": 0.045343708246946335, + "rewards/rejected": -0.14768150448799133, + "step": 2120 + }, + { + "epoch": 0.38, + "grad_norm": 0.6870781779289246, + "learning_rate": 6.817877420294074e-06, + "log_odds_chosen": 0.478823184967041, + "log_odds_ratio": -0.5921360850334167, + "logits/chosen": -0.36411404609680176, + "logits/rejected": -0.4298684000968933, + "logps/chosen": -0.9525805711746216, + "logps/rejected": -1.2109096050262451, + "loss": 1.0417, + "nll_loss": 0.9824945330619812, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09525805711746216, + "rewards/margins": 0.02583291009068489, + "rewards/rejected": -0.12109096348285675, + "step": 2130 + }, + { + "epoch": 0.39, + "grad_norm": 0.8713396191596985, + "learning_rate": 6.812054156354636e-06, + "log_odds_chosen": 0.5773652791976929, + "log_odds_ratio": -0.5538418292999268, + "logits/chosen": -0.3354545533657074, + "logits/rejected": -0.385932594537735, + "logps/chosen": -1.016210913658142, + "logps/rejected": -1.4001071453094482, + "loss": 0.9637, + "nll_loss": 0.9083443880081177, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10162109136581421, + "rewards/margins": 0.03838961571455002, + "rewards/rejected": -0.14001069962978363, + "step": 2140 + }, + { + "epoch": 0.39, + "grad_norm": 1.1261780261993408, + "learning_rate": 6.806230892415199e-06, + "log_odds_chosen": 0.5818010568618774, + "log_odds_ratio": -0.5739784240722656, + "logits/chosen": -0.38004809617996216, + "logits/rejected": -0.41545504331588745, + "logps/chosen": -0.9510005116462708, + "logps/rejected": -1.3282325267791748, + "loss": 1.0095, + "nll_loss": 0.9521392583847046, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0951000526547432, + "rewards/margins": 0.037723198533058167, + "rewards/rejected": -0.13282324373722076, + "step": 2150 + }, + { + "epoch": 0.39, + "grad_norm": 1.1771738529205322, + "learning_rate": 6.800407628475761e-06, + "log_odds_chosen": 0.6635382175445557, + "log_odds_ratio": -0.5700774192810059, + "logits/chosen": -0.29149946570396423, + "logits/rejected": -0.3473144471645355, + "logps/chosen": -0.9014791250228882, + "logps/rejected": -1.2821435928344727, + "loss": 0.9432, + "nll_loss": 0.8862012624740601, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09014792740345001, + "rewards/margins": 0.038066450506448746, + "rewards/rejected": -0.12821438908576965, + "step": 2160 + }, + { + "epoch": 0.39, + "grad_norm": 1.0287370681762695, + "learning_rate": 6.794584364536322e-06, + "log_odds_chosen": 0.6836004257202148, + "log_odds_ratio": -0.5302466154098511, + "logits/chosen": -0.3840259611606598, + "logits/rejected": -0.42339396476745605, + "logps/chosen": -0.9160095453262329, + "logps/rejected": -1.375124216079712, + "loss": 1.0308, + "nll_loss": 0.9777463674545288, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0916009470820427, + "rewards/margins": 0.045911483466625214, + "rewards/rejected": -0.1375124156475067, + "step": 2170 + }, + { + "epoch": 0.39, + "grad_norm": 1.1369781494140625, + "learning_rate": 6.788761100596885e-06, + "log_odds_chosen": 0.6386801600456238, + "log_odds_ratio": -0.5835193395614624, + "logits/chosen": -0.35931721329689026, + "logits/rejected": -0.43148794770240784, + "logps/chosen": -0.9243070483207703, + "logps/rejected": -1.3196786642074585, + "loss": 1.0626, + "nll_loss": 1.004262924194336, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09243070334196091, + "rewards/margins": 0.03953716158866882, + "rewards/rejected": -0.13196787238121033, + "step": 2180 + }, + { + "epoch": 0.4, + "grad_norm": 0.809481143951416, + "learning_rate": 6.782937836657447e-06, + "log_odds_chosen": 0.5534670948982239, + "log_odds_ratio": -0.5876575112342834, + "logits/chosen": -0.39520007371902466, + "logits/rejected": -0.44970574975013733, + "logps/chosen": -1.018445611000061, + "logps/rejected": -1.3711169958114624, + "loss": 1.0772, + "nll_loss": 1.0184389352798462, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10184456408023834, + "rewards/margins": 0.03526713699102402, + "rewards/rejected": -0.13711170852184296, + "step": 2190 + }, + { + "epoch": 0.4, + "grad_norm": 1.0092328786849976, + "learning_rate": 6.777114572718008e-06, + "log_odds_chosen": 0.968769371509552, + "log_odds_ratio": -0.45703181624412537, + "logits/chosen": -0.3396906554698944, + "logits/rejected": -0.41497963666915894, + "logps/chosen": -0.9728395342826843, + "logps/rejected": -1.5849764347076416, + "loss": 0.9097, + "nll_loss": 0.8639856576919556, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.09728394448757172, + "rewards/margins": 0.06121370196342468, + "rewards/rejected": -0.1584976464509964, + "step": 2200 + }, + { + "epoch": 0.4, + "grad_norm": 1.4918605089187622, + "learning_rate": 6.77129130877857e-06, + "log_odds_chosen": 0.6453096270561218, + "log_odds_ratio": -0.5849459171295166, + "logits/chosen": -0.32868796586990356, + "logits/rejected": -0.3679044842720032, + "logps/chosen": -0.9020195007324219, + "logps/rejected": -1.2888437509536743, + "loss": 0.9864, + "nll_loss": 0.9279425740242004, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0902019590139389, + "rewards/margins": 0.038682423532009125, + "rewards/rejected": -0.12888437509536743, + "step": 2210 + }, + { + "epoch": 0.4, + "grad_norm": 1.0304147005081177, + "learning_rate": 6.765468044839132e-06, + "log_odds_chosen": 0.558380663394928, + "log_odds_ratio": -0.589785635471344, + "logits/chosen": -0.3739710748195648, + "logits/rejected": -0.40226370096206665, + "logps/chosen": -1.023913025856018, + "logps/rejected": -1.3768985271453857, + "loss": 1.0085, + "nll_loss": 0.9495282173156738, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.102391317486763, + "rewards/margins": 0.035298533737659454, + "rewards/rejected": -0.13768985867500305, + "step": 2220 + }, + { + "epoch": 0.4, + "grad_norm": 1.6191908121109009, + "learning_rate": 6.759644780899694e-06, + "log_odds_chosen": 0.6424797773361206, + "log_odds_ratio": -0.5855919718742371, + "logits/chosen": -0.3532762825489044, + "logits/rejected": -0.412332147359848, + "logps/chosen": -0.9896075129508972, + "logps/rejected": -1.3922007083892822, + "loss": 0.9935, + "nll_loss": 0.9349050521850586, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0989607572555542, + "rewards/margins": 0.040259331464767456, + "rewards/rejected": -0.13922008872032166, + "step": 2230 + }, + { + "epoch": 0.4, + "grad_norm": 0.8883562684059143, + "learning_rate": 6.753821516960256e-06, + "log_odds_chosen": 0.841235339641571, + "log_odds_ratio": -0.5459184050559998, + "logits/chosen": -0.3101753890514374, + "logits/rejected": -0.32989877462387085, + "logps/chosen": -0.7897628545761108, + "logps/rejected": -1.3077296018600464, + "loss": 0.8489, + "nll_loss": 0.7942813038825989, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.07897628843784332, + "rewards/margins": 0.05179668590426445, + "rewards/rejected": -0.13077297806739807, + "step": 2240 + }, + { + "epoch": 0.41, + "grad_norm": 1.5552319288253784, + "learning_rate": 6.747998253020818e-06, + "log_odds_chosen": 0.6456782817840576, + "log_odds_ratio": -0.5549284219741821, + "logits/chosen": -0.38464441895484924, + "logits/rejected": -0.44848066568374634, + "logps/chosen": -1.0553745031356812, + "logps/rejected": -1.4854891300201416, + "loss": 1.0404, + "nll_loss": 0.9849538803100586, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10553745180368423, + "rewards/margins": 0.043011464178562164, + "rewards/rejected": -0.1485489159822464, + "step": 2250 + }, + { + "epoch": 0.41, + "grad_norm": 1.2503700256347656, + "learning_rate": 6.74217498908138e-06, + "log_odds_chosen": 0.42475882172584534, + "log_odds_ratio": -0.6306148171424866, + "logits/chosen": -0.3681567311286926, + "logits/rejected": -0.38218241930007935, + "logps/chosen": -1.0094163417816162, + "logps/rejected": -1.2551424503326416, + "loss": 1.0313, + "nll_loss": 0.9682766199111938, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10094162076711655, + "rewards/margins": 0.024572614580392838, + "rewards/rejected": -0.12551423907279968, + "step": 2260 + }, + { + "epoch": 0.41, + "grad_norm": 0.9619736671447754, + "learning_rate": 6.736351725141942e-06, + "log_odds_chosen": 0.8118526339530945, + "log_odds_ratio": -0.5391420722007751, + "logits/chosen": -0.37076014280319214, + "logits/rejected": -0.41540661454200745, + "logps/chosen": -0.9378563165664673, + "logps/rejected": -1.4630939960479736, + "loss": 0.964, + "nll_loss": 0.9101201295852661, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09378562867641449, + "rewards/margins": 0.05252378061413765, + "rewards/rejected": -0.14630940556526184, + "step": 2270 + }, + { + "epoch": 0.41, + "grad_norm": 1.0768691301345825, + "learning_rate": 6.7305284612025035e-06, + "log_odds_chosen": 0.6344785690307617, + "log_odds_ratio": -0.565656304359436, + "logits/chosen": -0.331177294254303, + "logits/rejected": -0.377986341714859, + "logps/chosen": -0.9524379968643188, + "logps/rejected": -1.366152048110962, + "loss": 1.0225, + "nll_loss": 0.9659032821655273, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09524379670619965, + "rewards/margins": 0.041371412575244904, + "rewards/rejected": -0.13661520183086395, + "step": 2280 + }, + { + "epoch": 0.41, + "grad_norm": 0.9441320896148682, + "learning_rate": 6.724705197263065e-06, + "log_odds_chosen": 0.4996124804019928, + "log_odds_ratio": -0.6170133352279663, + "logits/chosen": -0.39471060037612915, + "logits/rejected": -0.42773985862731934, + "logps/chosen": -1.0002835988998413, + "logps/rejected": -1.3339643478393555, + "loss": 1.0856, + "nll_loss": 1.0238593816757202, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1000283733010292, + "rewards/margins": 0.0333680734038353, + "rewards/rejected": -0.1333964318037033, + "step": 2290 + }, + { + "epoch": 0.42, + "grad_norm": 0.9333765506744385, + "learning_rate": 6.718881933323627e-06, + "log_odds_chosen": 0.5398795008659363, + "log_odds_ratio": -0.5720388889312744, + "logits/chosen": -0.33184653520584106, + "logits/rejected": -0.3918634355068207, + "logps/chosen": -0.924095630645752, + "logps/rejected": -1.2493730783462524, + "loss": 1.017, + "nll_loss": 0.9597843885421753, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09240957349538803, + "rewards/margins": 0.03252773731946945, + "rewards/rejected": -0.12493731081485748, + "step": 2300 + }, + { + "epoch": 0.42, + "grad_norm": 1.224831223487854, + "learning_rate": 6.71305866938419e-06, + "log_odds_chosen": 0.7087077498435974, + "log_odds_ratio": -0.49888911843299866, + "logits/chosen": -0.361178994178772, + "logits/rejected": -0.4183693826198578, + "logps/chosen": -0.9371638298034668, + "logps/rejected": -1.3673518896102905, + "loss": 1.0179, + "nll_loss": 0.9680109024047852, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09371639043092728, + "rewards/margins": 0.043018803000450134, + "rewards/rejected": -0.13673518598079681, + "step": 2310 + }, + { + "epoch": 0.42, + "grad_norm": 1.2092949151992798, + "learning_rate": 6.707235405444751e-06, + "log_odds_chosen": 0.6781972646713257, + "log_odds_ratio": -0.5294966697692871, + "logits/chosen": -0.42441660165786743, + "logits/rejected": -0.4300518035888672, + "logps/chosen": -0.9629390835762024, + "logps/rejected": -1.3670963048934937, + "loss": 1.0294, + "nll_loss": 0.9764898419380188, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09629391133785248, + "rewards/margins": 0.04041573405265808, + "rewards/rejected": -0.13670964539051056, + "step": 2320 + }, + { + "epoch": 0.42, + "grad_norm": 0.727232813835144, + "learning_rate": 6.701412141505313e-06, + "log_odds_chosen": 0.4993107318878174, + "log_odds_ratio": -0.6099010705947876, + "logits/chosen": -0.39678144454956055, + "logits/rejected": -0.4318881630897522, + "logps/chosen": -0.9463534355163574, + "logps/rejected": -1.2829571962356567, + "loss": 1.0367, + "nll_loss": 0.9757480621337891, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09463534504175186, + "rewards/margins": 0.03366038203239441, + "rewards/rejected": -0.12829571962356567, + "step": 2330 + }, + { + "epoch": 0.42, + "grad_norm": 0.9829471111297607, + "learning_rate": 6.695588877565876e-06, + "log_odds_chosen": 0.4144318103790283, + "log_odds_ratio": -0.6522342562675476, + "logits/chosen": -0.37262946367263794, + "logits/rejected": -0.3711271286010742, + "logps/chosen": -0.9915045499801636, + "logps/rejected": -1.2454833984375, + "loss": 1.0185, + "nll_loss": 0.953311562538147, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.09915046393871307, + "rewards/margins": 0.02539788745343685, + "rewards/rejected": -0.12454833835363388, + "step": 2340 + }, + { + "epoch": 0.42, + "grad_norm": 0.849780797958374, + "learning_rate": 6.689765613626437e-06, + "log_odds_chosen": 0.6342957615852356, + "log_odds_ratio": -0.5933780670166016, + "logits/chosen": -0.3510368764400482, + "logits/rejected": -0.38174304366111755, + "logps/chosen": -1.001947283744812, + "logps/rejected": -1.4229815006256104, + "loss": 1.038, + "nll_loss": 0.9786350131034851, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10019473731517792, + "rewards/margins": 0.04210341349244118, + "rewards/rejected": -0.1422981470823288, + "step": 2350 + }, + { + "epoch": 0.43, + "grad_norm": 0.9741145968437195, + "learning_rate": 6.6839423496869995e-06, + "log_odds_chosen": 0.5828785300254822, + "log_odds_ratio": -0.5875527262687683, + "logits/chosen": -0.3308332562446594, + "logits/rejected": -0.3834843635559082, + "logps/chosen": -0.9138933420181274, + "logps/rejected": -1.27740478515625, + "loss": 1.0159, + "nll_loss": 0.9571261405944824, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09138933569192886, + "rewards/margins": 0.03635113686323166, + "rewards/rejected": -0.12774047255516052, + "step": 2360 + }, + { + "epoch": 0.43, + "grad_norm": 0.7441773414611816, + "learning_rate": 6.678119085747561e-06, + "log_odds_chosen": 0.5344163775444031, + "log_odds_ratio": -0.6083695888519287, + "logits/chosen": -0.31939536333084106, + "logits/rejected": -0.3396163284778595, + "logps/chosen": -0.9387407302856445, + "logps/rejected": -1.2810204029083252, + "loss": 1.0482, + "nll_loss": 0.9873501658439636, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09387408196926117, + "rewards/margins": 0.034227967262268066, + "rewards/rejected": -0.12810204923152924, + "step": 2370 + }, + { + "epoch": 0.43, + "grad_norm": 0.9099089503288269, + "learning_rate": 6.672295821808123e-06, + "log_odds_chosen": 0.46537071466445923, + "log_odds_ratio": -0.6207537651062012, + "logits/chosen": -0.34910348057746887, + "logits/rejected": -0.3974645435810089, + "logps/chosen": -1.0489540100097656, + "logps/rejected": -1.3541288375854492, + "loss": 1.0742, + "nll_loss": 1.01216459274292, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10489541292190552, + "rewards/margins": 0.03051748313009739, + "rewards/rejected": -0.13541290163993835, + "step": 2380 + }, + { + "epoch": 0.43, + "grad_norm": 0.9425209164619446, + "learning_rate": 6.666472557868685e-06, + "log_odds_chosen": 0.5351072549819946, + "log_odds_ratio": -0.5699527859687805, + "logits/chosen": -0.39522382616996765, + "logits/rejected": -0.44066038727760315, + "logps/chosen": -0.9757105112075806, + "logps/rejected": -1.321337342262268, + "loss": 1.0655, + "nll_loss": 1.0084631443023682, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09757105261087418, + "rewards/margins": 0.03456268459558487, + "rewards/rejected": -0.13213373720645905, + "step": 2390 + }, + { + "epoch": 0.43, + "grad_norm": 0.7204871773719788, + "learning_rate": 6.660649293929247e-06, + "log_odds_chosen": 0.6166509985923767, + "log_odds_ratio": -0.5407067537307739, + "logits/chosen": -0.37788647413253784, + "logits/rejected": -0.39217525720596313, + "logps/chosen": -1.0124753713607788, + "logps/rejected": -1.399766206741333, + "loss": 1.0106, + "nll_loss": 0.9564822316169739, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10124754905700684, + "rewards/margins": 0.03872908279299736, + "rewards/rejected": -0.1399766206741333, + "step": 2400 + }, + { + "epoch": 0.44, + "grad_norm": 0.7321575284004211, + "learning_rate": 6.654826029989809e-06, + "log_odds_chosen": 0.48690706491470337, + "log_odds_ratio": -0.6029237508773804, + "logits/chosen": -0.3971622586250305, + "logits/rejected": -0.4110518991947174, + "logps/chosen": -0.9805097579956055, + "logps/rejected": -1.2300655841827393, + "loss": 1.0499, + "nll_loss": 0.9895747900009155, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09805097430944443, + "rewards/margins": 0.024955574423074722, + "rewards/rejected": -0.12300655990839005, + "step": 2410 + }, + { + "epoch": 0.44, + "grad_norm": 1.1652004718780518, + "learning_rate": 6.649002766050371e-06, + "log_odds_chosen": 0.6261937022209167, + "log_odds_ratio": -0.5773937106132507, + "logits/chosen": -0.3494192957878113, + "logits/rejected": -0.37246376276016235, + "logps/chosen": -1.0030348300933838, + "logps/rejected": -1.390746831893921, + "loss": 0.9871, + "nll_loss": 0.9293266534805298, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10030348598957062, + "rewards/margins": 0.03877120837569237, + "rewards/rejected": -0.13907471299171448, + "step": 2420 + }, + { + "epoch": 0.44, + "grad_norm": 1.1041061878204346, + "learning_rate": 6.643179502110933e-06, + "log_odds_chosen": 0.6732785105705261, + "log_odds_ratio": -0.5620766878128052, + "logits/chosen": -0.4026264250278473, + "logits/rejected": -0.4409736096858978, + "logps/chosen": -0.9442132115364075, + "logps/rejected": -1.4226003885269165, + "loss": 1.0086, + "nll_loss": 0.9523922801017761, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09442131966352463, + "rewards/margins": 0.0478387214243412, + "rewards/rejected": -0.14226004481315613, + "step": 2430 + }, + { + "epoch": 0.44, + "grad_norm": 1.5153762102127075, + "learning_rate": 6.637356238171495e-06, + "log_odds_chosen": 0.7149797677993774, + "log_odds_ratio": -0.5272139310836792, + "logits/chosen": -0.3368612229824066, + "logits/rejected": -0.396007776260376, + "logps/chosen": -0.9288423657417297, + "logps/rejected": -1.3468871116638184, + "loss": 1.0085, + "nll_loss": 0.9557502865791321, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09288422763347626, + "rewards/margins": 0.041804488748311996, + "rewards/rejected": -0.13468872010707855, + "step": 2440 + }, + { + "epoch": 0.44, + "grad_norm": 0.9731908440589905, + "learning_rate": 6.631532974232057e-06, + "log_odds_chosen": 0.5724431872367859, + "log_odds_ratio": -0.5888754725456238, + "logits/chosen": -0.36172300577163696, + "logits/rejected": -0.41580086946487427, + "logps/chosen": -0.9775680303573608, + "logps/rejected": -1.3245165348052979, + "loss": 0.9907, + "nll_loss": 0.931767463684082, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09775681793689728, + "rewards/margins": 0.034694839268922806, + "rewards/rejected": -0.13245165348052979, + "step": 2450 + }, + { + "epoch": 0.44, + "grad_norm": 1.017647624015808, + "learning_rate": 6.625709710292618e-06, + "log_odds_chosen": 0.754473090171814, + "log_odds_ratio": -0.5083373785018921, + "logits/chosen": -0.31709548830986023, + "logits/rejected": -0.4046868681907654, + "logps/chosen": -0.939433753490448, + "logps/rejected": -1.394449234008789, + "loss": 0.9886, + "nll_loss": 0.9377379417419434, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09394337981939316, + "rewards/margins": 0.045501552522182465, + "rewards/rejected": -0.13944493234157562, + "step": 2460 + }, + { + "epoch": 0.45, + "grad_norm": 0.9073895215988159, + "learning_rate": 6.619886446353181e-06, + "log_odds_chosen": 0.6407888531684875, + "log_odds_ratio": -0.5432699918746948, + "logits/chosen": -0.34865349531173706, + "logits/rejected": -0.3891986012458801, + "logps/chosen": -0.8993372917175293, + "logps/rejected": -1.3464696407318115, + "loss": 1.0326, + "nll_loss": 0.9782568216323853, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08993373811244965, + "rewards/margins": 0.04471323639154434, + "rewards/rejected": -0.1346469670534134, + "step": 2470 + }, + { + "epoch": 0.45, + "grad_norm": 0.6044934988021851, + "learning_rate": 6.614063182413742e-06, + "log_odds_chosen": 0.46049004793167114, + "log_odds_ratio": -0.5819103717803955, + "logits/chosen": -0.35496190190315247, + "logits/rejected": -0.39264383912086487, + "logps/chosen": -0.978875458240509, + "logps/rejected": -1.270918369293213, + "loss": 1.0487, + "nll_loss": 0.9905277490615845, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0978875383734703, + "rewards/margins": 0.029204288497567177, + "rewards/rejected": -0.12709183990955353, + "step": 2480 + }, + { + "epoch": 0.45, + "grad_norm": 1.590712547302246, + "learning_rate": 6.608239918474304e-06, + "log_odds_chosen": 0.630506694316864, + "log_odds_ratio": -0.586438775062561, + "logits/chosen": -0.38816604018211365, + "logits/rejected": -0.4340592920780182, + "logps/chosen": -0.9471012949943542, + "logps/rejected": -1.3487260341644287, + "loss": 0.9928, + "nll_loss": 0.9341457486152649, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09471012651920319, + "rewards/margins": 0.040162477642297745, + "rewards/rejected": -0.13487261533737183, + "step": 2490 + }, + { + "epoch": 0.45, + "grad_norm": 1.3605127334594727, + "learning_rate": 6.602416654534867e-06, + "log_odds_chosen": 0.6695212125778198, + "log_odds_ratio": -0.5592930912971497, + "logits/chosen": -0.36375027894973755, + "logits/rejected": -0.40861162543296814, + "logps/chosen": -0.9220840334892273, + "logps/rejected": -1.3491367101669312, + "loss": 0.9604, + "nll_loss": 0.9045180082321167, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.0922083854675293, + "rewards/margins": 0.04270528256893158, + "rewards/rejected": -0.13491368293762207, + "step": 2500 + }, + { + "epoch": 0.45, + "grad_norm": 1.2861924171447754, + "learning_rate": 6.596593390595428e-06, + "log_odds_chosen": 0.5616058707237244, + "log_odds_ratio": -0.5883401036262512, + "logits/chosen": -0.25284165143966675, + "logits/rejected": -0.37594661116600037, + "logps/chosen": -0.9259670972824097, + "logps/rejected": -1.263522982597351, + "loss": 0.9541, + "nll_loss": 0.8952864408493042, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09259669482707977, + "rewards/margins": 0.033755604177713394, + "rewards/rejected": -0.12635231018066406, + "step": 2510 + }, + { + "epoch": 0.46, + "grad_norm": 0.7333442568778992, + "learning_rate": 6.5907701266559905e-06, + "log_odds_chosen": 0.518241822719574, + "log_odds_ratio": -0.5922364592552185, + "logits/chosen": -0.3900024890899658, + "logits/rejected": -0.4332877993583679, + "logps/chosen": -0.9187110662460327, + "logps/rejected": -1.223683476448059, + "loss": 1.0167, + "nll_loss": 0.9575027227401733, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09187111258506775, + "rewards/margins": 0.030497241765260696, + "rewards/rejected": -0.12236835807561874, + "step": 2520 + }, + { + "epoch": 0.46, + "grad_norm": 1.4825656414031982, + "learning_rate": 6.584946862716553e-06, + "log_odds_chosen": 0.582595944404602, + "log_odds_ratio": -0.5741361379623413, + "logits/chosen": -0.3265005946159363, + "logits/rejected": -0.40415525436401367, + "logps/chosen": -0.9316970705986023, + "logps/rejected": -1.3179895877838135, + "loss": 0.9669, + "nll_loss": 0.9095318913459778, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09316971898078918, + "rewards/margins": 0.03862924873828888, + "rewards/rejected": -0.13179895281791687, + "step": 2530 + }, + { + "epoch": 0.46, + "grad_norm": 1.1516770124435425, + "learning_rate": 6.579123598777114e-06, + "log_odds_chosen": 0.5853735208511353, + "log_odds_ratio": -0.5575556755065918, + "logits/chosen": -0.37003493309020996, + "logits/rejected": -0.4401502013206482, + "logps/chosen": -1.0224106311798096, + "logps/rejected": -1.4017164707183838, + "loss": 1.096, + "nll_loss": 1.040209174156189, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10224107652902603, + "rewards/margins": 0.03793057054281235, + "rewards/rejected": -0.14017164707183838, + "step": 2540 + }, + { + "epoch": 0.46, + "grad_norm": 0.772507905960083, + "learning_rate": 6.573300334837676e-06, + "log_odds_chosen": 0.6209217309951782, + "log_odds_ratio": -0.5777196288108826, + "logits/chosen": -0.32888883352279663, + "logits/rejected": -0.3678010106086731, + "logps/chosen": -0.9577111005783081, + "logps/rejected": -1.3549540042877197, + "loss": 0.9416, + "nll_loss": 0.8838027715682983, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09577111154794693, + "rewards/margins": 0.03972429037094116, + "rewards/rejected": -0.1354953944683075, + "step": 2550 + }, + { + "epoch": 0.46, + "grad_norm": 0.6387426853179932, + "learning_rate": 6.567477070898238e-06, + "log_odds_chosen": 0.5519171953201294, + "log_odds_ratio": -0.5468162298202515, + "logits/chosen": -0.3772895336151123, + "logits/rejected": -0.42758235335350037, + "logps/chosen": -1.0784213542938232, + "logps/rejected": -1.4518009424209595, + "loss": 0.9968, + "nll_loss": 0.9421661496162415, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10784213244915009, + "rewards/margins": 0.03733794763684273, + "rewards/rejected": -0.14518007636070251, + "step": 2560 + }, + { + "epoch": 0.46, + "grad_norm": 0.7245638966560364, + "learning_rate": 6.5616538069588e-06, + "log_odds_chosen": 0.5130826234817505, + "log_odds_ratio": -0.6046417951583862, + "logits/chosen": -0.2720004618167877, + "logits/rejected": -0.32857561111450195, + "logps/chosen": -0.9239827990531921, + "logps/rejected": -1.2177114486694336, + "loss": 0.9891, + "nll_loss": 0.9286392331123352, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09239828586578369, + "rewards/margins": 0.029372859746217728, + "rewards/rejected": -0.12177114188671112, + "step": 2570 + }, + { + "epoch": 0.47, + "grad_norm": 0.7606828808784485, + "learning_rate": 6.555830543019362e-06, + "log_odds_chosen": 0.4750826954841614, + "log_odds_ratio": -0.5803383588790894, + "logits/chosen": -0.33285465836524963, + "logits/rejected": -0.37880927324295044, + "logps/chosen": -0.9121491312980652, + "logps/rejected": -1.1974008083343506, + "loss": 1.0248, + "nll_loss": 0.9667941927909851, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09121491014957428, + "rewards/margins": 0.028525155037641525, + "rewards/rejected": -0.11974005401134491, + "step": 2580 + }, + { + "epoch": 0.47, + "grad_norm": 1.215981125831604, + "learning_rate": 6.550007279079924e-06, + "log_odds_chosen": 0.7042616605758667, + "log_odds_ratio": -0.550471305847168, + "logits/chosen": -0.33983561396598816, + "logits/rejected": -0.36023131012916565, + "logps/chosen": -0.9646269083023071, + "logps/rejected": -1.3918030261993408, + "loss": 0.9539, + "nll_loss": 0.8988884687423706, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.096462681889534, + "rewards/margins": 0.04271761700510979, + "rewards/rejected": -0.13918031752109528, + "step": 2590 + }, + { + "epoch": 0.47, + "grad_norm": 1.175630807876587, + "learning_rate": 6.5441840151404864e-06, + "log_odds_chosen": 0.5247041583061218, + "log_odds_ratio": -0.6049832105636597, + "logits/chosen": -0.35429611802101135, + "logits/rejected": -0.3579476773738861, + "logps/chosen": -0.8990931510925293, + "logps/rejected": -1.199903130531311, + "loss": 1.0378, + "nll_loss": 0.9773503541946411, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08990932255983353, + "rewards/margins": 0.030080992728471756, + "rewards/rejected": -0.11999031156301498, + "step": 2600 + }, + { + "epoch": 0.47, + "grad_norm": 1.0978769063949585, + "learning_rate": 6.538360751201048e-06, + "log_odds_chosen": 0.40354281663894653, + "log_odds_ratio": -0.6332866549491882, + "logits/chosen": -0.3224504590034485, + "logits/rejected": -0.36072710156440735, + "logps/chosen": -1.123956322669983, + "logps/rejected": -1.387921929359436, + "loss": 1.0701, + "nll_loss": 1.0067265033721924, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11239562183618546, + "rewards/margins": 0.026396561414003372, + "rewards/rejected": -0.13879218697547913, + "step": 2610 + }, + { + "epoch": 0.47, + "grad_norm": 0.9290404915809631, + "learning_rate": 6.53253748726161e-06, + "log_odds_chosen": 0.6468437910079956, + "log_odds_ratio": -0.5385705232620239, + "logits/chosen": -0.36749547719955444, + "logits/rejected": -0.4251924157142639, + "logps/chosen": -0.9306084513664246, + "logps/rejected": -1.3251934051513672, + "loss": 0.9871, + "nll_loss": 0.9332555532455444, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09306085854768753, + "rewards/margins": 0.03945847973227501, + "rewards/rejected": -0.13251933455467224, + "step": 2620 + }, + { + "epoch": 0.48, + "grad_norm": 1.0460034608840942, + "learning_rate": 6.5267142233221725e-06, + "log_odds_chosen": 0.5371777415275574, + "log_odds_ratio": -0.6011015176773071, + "logits/chosen": -0.3856775164604187, + "logits/rejected": -0.39130908250808716, + "logps/chosen": -0.9254153966903687, + "logps/rejected": -1.268058180809021, + "loss": 1.0057, + "nll_loss": 0.9455927014350891, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09254153072834015, + "rewards/margins": 0.03426428511738777, + "rewards/rejected": -0.12680582702159882, + "step": 2630 + }, + { + "epoch": 0.48, + "grad_norm": 1.1770684719085693, + "learning_rate": 6.520890959382733e-06, + "log_odds_chosen": 0.754479706287384, + "log_odds_ratio": -0.5277892351150513, + "logits/chosen": -0.41099995374679565, + "logits/rejected": -0.46807852387428284, + "logps/chosen": -0.9225029945373535, + "logps/rejected": -1.4032045602798462, + "loss": 1.0005, + "nll_loss": 0.9477365612983704, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09225030243396759, + "rewards/margins": 0.04807015508413315, + "rewards/rejected": -0.14032046496868134, + "step": 2640 + }, + { + "epoch": 0.48, + "grad_norm": 1.7522804737091064, + "learning_rate": 6.5150676954432954e-06, + "log_odds_chosen": 0.7221927046775818, + "log_odds_ratio": -0.5742099285125732, + "logits/chosen": -0.29052841663360596, + "logits/rejected": -0.3499542772769928, + "logps/chosen": -0.8708510398864746, + "logps/rejected": -1.3332278728485107, + "loss": 0.9638, + "nll_loss": 0.9063900709152222, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08708511292934418, + "rewards/margins": 0.046237677335739136, + "rewards/rejected": -0.1333227902650833, + "step": 2650 + }, + { + "epoch": 0.48, + "grad_norm": 1.2086186408996582, + "learning_rate": 6.509244431503858e-06, + "log_odds_chosen": 0.6395819783210754, + "log_odds_ratio": -0.5765899419784546, + "logits/chosen": -0.31575196981430054, + "logits/rejected": -0.3708987832069397, + "logps/chosen": -0.8991060256958008, + "logps/rejected": -1.2960377931594849, + "loss": 1.0116, + "nll_loss": 0.953917384147644, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.0899106115102768, + "rewards/margins": 0.03969316929578781, + "rewards/rejected": -0.129603773355484, + "step": 2660 + }, + { + "epoch": 0.48, + "grad_norm": 0.7068976163864136, + "learning_rate": 6.503421167564419e-06, + "log_odds_chosen": 0.6602069139480591, + "log_odds_ratio": -0.581697940826416, + "logits/chosen": -0.332303524017334, + "logits/rejected": -0.3719666600227356, + "logps/chosen": -0.884446918964386, + "logps/rejected": -1.2823951244354248, + "loss": 0.9566, + "nll_loss": 0.8984075784683228, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08844468742609024, + "rewards/margins": 0.03979482874274254, + "rewards/rejected": -0.12823952734470367, + "step": 2670 + }, + { + "epoch": 0.48, + "grad_norm": 1.3176428079605103, + "learning_rate": 6.4975979036249815e-06, + "log_odds_chosen": 0.6552340388298035, + "log_odds_ratio": -0.5691067576408386, + "logits/chosen": -0.3273809254169464, + "logits/rejected": -0.36774808168411255, + "logps/chosen": -0.9798242449760437, + "logps/rejected": -1.389387845993042, + "loss": 0.9677, + "nll_loss": 0.9107893109321594, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09798242151737213, + "rewards/margins": 0.04095636308193207, + "rewards/rejected": -0.1389387845993042, + "step": 2680 + }, + { + "epoch": 0.49, + "grad_norm": 1.0215779542922974, + "learning_rate": 6.491774639685544e-06, + "log_odds_chosen": 1.010939598083496, + "log_odds_ratio": -0.4497356414794922, + "logits/chosen": -0.29089489579200745, + "logits/rejected": -0.38202494382858276, + "logps/chosen": -0.9118770360946655, + "logps/rejected": -1.5500422716140747, + "loss": 0.9683, + "nll_loss": 0.9233266115188599, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.09118770807981491, + "rewards/margins": 0.06381651014089584, + "rewards/rejected": -0.15500421822071075, + "step": 2690 + }, + { + "epoch": 0.49, + "grad_norm": 1.354233741760254, + "learning_rate": 6.485951375746105e-06, + "log_odds_chosen": 0.5281728506088257, + "log_odds_ratio": -0.571759045124054, + "logits/chosen": -0.3615049421787262, + "logits/rejected": -0.36989089846611023, + "logps/chosen": -0.9388942718505859, + "logps/rejected": -1.2711718082427979, + "loss": 0.994, + "nll_loss": 0.9367810487747192, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09388942271471024, + "rewards/margins": 0.03322775661945343, + "rewards/rejected": -0.12711718678474426, + "step": 2700 + }, + { + "epoch": 0.49, + "grad_norm": 1.767279863357544, + "learning_rate": 6.480128111806668e-06, + "log_odds_chosen": 0.5129133462905884, + "log_odds_ratio": -0.5640901327133179, + "logits/chosen": -0.4442782998085022, + "logits/rejected": -0.47071051597595215, + "logps/chosen": -1.0512162446975708, + "logps/rejected": -1.3872233629226685, + "loss": 1.0637, + "nll_loss": 1.0073240995407104, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.10512162744998932, + "rewards/margins": 0.033600710332393646, + "rewards/rejected": -0.13872233033180237, + "step": 2710 + }, + { + "epoch": 0.49, + "grad_norm": 1.4116398096084595, + "learning_rate": 6.47430484786723e-06, + "log_odds_chosen": 0.6858533620834351, + "log_odds_ratio": -0.571151614189148, + "logits/chosen": -0.3957204222679138, + "logits/rejected": -0.4555203914642334, + "logps/chosen": -0.7919386625289917, + "logps/rejected": -1.2326372861862183, + "loss": 1.0122, + "nll_loss": 0.9550908803939819, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07919386774301529, + "rewards/margins": 0.044069863855838776, + "rewards/rejected": -0.12326373159885406, + "step": 2720 + }, + { + "epoch": 0.49, + "grad_norm": 0.7749652862548828, + "learning_rate": 6.4684815839277905e-06, + "log_odds_chosen": 0.7230443954467773, + "log_odds_ratio": -0.537756085395813, + "logits/chosen": -0.3686821460723877, + "logits/rejected": -0.4431026577949524, + "logps/chosen": -0.9403525590896606, + "logps/rejected": -1.4275939464569092, + "loss": 0.9609, + "nll_loss": 0.9071076512336731, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09403526782989502, + "rewards/margins": 0.04872414469718933, + "rewards/rejected": -0.14275939762592316, + "step": 2730 + }, + { + "epoch": 0.49, + "grad_norm": 0.7996389865875244, + "learning_rate": 6.462658319988353e-06, + "log_odds_chosen": 0.8235070109367371, + "log_odds_ratio": -0.5080887079238892, + "logits/chosen": -0.34345048666000366, + "logits/rejected": -0.42161065340042114, + "logps/chosen": -0.9218958020210266, + "logps/rejected": -1.4511245489120483, + "loss": 0.9716, + "nll_loss": 0.9207679033279419, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09218958765268326, + "rewards/margins": 0.05292288213968277, + "rewards/rejected": -0.14511245489120483, + "step": 2740 + }, + { + "epoch": 0.5, + "grad_norm": 1.5153478384017944, + "learning_rate": 6.456835056048915e-06, + "log_odds_chosen": 0.69731605052948, + "log_odds_ratio": -0.526605486869812, + "logits/chosen": -0.4176858961582184, + "logits/rejected": -0.4533267021179199, + "logps/chosen": -1.0750231742858887, + "logps/rejected": -1.5430954694747925, + "loss": 1.0532, + "nll_loss": 1.000571608543396, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10750231891870499, + "rewards/margins": 0.04680723696947098, + "rewards/rejected": -0.15430954098701477, + "step": 2750 + }, + { + "epoch": 0.5, + "grad_norm": 1.3526057004928589, + "learning_rate": 6.451011792109477e-06, + "log_odds_chosen": 0.6370762586593628, + "log_odds_ratio": -0.5361495018005371, + "logits/chosen": -0.36857062578201294, + "logits/rejected": -0.4371257722377777, + "logps/chosen": -0.9362856149673462, + "logps/rejected": -1.307198405265808, + "loss": 1.0073, + "nll_loss": 0.95367032289505, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09362856298685074, + "rewards/margins": 0.03709127753973007, + "rewards/rejected": -0.1307198405265808, + "step": 2760 + }, + { + "epoch": 0.5, + "eval_log_odds_chosen": 0.5925426483154297, + "eval_log_odds_ratio": -0.5730059146881104, + "eval_logits/chosen": -0.3731546401977539, + "eval_logits/rejected": -0.40579819679260254, + "eval_logps/chosen": -0.9562295079231262, + "eval_logps/rejected": -1.3317075967788696, + "eval_loss": 1.0005911588668823, + "eval_nll_loss": 0.943290650844574, + "eval_rewards/accuracies": 0.6516516804695129, + "eval_rewards/chosen": -0.0956229493021965, + "eval_rewards/margins": 0.037547819316387177, + "eval_rewards/rejected": -0.13317078351974487, + "eval_runtime": 2286.9458, + "eval_samples_per_second": 1.019, + "eval_steps_per_second": 1.019, + "step": 2768 + }, + { + "epoch": 0.5, + "grad_norm": 1.127935528755188, + "learning_rate": 6.445188528170039e-06, + "log_odds_chosen": 0.40119534730911255, + "log_odds_ratio": -0.6752806901931763, + "logits/chosen": -0.32637766003608704, + "logits/rejected": -0.34356263279914856, + "logps/chosen": -0.938979983329773, + "logps/rejected": -1.1499440670013428, + "loss": 0.9678, + "nll_loss": 0.9002933502197266, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.0938979983329773, + "rewards/margins": 0.021096404641866684, + "rewards/rejected": -0.11499440670013428, + "step": 2770 + }, + { + "epoch": 0.5, + "grad_norm": 0.8931524157524109, + "learning_rate": 6.439365264230601e-06, + "log_odds_chosen": 0.5248688459396362, + "log_odds_ratio": -0.6203723549842834, + "logits/chosen": -0.4128592610359192, + "logits/rejected": -0.46203988790512085, + "logps/chosen": -0.9933439493179321, + "logps/rejected": -1.3095693588256836, + "loss": 1.1102, + "nll_loss": 1.0481542348861694, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09933439642190933, + "rewards/margins": 0.03162253648042679, + "rewards/rejected": -0.13095691800117493, + "step": 2780 + }, + { + "epoch": 0.5, + "grad_norm": 1.6323082447052002, + "learning_rate": 6.4335420002911636e-06, + "log_odds_chosen": 0.42494791746139526, + "log_odds_ratio": -0.6246153712272644, + "logits/chosen": -0.38673996925354004, + "logits/rejected": -0.42045697569847107, + "logps/chosen": -1.0206129550933838, + "logps/rejected": -1.2617638111114502, + "loss": 0.9711, + "nll_loss": 0.9086559414863586, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10206129401922226, + "rewards/margins": 0.02411508932709694, + "rewards/rejected": -0.1261763870716095, + "step": 2790 + }, + { + "epoch": 0.51, + "grad_norm": 1.0086363554000854, + "learning_rate": 6.427718736351725e-06, + "log_odds_chosen": 0.6424874067306519, + "log_odds_ratio": -0.5433818101882935, + "logits/chosen": -0.4411230981349945, + "logits/rejected": -0.45607155561447144, + "logps/chosen": -0.8835335969924927, + "logps/rejected": -1.297825574874878, + "loss": 0.9854, + "nll_loss": 0.9310863614082336, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08835335820913315, + "rewards/margins": 0.041429195553064346, + "rewards/rejected": -0.1297825574874878, + "step": 2800 + }, + { + "epoch": 0.51, + "grad_norm": 0.996560275554657, + "learning_rate": 6.421895472412287e-06, + "log_odds_chosen": 0.647859513759613, + "log_odds_ratio": -0.5474862456321716, + "logits/chosen": -0.36230072379112244, + "logits/rejected": -0.3996432423591614, + "logps/chosen": -0.9513490796089172, + "logps/rejected": -1.3442670106887817, + "loss": 0.9573, + "nll_loss": 0.9025037884712219, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09513489902019501, + "rewards/margins": 0.039291806519031525, + "rewards/rejected": -0.13442671298980713, + "step": 2810 + }, + { + "epoch": 0.51, + "grad_norm": 0.7576774954795837, + "learning_rate": 6.416072208472849e-06, + "log_odds_chosen": 0.8065885305404663, + "log_odds_ratio": -0.5174297094345093, + "logits/chosen": -0.3431718349456787, + "logits/rejected": -0.37808164954185486, + "logps/chosen": -0.828266978263855, + "logps/rejected": -1.2940632104873657, + "loss": 0.9534, + "nll_loss": 0.9016314744949341, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08282670378684998, + "rewards/margins": 0.04657962545752525, + "rewards/rejected": -0.12940633296966553, + "step": 2820 + }, + { + "epoch": 0.51, + "grad_norm": 1.1549729108810425, + "learning_rate": 6.41024894453341e-06, + "log_odds_chosen": 0.37550097703933716, + "log_odds_ratio": -0.6334739327430725, + "logits/chosen": -0.42225581407546997, + "logits/rejected": -0.4576474130153656, + "logps/chosen": -0.941022515296936, + "logps/rejected": -1.16748046875, + "loss": 1.0423, + "nll_loss": 0.9789831042289734, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09410224854946136, + "rewards/margins": 0.022645797580480576, + "rewards/rejected": -0.11674805730581284, + "step": 2830 + }, + { + "epoch": 0.51, + "grad_norm": 1.2891383171081543, + "learning_rate": 6.4044256805939726e-06, + "log_odds_chosen": 0.7490788698196411, + "log_odds_ratio": -0.5351554751396179, + "logits/chosen": -0.3456578850746155, + "logits/rejected": -0.40108785033226013, + "logps/chosen": -0.8461182713508606, + "logps/rejected": -1.3408584594726562, + "loss": 0.9503, + "nll_loss": 0.8967713117599487, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08461182564496994, + "rewards/margins": 0.04947403073310852, + "rewards/rejected": -0.13408586382865906, + "step": 2840 + }, + { + "epoch": 0.51, + "grad_norm": 0.6824889183044434, + "learning_rate": 6.398602416654535e-06, + "log_odds_chosen": 0.8293973207473755, + "log_odds_ratio": -0.5298095941543579, + "logits/chosen": -0.3383901119232178, + "logits/rejected": -0.37419217824935913, + "logps/chosen": -0.8970960378646851, + "logps/rejected": -1.441548228263855, + "loss": 0.9307, + "nll_loss": 0.8776991963386536, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08970960974693298, + "rewards/margins": 0.05444520711898804, + "rewards/rejected": -0.14415481686592102, + "step": 2850 + }, + { + "epoch": 0.52, + "grad_norm": 1.3923622369766235, + "learning_rate": 6.392779152715096e-06, + "log_odds_chosen": 0.6607835292816162, + "log_odds_ratio": -0.5144139528274536, + "logits/chosen": -0.3709713816642761, + "logits/rejected": -0.44595274329185486, + "logps/chosen": -1.0105422735214233, + "logps/rejected": -1.4535988569259644, + "loss": 1.023, + "nll_loss": 0.971581757068634, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.10105422884225845, + "rewards/margins": 0.044305648654699326, + "rewards/rejected": -0.14535988867282867, + "step": 2860 + }, + { + "epoch": 0.52, + "grad_norm": 0.8290739059448242, + "learning_rate": 6.386955888775659e-06, + "log_odds_chosen": 0.5817979574203491, + "log_odds_ratio": -0.6108132004737854, + "logits/chosen": -0.4003733694553375, + "logits/rejected": -0.4213009774684906, + "logps/chosen": -0.8927229046821594, + "logps/rejected": -1.2568292617797852, + "loss": 0.996, + "nll_loss": 0.934949517250061, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08927230536937714, + "rewards/margins": 0.036410633474588394, + "rewards/rejected": -0.12568292021751404, + "step": 2870 + }, + { + "epoch": 0.52, + "grad_norm": 1.026987910270691, + "learning_rate": 6.381132624836221e-06, + "log_odds_chosen": 0.9320286512374878, + "log_odds_ratio": -0.46994876861572266, + "logits/chosen": -0.3082696795463562, + "logits/rejected": -0.36887750029563904, + "logps/chosen": -0.7973732948303223, + "logps/rejected": -1.398707628250122, + "loss": 0.9171, + "nll_loss": 0.8701435327529907, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07973732799291611, + "rewards/margins": 0.0601334273815155, + "rewards/rejected": -0.1398707628250122, + "step": 2880 + }, + { + "epoch": 0.52, + "grad_norm": 1.0939037799835205, + "learning_rate": 6.375309360896782e-06, + "log_odds_chosen": 0.3428114056587219, + "log_odds_ratio": -0.6756697297096252, + "logits/chosen": -0.3726629614830017, + "logits/rejected": -0.3697716295719147, + "logps/chosen": -1.040447473526001, + "logps/rejected": -1.2464678287506104, + "loss": 1.0178, + "nll_loss": 0.9502217173576355, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10404475033283234, + "rewards/margins": 0.020602049306035042, + "rewards/rejected": -0.12464678287506104, + "step": 2890 + }, + { + "epoch": 0.52, + "grad_norm": 0.9675848484039307, + "learning_rate": 6.369486096957344e-06, + "log_odds_chosen": 0.7951258420944214, + "log_odds_ratio": -0.4850694537162781, + "logits/chosen": -0.2807965576648712, + "logits/rejected": -0.3710668087005615, + "logps/chosen": -0.9030061960220337, + "logps/rejected": -1.4071217775344849, + "loss": 0.9388, + "nll_loss": 0.8902514576911926, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09030061960220337, + "rewards/margins": 0.050411563366651535, + "rewards/rejected": -0.140712171792984, + "step": 2900 + }, + { + "epoch": 0.53, + "grad_norm": 1.603076696395874, + "learning_rate": 6.363662833017906e-06, + "log_odds_chosen": 0.6028963923454285, + "log_odds_ratio": -0.6104676127433777, + "logits/chosen": -0.31884264945983887, + "logits/rejected": -0.3657948076725006, + "logps/chosen": -1.0222609043121338, + "logps/rejected": -1.4125055074691772, + "loss": 1.0856, + "nll_loss": 1.024524450302124, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10222609341144562, + "rewards/margins": 0.039024461060762405, + "rewards/rejected": -0.14125055074691772, + "step": 2910 + }, + { + "epoch": 0.53, + "grad_norm": 1.0004611015319824, + "learning_rate": 6.357839569078468e-06, + "log_odds_chosen": 0.443314790725708, + "log_odds_ratio": -0.6308013200759888, + "logits/chosen": -0.31897619366645813, + "logits/rejected": -0.3597314953804016, + "logps/chosen": -0.9442898035049438, + "logps/rejected": -1.2072503566741943, + "loss": 0.908, + "nll_loss": 0.8449575304985046, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09442898631095886, + "rewards/margins": 0.02629604935646057, + "rewards/rejected": -0.12072502076625824, + "step": 2920 + }, + { + "epoch": 0.53, + "grad_norm": 0.8449299335479736, + "learning_rate": 6.35201630513903e-06, + "log_odds_chosen": 0.7141034007072449, + "log_odds_ratio": -0.5272042155265808, + "logits/chosen": -0.29719918966293335, + "logits/rejected": -0.3617883622646332, + "logps/chosen": -0.9249895215034485, + "logps/rejected": -1.4064514636993408, + "loss": 0.9018, + "nll_loss": 0.849094569683075, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09249895811080933, + "rewards/margins": 0.04814619570970535, + "rewards/rejected": -0.14064516127109528, + "step": 2930 + }, + { + "epoch": 0.53, + "grad_norm": 0.9977993369102478, + "learning_rate": 6.346193041199592e-06, + "log_odds_chosen": 0.5973039865493774, + "log_odds_ratio": -0.5831558108329773, + "logits/chosen": -0.36963245272636414, + "logits/rejected": -0.3873246908187866, + "logps/chosen": -1.0007551908493042, + "logps/rejected": -1.412858247756958, + "loss": 1.0077, + "nll_loss": 0.9494079351425171, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10007552057504654, + "rewards/margins": 0.04121030494570732, + "rewards/rejected": -0.14128582179546356, + "step": 2940 + }, + { + "epoch": 0.53, + "grad_norm": 0.69256192445755, + "learning_rate": 6.340369777260154e-06, + "log_odds_chosen": 0.784423828125, + "log_odds_ratio": -0.5472939014434814, + "logits/chosen": -0.3177037835121155, + "logits/rejected": -0.3788725733757019, + "logps/chosen": -0.9119611978530884, + "logps/rejected": -1.4428807497024536, + "loss": 0.9072, + "nll_loss": 0.852469801902771, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09119612723588943, + "rewards/margins": 0.05309196189045906, + "rewards/rejected": -0.1442880928516388, + "step": 2950 + }, + { + "epoch": 0.53, + "grad_norm": 0.8197088837623596, + "learning_rate": 6.334546513320716e-06, + "log_odds_chosen": 0.7240030169487, + "log_odds_ratio": -0.5557405948638916, + "logits/chosen": -0.3689579963684082, + "logits/rejected": -0.4157228469848633, + "logps/chosen": -0.9546843767166138, + "logps/rejected": -1.4102638959884644, + "loss": 1.0052, + "nll_loss": 0.9496715664863586, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0954684391617775, + "rewards/margins": 0.04555796831846237, + "rewards/rejected": -0.14102640748023987, + "step": 2960 + }, + { + "epoch": 0.54, + "grad_norm": 1.157364845275879, + "learning_rate": 6.328723249381278e-06, + "log_odds_chosen": 0.46397870779037476, + "log_odds_ratio": -0.6432394981384277, + "logits/chosen": -0.36046457290649414, + "logits/rejected": -0.32585659623146057, + "logps/chosen": -0.9779598116874695, + "logps/rejected": -1.2871477603912354, + "loss": 0.9652, + "nll_loss": 0.9008302688598633, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0977959856390953, + "rewards/margins": 0.03091878816485405, + "rewards/rejected": -0.12871477007865906, + "step": 2970 + }, + { + "epoch": 0.54, + "grad_norm": 0.8545392155647278, + "learning_rate": 6.32289998544184e-06, + "log_odds_chosen": 0.7115742564201355, + "log_odds_ratio": -0.5449367761611938, + "logits/chosen": -0.3283035159111023, + "logits/rejected": -0.39339348673820496, + "logps/chosen": -0.8771149516105652, + "logps/rejected": -1.3346948623657227, + "loss": 1.0092, + "nll_loss": 0.9547283053398132, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08771149814128876, + "rewards/margins": 0.04575798287987709, + "rewards/rejected": -0.13346949219703674, + "step": 2980 + }, + { + "epoch": 0.54, + "grad_norm": 0.9508376717567444, + "learning_rate": 6.317076721502401e-06, + "log_odds_chosen": 0.532355546951294, + "log_odds_ratio": -0.5811368227005005, + "logits/chosen": -0.38911187648773193, + "logits/rejected": -0.42666807770729065, + "logps/chosen": -1.050180435180664, + "logps/rejected": -1.404990792274475, + "loss": 0.9887, + "nll_loss": 0.9305723309516907, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10501804202795029, + "rewards/margins": 0.03548102825880051, + "rewards/rejected": -0.140499085187912, + "step": 2990 + }, + { + "epoch": 0.54, + "grad_norm": 1.369917631149292, + "learning_rate": 6.311253457562964e-06, + "log_odds_chosen": 0.5318782329559326, + "log_odds_ratio": -0.5593219995498657, + "logits/chosen": -0.4074210226535797, + "logits/rejected": -0.4076215326786041, + "logps/chosen": -0.960712730884552, + "logps/rejected": -1.3129749298095703, + "loss": 0.9791, + "nll_loss": 0.9231414794921875, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0960712656378746, + "rewards/margins": 0.03522622585296631, + "rewards/rejected": -0.1312974989414215, + "step": 3000 + }, + { + "epoch": 0.54, + "grad_norm": 2.4820430278778076, + "learning_rate": 6.305430193623526e-06, + "log_odds_chosen": 0.6888760328292847, + "log_odds_ratio": -0.5727877616882324, + "logits/chosen": -0.3978230953216553, + "logits/rejected": -0.40785154700279236, + "logps/chosen": -0.9991810917854309, + "logps/rejected": -1.4873907566070557, + "loss": 0.9746, + "nll_loss": 0.9173223376274109, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09991810470819473, + "rewards/margins": 0.04882097989320755, + "rewards/rejected": -0.1487390697002411, + "step": 3010 + }, + { + "epoch": 0.55, + "grad_norm": 1.1071016788482666, + "learning_rate": 6.299606929684087e-06, + "log_odds_chosen": 0.49799299240112305, + "log_odds_ratio": -0.6224324107170105, + "logits/chosen": -0.3779663145542145, + "logits/rejected": -0.3969855308532715, + "logps/chosen": -1.0049419403076172, + "logps/rejected": -1.3126152753829956, + "loss": 1.0151, + "nll_loss": 0.9528893232345581, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10049420595169067, + "rewards/margins": 0.030767327174544334, + "rewards/rejected": -0.13126154243946075, + "step": 3020 + }, + { + "epoch": 0.55, + "grad_norm": 0.5869792103767395, + "learning_rate": 6.29378366574465e-06, + "log_odds_chosen": 0.6354817748069763, + "log_odds_ratio": -0.5479956865310669, + "logits/chosen": -0.3983832001686096, + "logits/rejected": -0.41133037209510803, + "logps/chosen": -0.9372811317443848, + "logps/rejected": -1.3125584125518799, + "loss": 1.0118, + "nll_loss": 0.9569603800773621, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09372811019420624, + "rewards/margins": 0.037527717649936676, + "rewards/rejected": -0.1312558352947235, + "step": 3030 + }, + { + "epoch": 0.55, + "grad_norm": 0.6390839219093323, + "learning_rate": 6.287960401805212e-06, + "log_odds_chosen": 0.8298152089118958, + "log_odds_ratio": -0.5246734023094177, + "logits/chosen": -0.3871910870075226, + "logits/rejected": -0.4038742184638977, + "logps/chosen": -0.9234312772750854, + "logps/rejected": -1.4239513874053955, + "loss": 1.0285, + "nll_loss": 0.9760168194770813, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09234314411878586, + "rewards/margins": 0.0500519797205925, + "rewards/rejected": -0.14239510893821716, + "step": 3040 + }, + { + "epoch": 0.55, + "grad_norm": 1.0352649688720703, + "learning_rate": 6.2821371378657734e-06, + "log_odds_chosen": 0.7161882519721985, + "log_odds_ratio": -0.5397850275039673, + "logits/chosen": -0.30927398800849915, + "logits/rejected": -0.3645241856575012, + "logps/chosen": -0.9100635647773743, + "logps/rejected": -1.3684438467025757, + "loss": 0.9382, + "nll_loss": 0.8842523694038391, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09100636094808578, + "rewards/margins": 0.04583803564310074, + "rewards/rejected": -0.13684439659118652, + "step": 3050 + }, + { + "epoch": 0.55, + "grad_norm": 1.2245994806289673, + "learning_rate": 6.276313873926336e-06, + "log_odds_chosen": 0.3765376806259155, + "log_odds_ratio": -0.6475566029548645, + "logits/chosen": -0.35767942667007446, + "logits/rejected": -0.40610751509666443, + "logps/chosen": -0.9529776573181152, + "logps/rejected": -1.1977792978286743, + "loss": 0.9646, + "nll_loss": 0.8998012542724609, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09529776871204376, + "rewards/margins": 0.024480151012539864, + "rewards/rejected": -0.11977791786193848, + "step": 3060 + }, + { + "epoch": 0.55, + "grad_norm": 0.6054956912994385, + "learning_rate": 6.270490609986898e-06, + "log_odds_chosen": 0.49144425988197327, + "log_odds_ratio": -0.6276726722717285, + "logits/chosen": -0.3699381649494171, + "logits/rejected": -0.39319050312042236, + "logps/chosen": -0.9125950932502747, + "logps/rejected": -1.2397955656051636, + "loss": 1.0078, + "nll_loss": 0.9449881315231323, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09125951677560806, + "rewards/margins": 0.03272005170583725, + "rewards/rejected": -0.12397956848144531, + "step": 3070 + }, + { + "epoch": 0.56, + "grad_norm": 1.454533576965332, + "learning_rate": 6.264667346047459e-06, + "log_odds_chosen": 0.5676583051681519, + "log_odds_ratio": -0.6090052723884583, + "logits/chosen": -0.39846283197402954, + "logits/rejected": -0.4198426306247711, + "logps/chosen": -0.9415773153305054, + "logps/rejected": -1.296958327293396, + "loss": 1.0253, + "nll_loss": 0.9643552899360657, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09415774047374725, + "rewards/margins": 0.03553809970617294, + "rewards/rejected": -0.1296958327293396, + "step": 3080 + }, + { + "epoch": 0.56, + "grad_norm": 1.0474345684051514, + "learning_rate": 6.258844082108021e-06, + "log_odds_chosen": 0.6766977310180664, + "log_odds_ratio": -0.5505935549736023, + "logits/chosen": -0.37675073742866516, + "logits/rejected": -0.40171122550964355, + "logps/chosen": -1.0111701488494873, + "logps/rejected": -1.48032546043396, + "loss": 1.0248, + "nll_loss": 0.9696931838989258, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10111702978610992, + "rewards/margins": 0.04691552370786667, + "rewards/rejected": -0.1480325609445572, + "step": 3090 + }, + { + "epoch": 0.56, + "grad_norm": 0.677821695804596, + "learning_rate": 6.253020818168583e-06, + "log_odds_chosen": 0.806612491607666, + "log_odds_ratio": -0.49709954857826233, + "logits/chosen": -0.35511764883995056, + "logits/rejected": -0.3808368742465973, + "logps/chosen": -0.9699333310127258, + "logps/rejected": -1.517735481262207, + "loss": 0.9724, + "nll_loss": 0.9226738214492798, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.0969933345913887, + "rewards/margins": 0.05478020757436752, + "rewards/rejected": -0.15177355706691742, + "step": 3100 + }, + { + "epoch": 0.56, + "grad_norm": 1.076366901397705, + "learning_rate": 6.247197554229145e-06, + "log_odds_chosen": 0.5176805853843689, + "log_odds_ratio": -0.6054424047470093, + "logits/chosen": -0.368263840675354, + "logits/rejected": -0.37648746371269226, + "logps/chosen": -1.000482439994812, + "logps/rejected": -1.343254804611206, + "loss": 1.0322, + "nll_loss": 0.9716836214065552, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1000482439994812, + "rewards/margins": 0.03427725285291672, + "rewards/rejected": -0.13432548940181732, + "step": 3110 + }, + { + "epoch": 0.56, + "grad_norm": 0.7154731750488281, + "learning_rate": 6.241374290289707e-06, + "log_odds_chosen": 0.6836196184158325, + "log_odds_ratio": -0.5482660531997681, + "logits/chosen": -0.3415481448173523, + "logits/rejected": -0.3720071315765381, + "logps/chosen": -0.8983786702156067, + "logps/rejected": -1.293168306350708, + "loss": 0.9264, + "nll_loss": 0.8715240359306335, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08983787894248962, + "rewards/margins": 0.03947895020246506, + "rewards/rejected": -0.12931683659553528, + "step": 3120 + }, + { + "epoch": 0.57, + "grad_norm": 0.8524779677391052, + "learning_rate": 6.235551026350269e-06, + "log_odds_chosen": 0.6569803953170776, + "log_odds_ratio": -0.5352288484573364, + "logits/chosen": -0.3533919155597687, + "logits/rejected": -0.43142709136009216, + "logps/chosen": -0.9483108520507812, + "logps/rejected": -1.3620734214782715, + "loss": 1.0218, + "nll_loss": 0.96832275390625, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09483110159635544, + "rewards/margins": 0.04137624427676201, + "rewards/rejected": -0.13620734214782715, + "step": 3130 + }, + { + "epoch": 0.57, + "grad_norm": 1.4360308647155762, + "learning_rate": 6.229727762410831e-06, + "log_odds_chosen": 0.6728376746177673, + "log_odds_ratio": -0.5485900640487671, + "logits/chosen": -0.4550979733467102, + "logits/rejected": -0.4855107367038727, + "logps/chosen": -0.9604121446609497, + "logps/rejected": -1.4198859930038452, + "loss": 1.0696, + "nll_loss": 1.014716386795044, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0960412248969078, + "rewards/margins": 0.04594738408923149, + "rewards/rejected": -0.141988605260849, + "step": 3140 + }, + { + "epoch": 0.57, + "grad_norm": 0.6961885094642639, + "learning_rate": 6.223904498471393e-06, + "log_odds_chosen": 0.4872314929962158, + "log_odds_ratio": -0.6163089871406555, + "logits/chosen": -0.46947789192199707, + "logits/rejected": -0.4593765139579773, + "logps/chosen": -0.9408215284347534, + "logps/rejected": -1.2545238733291626, + "loss": 1.0243, + "nll_loss": 0.9627069234848022, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09408216178417206, + "rewards/margins": 0.03137023001909256, + "rewards/rejected": -0.12545239925384521, + "step": 3150 + }, + { + "epoch": 0.57, + "grad_norm": 2.0956509113311768, + "learning_rate": 6.2180812345319555e-06, + "log_odds_chosen": 0.9468412399291992, + "log_odds_ratio": -0.47234565019607544, + "logits/chosen": -0.3197837471961975, + "logits/rejected": -0.3366406261920929, + "logps/chosen": -0.8767255544662476, + "logps/rejected": -1.4780380725860596, + "loss": 0.9153, + "nll_loss": 0.8680181503295898, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08767254650592804, + "rewards/margins": 0.060131270438432693, + "rewards/rejected": -0.14780382812023163, + "step": 3160 + }, + { + "epoch": 0.57, + "grad_norm": 1.5426450967788696, + "learning_rate": 6.212257970592516e-06, + "log_odds_chosen": 0.5291934609413147, + "log_odds_ratio": -0.5959833860397339, + "logits/chosen": -0.45292121171951294, + "logits/rejected": -0.449939489364624, + "logps/chosen": -0.976009726524353, + "logps/rejected": -1.3378291130065918, + "loss": 1.0219, + "nll_loss": 0.9623022079467773, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09760098159313202, + "rewards/margins": 0.03618193417787552, + "rewards/rejected": -0.13378292322158813, + "step": 3170 + }, + { + "epoch": 0.57, + "grad_norm": 1.670472264289856, + "learning_rate": 6.206434706653078e-06, + "log_odds_chosen": 0.671677827835083, + "log_odds_ratio": -0.6096156239509583, + "logits/chosen": -0.4184805452823639, + "logits/rejected": -0.44354119896888733, + "logps/chosen": -1.0512487888336182, + "logps/rejected": -1.49226975440979, + "loss": 1.0419, + "nll_loss": 0.980981171131134, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10512487590312958, + "rewards/margins": 0.04410209506750107, + "rewards/rejected": -0.14922699332237244, + "step": 3180 + }, + { + "epoch": 0.58, + "grad_norm": 1.4701049327850342, + "learning_rate": 6.200611442713641e-06, + "log_odds_chosen": 0.6520159244537354, + "log_odds_ratio": -0.5479967594146729, + "logits/chosen": -0.34326881170272827, + "logits/rejected": -0.40131497383117676, + "logps/chosen": -0.9332335591316223, + "logps/rejected": -1.3544549942016602, + "loss": 1.0696, + "nll_loss": 1.0148441791534424, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09332336485385895, + "rewards/margins": 0.042122118175029755, + "rewards/rejected": -0.1354454755783081, + "step": 3190 + }, + { + "epoch": 0.58, + "grad_norm": 1.3066092729568481, + "learning_rate": 6.194788178774202e-06, + "log_odds_chosen": 0.6795617938041687, + "log_odds_ratio": -0.5576266050338745, + "logits/chosen": -0.3325703740119934, + "logits/rejected": -0.3916395604610443, + "logps/chosen": -1.0032011270523071, + "logps/rejected": -1.4542853832244873, + "loss": 1.0795, + "nll_loss": 1.0237706899642944, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10032012313604355, + "rewards/margins": 0.04510842263698578, + "rewards/rejected": -0.14542852342128754, + "step": 3200 + }, + { + "epoch": 0.58, + "grad_norm": 1.0712743997573853, + "learning_rate": 6.1889649148347645e-06, + "log_odds_chosen": 0.537954568862915, + "log_odds_ratio": -0.5886083841323853, + "logits/chosen": -0.4207335114479065, + "logits/rejected": -0.39842933416366577, + "logps/chosen": -1.0041154623031616, + "logps/rejected": -1.3683401346206665, + "loss": 1.035, + "nll_loss": 0.9761091470718384, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.1004115492105484, + "rewards/margins": 0.03642246127128601, + "rewards/rejected": -0.13683399558067322, + "step": 3210 + }, + { + "epoch": 0.58, + "grad_norm": 1.3003288507461548, + "learning_rate": 6.183141650895327e-06, + "log_odds_chosen": 0.4215551018714905, + "log_odds_ratio": -0.673181414604187, + "logits/chosen": -0.38877248764038086, + "logits/rejected": -0.40194326639175415, + "logps/chosen": -1.0147056579589844, + "logps/rejected": -1.3024789094924927, + "loss": 1.0088, + "nll_loss": 0.9414365887641907, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.10147056728601456, + "rewards/margins": 0.028777319937944412, + "rewards/rejected": -0.13024787604808807, + "step": 3220 + }, + { + "epoch": 0.58, + "grad_norm": 1.1259840726852417, + "learning_rate": 6.177318386955889e-06, + "log_odds_chosen": 0.65233314037323, + "log_odds_ratio": -0.5480107069015503, + "logits/chosen": -0.38454505801200867, + "logits/rejected": -0.3793458938598633, + "logps/chosen": -0.920840859413147, + "logps/rejected": -1.2983678579330444, + "loss": 0.9632, + "nll_loss": 0.9083762168884277, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09208408743143082, + "rewards/margins": 0.03775270655751228, + "rewards/rejected": -0.1298367828130722, + "step": 3230 + }, + { + "epoch": 0.59, + "grad_norm": 1.1241098642349243, + "learning_rate": 6.1714951230164505e-06, + "log_odds_chosen": 0.44269585609436035, + "log_odds_ratio": -0.6357613801956177, + "logits/chosen": -0.4421209394931793, + "logits/rejected": -0.4722370207309723, + "logps/chosen": -1.0277878046035767, + "logps/rejected": -1.2992737293243408, + "loss": 1.0363, + "nll_loss": 0.9727180600166321, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10277877748012543, + "rewards/margins": 0.02714858576655388, + "rewards/rejected": -0.1299273669719696, + "step": 3240 + }, + { + "epoch": 0.59, + "grad_norm": 1.237729549407959, + "learning_rate": 6.165671859077013e-06, + "log_odds_chosen": 1.0110903978347778, + "log_odds_ratio": -0.46358394622802734, + "logits/chosen": -0.28028032183647156, + "logits/rejected": -0.3303956091403961, + "logps/chosen": -0.9014646410942078, + "logps/rejected": -1.5677226781845093, + "loss": 0.9381, + "nll_loss": 0.8917421102523804, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09014646708965302, + "rewards/margins": 0.06662581115961075, + "rewards/rejected": -0.15677228569984436, + "step": 3250 + }, + { + "epoch": 0.59, + "grad_norm": 1.3306907415390015, + "learning_rate": 6.159848595137574e-06, + "log_odds_chosen": 0.5208662748336792, + "log_odds_ratio": -0.6377922296524048, + "logits/chosen": -0.4297618865966797, + "logits/rejected": -0.42744913697242737, + "logps/chosen": -1.0824130773544312, + "logps/rejected": -1.4034180641174316, + "loss": 1.0254, + "nll_loss": 0.9615737795829773, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.10824130475521088, + "rewards/margins": 0.03210049122571945, + "rewards/rejected": -0.14034178853034973, + "step": 3260 + }, + { + "epoch": 0.59, + "grad_norm": 1.011088490486145, + "learning_rate": 6.154025331198136e-06, + "log_odds_chosen": 0.3441120982170105, + "log_odds_ratio": -0.660956621170044, + "logits/chosen": -0.4124126434326172, + "logits/rejected": -0.3904462456703186, + "logps/chosen": -1.0279223918914795, + "logps/rejected": -1.287450909614563, + "loss": 1.0092, + "nll_loss": 0.9431363940238953, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10279224067926407, + "rewards/margins": 0.025952842086553574, + "rewards/rejected": -0.12874507904052734, + "step": 3270 + }, + { + "epoch": 0.59, + "grad_norm": 1.1600735187530518, + "learning_rate": 6.148202067258698e-06, + "log_odds_chosen": 0.9012781381607056, + "log_odds_ratio": -0.4865642189979553, + "logits/chosen": -0.33863896131515503, + "logits/rejected": -0.36632639169692993, + "logps/chosen": -0.8989202380180359, + "logps/rejected": -1.4705233573913574, + "loss": 0.8402, + "nll_loss": 0.7914935946464539, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.08989204466342926, + "rewards/margins": 0.05716029554605484, + "rewards/rejected": -0.1470523327589035, + "step": 3280 + }, + { + "epoch": 0.59, + "grad_norm": 0.9526046514511108, + "learning_rate": 6.14237880331926e-06, + "log_odds_chosen": 0.625182032585144, + "log_odds_ratio": -0.5688682794570923, + "logits/chosen": -0.38069894909858704, + "logits/rejected": -0.4275147318840027, + "logps/chosen": -0.8982593417167664, + "logps/rejected": -1.2962671518325806, + "loss": 0.9541, + "nll_loss": 0.8972567319869995, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08982594311237335, + "rewards/margins": 0.039800770580768585, + "rewards/rejected": -0.12962672114372253, + "step": 3290 + }, + { + "epoch": 0.6, + "grad_norm": 0.8836661577224731, + "learning_rate": 6.136555539379822e-06, + "log_odds_chosen": 0.6172818541526794, + "log_odds_ratio": -0.5925203561782837, + "logits/chosen": -0.4373806416988373, + "logits/rejected": -0.5029473304748535, + "logps/chosen": -0.9505300521850586, + "logps/rejected": -1.3144811391830444, + "loss": 1.0003, + "nll_loss": 0.9410818219184875, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09505301713943481, + "rewards/margins": 0.0363951250910759, + "rewards/rejected": -0.13144811987876892, + "step": 3300 + }, + { + "epoch": 0.6, + "grad_norm": 0.7096745371818542, + "learning_rate": 6.130732275440384e-06, + "log_odds_chosen": 0.6025352478027344, + "log_odds_ratio": -0.598220944404602, + "logits/chosen": -0.3227522373199463, + "logits/rejected": -0.4034956097602844, + "logps/chosen": -0.9186944961547852, + "logps/rejected": -1.2923507690429688, + "loss": 0.9583, + "nll_loss": 0.8985183835029602, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09186945855617523, + "rewards/margins": 0.03736562281847, + "rewards/rejected": -0.12923508882522583, + "step": 3310 + }, + { + "epoch": 0.6, + "grad_norm": 0.6237562894821167, + "learning_rate": 6.1249090115009465e-06, + "log_odds_chosen": 0.8236312866210938, + "log_odds_ratio": -0.5209259986877441, + "logits/chosen": -0.3754587769508362, + "logits/rejected": -0.423706591129303, + "logps/chosen": -0.8962365984916687, + "logps/rejected": -1.4527724981307983, + "loss": 0.9374, + "nll_loss": 0.8853539228439331, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08962366729974747, + "rewards/margins": 0.055653590708971024, + "rewards/rejected": -0.1452772468328476, + "step": 3320 + }, + { + "epoch": 0.6, + "grad_norm": 1.383882999420166, + "learning_rate": 6.119085747561508e-06, + "log_odds_chosen": 0.49736565351486206, + "log_odds_ratio": -0.6017710566520691, + "logits/chosen": -0.47567468881607056, + "logits/rejected": -0.4611749053001404, + "logps/chosen": -1.004540205001831, + "logps/rejected": -1.3043259382247925, + "loss": 1.0461, + "nll_loss": 0.985937237739563, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10045403242111206, + "rewards/margins": 0.0299785528331995, + "rewards/rejected": -0.13043257594108582, + "step": 3330 + }, + { + "epoch": 0.6, + "grad_norm": 2.3500630855560303, + "learning_rate": 6.11326248362207e-06, + "log_odds_chosen": 0.7103801965713501, + "log_odds_ratio": -0.5463491678237915, + "logits/chosen": -0.3539440631866455, + "logits/rejected": -0.39365509152412415, + "logps/chosen": -0.9359360933303833, + "logps/rejected": -1.3999426364898682, + "loss": 0.9283, + "nll_loss": 0.87371426820755, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09359361231327057, + "rewards/margins": 0.046400636434555054, + "rewards/rejected": -0.13999423384666443, + "step": 3340 + }, + { + "epoch": 0.61, + "grad_norm": 1.4304038286209106, + "learning_rate": 6.107439219682632e-06, + "log_odds_chosen": 0.9102018475532532, + "log_odds_ratio": -0.5017456412315369, + "logits/chosen": -0.37354880571365356, + "logits/rejected": -0.39932340383529663, + "logps/chosen": -0.8635073900222778, + "logps/rejected": -1.4160997867584229, + "loss": 0.9113, + "nll_loss": 0.8611549139022827, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08635074645280838, + "rewards/margins": 0.05525922775268555, + "rewards/rejected": -0.14160996675491333, + "step": 3350 + }, + { + "epoch": 0.61, + "grad_norm": 0.8932999968528748, + "learning_rate": 6.101615955743193e-06, + "log_odds_chosen": 0.4051954746246338, + "log_odds_ratio": -0.6483933329582214, + "logits/chosen": -0.4127295911312103, + "logits/rejected": -0.44972100853919983, + "logps/chosen": -0.9479262232780457, + "logps/rejected": -1.258101224899292, + "loss": 0.9926, + "nll_loss": 0.9278033375740051, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09479261934757233, + "rewards/margins": 0.03101750835776329, + "rewards/rejected": -0.12581013143062592, + "step": 3360 + }, + { + "epoch": 0.61, + "grad_norm": 0.8245841860771179, + "learning_rate": 6.0957926918037555e-06, + "log_odds_chosen": 0.7083054184913635, + "log_odds_ratio": -0.513762891292572, + "logits/chosen": -0.40341416001319885, + "logits/rejected": -0.42578190565109253, + "logps/chosen": -0.9963234066963196, + "logps/rejected": -1.4809606075286865, + "loss": 1.0241, + "nll_loss": 0.9727651476860046, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09963233768939972, + "rewards/margins": 0.04846369847655296, + "rewards/rejected": -0.14809605479240417, + "step": 3370 + }, + { + "epoch": 0.61, + "grad_norm": 0.7346036434173584, + "learning_rate": 6.089969427864318e-06, + "log_odds_chosen": 0.7608426213264465, + "log_odds_ratio": -0.5603666305541992, + "logits/chosen": -0.4570868909358978, + "logits/rejected": -0.4746372699737549, + "logps/chosen": -1.0287775993347168, + "logps/rejected": -1.521809697151184, + "loss": 1.044, + "nll_loss": 0.9879173040390015, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10287775844335556, + "rewards/margins": 0.04930321127176285, + "rewards/rejected": -0.1521809697151184, + "step": 3380 + }, + { + "epoch": 0.61, + "grad_norm": 0.9534040689468384, + "learning_rate": 6.084146163924879e-06, + "log_odds_chosen": 0.8834999203681946, + "log_odds_ratio": -0.5292294025421143, + "logits/chosen": -0.4181883931159973, + "logits/rejected": -0.44676756858825684, + "logps/chosen": -0.941826343536377, + "logps/rejected": -1.5739504098892212, + "loss": 0.9869, + "nll_loss": 0.9340030550956726, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09418264031410217, + "rewards/margins": 0.06321238726377487, + "rewards/rejected": -0.15739500522613525, + "step": 3390 + }, + { + "epoch": 0.61, + "grad_norm": 1.0045500993728638, + "learning_rate": 6.0783228999854416e-06, + "log_odds_chosen": 0.6764132380485535, + "log_odds_ratio": -0.5628331303596497, + "logits/chosen": -0.42462119460105896, + "logits/rejected": -0.4508097767829895, + "logps/chosen": -0.9737855792045593, + "logps/rejected": -1.395506501197815, + "loss": 1.003, + "nll_loss": 0.9467493891716003, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09737856686115265, + "rewards/margins": 0.04217210039496422, + "rewards/rejected": -0.13955065608024597, + "step": 3400 + }, + { + "epoch": 0.62, + "grad_norm": 0.7688149809837341, + "learning_rate": 6.072499636046004e-06, + "log_odds_chosen": 0.7314145565032959, + "log_odds_ratio": -0.5189186930656433, + "logits/chosen": -0.3948749005794525, + "logits/rejected": -0.41057324409484863, + "logps/chosen": -1.0129984617233276, + "logps/rejected": -1.5188207626342773, + "loss": 0.9809, + "nll_loss": 0.9290172457695007, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10129984468221664, + "rewards/margins": 0.05058223009109497, + "rewards/rejected": -0.15188206732273102, + "step": 3410 + }, + { + "epoch": 0.62, + "grad_norm": 0.5660412311553955, + "learning_rate": 6.066676372106565e-06, + "log_odds_chosen": 0.40381866693496704, + "log_odds_ratio": -0.6203995943069458, + "logits/chosen": -0.3358531892299652, + "logits/rejected": -0.3932770788669586, + "logps/chosen": -0.9776952862739563, + "logps/rejected": -1.2311264276504517, + "loss": 1.0244, + "nll_loss": 0.9623534083366394, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09776954352855682, + "rewards/margins": 0.025343095883727074, + "rewards/rejected": -0.12311263382434845, + "step": 3420 + }, + { + "epoch": 0.62, + "grad_norm": 0.8201726675033569, + "learning_rate": 6.060853108167128e-06, + "log_odds_chosen": 0.8580142259597778, + "log_odds_ratio": -0.5500550866127014, + "logits/chosen": -0.38643431663513184, + "logits/rejected": -0.42080745100975037, + "logps/chosen": -0.9666447639465332, + "logps/rejected": -1.4860246181488037, + "loss": 1.0203, + "nll_loss": 0.9652493596076965, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09666448831558228, + "rewards/margins": 0.05193798616528511, + "rewards/rejected": -0.1486024558544159, + "step": 3430 + }, + { + "epoch": 0.62, + "grad_norm": 1.111434817314148, + "learning_rate": 6.055029844227689e-06, + "log_odds_chosen": 0.8360646963119507, + "log_odds_ratio": -0.5022442936897278, + "logits/chosen": -0.3898642361164093, + "logits/rejected": -0.3858799636363983, + "logps/chosen": -0.9356891512870789, + "logps/rejected": -1.484752893447876, + "loss": 0.969, + "nll_loss": 0.9187499284744263, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09356891363859177, + "rewards/margins": 0.054906368255615234, + "rewards/rejected": -0.1484752744436264, + "step": 3440 + }, + { + "epoch": 0.62, + "grad_norm": 0.8832417726516724, + "learning_rate": 6.0492065802882514e-06, + "log_odds_chosen": 0.5634386539459229, + "log_odds_ratio": -0.6130571365356445, + "logits/chosen": -0.4038141369819641, + "logits/rejected": -0.44958561658859253, + "logps/chosen": -1.0071508884429932, + "logps/rejected": -1.4017693996429443, + "loss": 1.0727, + "nll_loss": 1.0114319324493408, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10071510076522827, + "rewards/margins": 0.03946184366941452, + "rewards/rejected": -0.140176922082901, + "step": 3450 + }, + { + "epoch": 0.63, + "grad_norm": 1.0038444995880127, + "learning_rate": 6.043383316348813e-06, + "log_odds_chosen": 0.7370051741600037, + "log_odds_ratio": -0.5406359434127808, + "logits/chosen": -0.43003183603286743, + "logits/rejected": -0.4604741036891937, + "logps/chosen": -1.009166955947876, + "logps/rejected": -1.5070292949676514, + "loss": 0.9398, + "nll_loss": 0.885736346244812, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10091669857501984, + "rewards/margins": 0.04978623613715172, + "rewards/rejected": -0.15070292353630066, + "step": 3460 + }, + { + "epoch": 0.63, + "grad_norm": 0.7314472794532776, + "learning_rate": 6.037560052409375e-06, + "log_odds_chosen": 0.9042521715164185, + "log_odds_ratio": -0.5049646496772766, + "logits/chosen": -0.3623233139514923, + "logits/rejected": -0.4032473564147949, + "logps/chosen": -0.8811138272285461, + "logps/rejected": -1.4935479164123535, + "loss": 0.9903, + "nll_loss": 0.9398002624511719, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08811138570308685, + "rewards/margins": 0.061243414878845215, + "rewards/rejected": -0.14935480058193207, + "step": 3470 + }, + { + "epoch": 0.63, + "grad_norm": 0.7558674216270447, + "learning_rate": 6.0317367884699375e-06, + "log_odds_chosen": 0.6601871252059937, + "log_odds_ratio": -0.5303488373756409, + "logits/chosen": -0.41661277413368225, + "logits/rejected": -0.4547964036464691, + "logps/chosen": -1.0405640602111816, + "logps/rejected": -1.4991106986999512, + "loss": 1.0412, + "nll_loss": 0.988142192363739, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10405639559030533, + "rewards/margins": 0.04585467278957367, + "rewards/rejected": -0.1499110758304596, + "step": 3480 + }, + { + "epoch": 0.63, + "grad_norm": 1.1181609630584717, + "learning_rate": 6.025913524530499e-06, + "log_odds_chosen": 0.6442530751228333, + "log_odds_ratio": -0.5743609666824341, + "logits/chosen": -0.4180160462856293, + "logits/rejected": -0.4605022370815277, + "logps/chosen": -1.0506532192230225, + "logps/rejected": -1.4884201288223267, + "loss": 1.0156, + "nll_loss": 0.9582085609436035, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10506530851125717, + "rewards/margins": 0.04377670958638191, + "rewards/rejected": -0.14884202182292938, + "step": 3490 + }, + { + "epoch": 0.63, + "grad_norm": 0.6729230880737305, + "learning_rate": 6.020090260591061e-06, + "log_odds_chosen": 0.6248651742935181, + "log_odds_ratio": -0.5695139169692993, + "logits/chosen": -0.4435412883758545, + "logits/rejected": -0.49575695395469666, + "logps/chosen": -1.0582239627838135, + "logps/rejected": -1.4656044244766235, + "loss": 1.0504, + "nll_loss": 0.9934715032577515, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10582239925861359, + "rewards/margins": 0.04073803871870041, + "rewards/rejected": -0.1465604454278946, + "step": 3500 + }, + { + "epoch": 0.63, + "grad_norm": 1.7165751457214355, + "learning_rate": 6.014266996651624e-06, + "log_odds_chosen": 0.6814987063407898, + "log_odds_ratio": -0.5507006049156189, + "logits/chosen": -0.3809913396835327, + "logits/rejected": -0.44108277559280396, + "logps/chosen": -0.9436809420585632, + "logps/rejected": -1.4071236848831177, + "loss": 0.9835, + "nll_loss": 0.9284241795539856, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0943681001663208, + "rewards/margins": 0.046344272792339325, + "rewards/rejected": -0.14071236550807953, + "step": 3510 + }, + { + "epoch": 0.64, + "grad_norm": 0.9776991605758667, + "learning_rate": 6.008443732712184e-06, + "log_odds_chosen": 0.8048272132873535, + "log_odds_ratio": -0.5047035217285156, + "logits/chosen": -0.3753579556941986, + "logits/rejected": -0.4107332229614258, + "logps/chosen": -0.8500427007675171, + "logps/rejected": -1.3697882890701294, + "loss": 0.9852, + "nll_loss": 0.9346874952316284, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0850042775273323, + "rewards/margins": 0.05197455734014511, + "rewards/rejected": -0.13697883486747742, + "step": 3520 + }, + { + "epoch": 0.64, + "grad_norm": 0.9801293611526489, + "learning_rate": 6.0026204687727465e-06, + "log_odds_chosen": 0.6912647485733032, + "log_odds_ratio": -0.5525920987129211, + "logits/chosen": -0.3607081174850464, + "logits/rejected": -0.35922548174858093, + "logps/chosen": -0.9259079098701477, + "logps/rejected": -1.367457389831543, + "loss": 0.955, + "nll_loss": 0.8997598886489868, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09259079396724701, + "rewards/margins": 0.04415493831038475, + "rewards/rejected": -0.13674573600292206, + "step": 3530 + }, + { + "epoch": 0.64, + "grad_norm": 2.1034672260284424, + "learning_rate": 5.996797204833309e-06, + "log_odds_chosen": 0.5489142537117004, + "log_odds_ratio": -0.6321261525154114, + "logits/chosen": -0.3296765387058258, + "logits/rejected": -0.3923155665397644, + "logps/chosen": -1.0097711086273193, + "logps/rejected": -1.359426498413086, + "loss": 1.0141, + "nll_loss": 0.9508882761001587, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10097712278366089, + "rewards/margins": 0.03496553748846054, + "rewards/rejected": -0.13594265282154083, + "step": 3540 + }, + { + "epoch": 0.64, + "grad_norm": 0.7424829006195068, + "learning_rate": 5.99097394089387e-06, + "log_odds_chosen": 0.7225674390792847, + "log_odds_ratio": -0.5436952114105225, + "logits/chosen": -0.4128730893135071, + "logits/rejected": -0.46673256158828735, + "logps/chosen": -0.9159235954284668, + "logps/rejected": -1.370883822441101, + "loss": 1.0188, + "nll_loss": 0.9644242525100708, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09159235656261444, + "rewards/margins": 0.04549603909254074, + "rewards/rejected": -0.13708840310573578, + "step": 3550 + }, + { + "epoch": 0.64, + "grad_norm": 1.374440312385559, + "learning_rate": 5.985150676954433e-06, + "log_odds_chosen": 0.8487750887870789, + "log_odds_ratio": -0.5091025829315186, + "logits/chosen": -0.3625090718269348, + "logits/rejected": -0.3949008882045746, + "logps/chosen": -0.8889452219009399, + "logps/rejected": -1.4368683099746704, + "loss": 0.9307, + "nll_loss": 0.8798302412033081, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08889452368021011, + "rewards/margins": 0.05479230731725693, + "rewards/rejected": -0.14368684589862823, + "step": 3560 + }, + { + "epoch": 0.64, + "grad_norm": 1.3159680366516113, + "learning_rate": 5.979327413014995e-06, + "log_odds_chosen": 0.43507567048072815, + "log_odds_ratio": -0.6821829676628113, + "logits/chosen": -0.40561336278915405, + "logits/rejected": -0.42101773619651794, + "logps/chosen": -1.0213924646377563, + "logps/rejected": -1.3662772178649902, + "loss": 1.0356, + "nll_loss": 0.9673385620117188, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.10213924944400787, + "rewards/margins": 0.034488484263420105, + "rewards/rejected": -0.13662774860858917, + "step": 3570 + }, + { + "epoch": 0.65, + "grad_norm": 0.8232075572013855, + "learning_rate": 5.973504149075556e-06, + "log_odds_chosen": 0.45091715455055237, + "log_odds_ratio": -0.6581467390060425, + "logits/chosen": -0.40304097533226013, + "logits/rejected": -0.41084232926368713, + "logps/chosen": -0.9728569984436035, + "logps/rejected": -1.2779741287231445, + "loss": 1.0213, + "nll_loss": 0.9555120468139648, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09728571027517319, + "rewards/margins": 0.030511703342199326, + "rewards/rejected": -0.12779740989208221, + "step": 3580 + }, + { + "epoch": 0.65, + "grad_norm": 1.1752350330352783, + "learning_rate": 5.967680885136119e-06, + "log_odds_chosen": 0.5622051358222961, + "log_odds_ratio": -0.621745228767395, + "logits/chosen": -0.3896362781524658, + "logits/rejected": -0.4081563949584961, + "logps/chosen": -0.8804425001144409, + "logps/rejected": -1.2338899374008179, + "loss": 0.9236, + "nll_loss": 0.8613778948783875, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08804424852132797, + "rewards/margins": 0.03534474968910217, + "rewards/rejected": -0.12338900566101074, + "step": 3590 + }, + { + "epoch": 0.65, + "grad_norm": 0.8000437021255493, + "learning_rate": 5.961857621196681e-06, + "log_odds_chosen": 0.643998920917511, + "log_odds_ratio": -0.5432684421539307, + "logits/chosen": -0.36141398549079895, + "logits/rejected": -0.39153724908828735, + "logps/chosen": -0.8999090194702148, + "logps/rejected": -1.299071192741394, + "loss": 0.9452, + "nll_loss": 0.8909200429916382, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08999090641736984, + "rewards/margins": 0.03991622105240822, + "rewards/rejected": -0.12990711629390717, + "step": 3600 + }, + { + "epoch": 0.65, + "grad_norm": 1.351096510887146, + "learning_rate": 5.956034357257242e-06, + "log_odds_chosen": 0.6249284744262695, + "log_odds_ratio": -0.5861242413520813, + "logits/chosen": -0.3442951738834381, + "logits/rejected": -0.37348872423171997, + "logps/chosen": -1.0251295566558838, + "logps/rejected": -1.4468566179275513, + "loss": 1.0134, + "nll_loss": 0.9547685384750366, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10251295566558838, + "rewards/margins": 0.04217272251844406, + "rewards/rejected": -0.14468567073345184, + "step": 3610 + }, + { + "epoch": 0.65, + "grad_norm": 0.854928731918335, + "learning_rate": 5.950211093317804e-06, + "log_odds_chosen": 0.36324846744537354, + "log_odds_ratio": -0.6676633358001709, + "logits/chosen": -0.3442520201206207, + "logits/rejected": -0.3605124354362488, + "logps/chosen": -0.9377188682556152, + "logps/rejected": -1.135480523109436, + "loss": 0.9911, + "nll_loss": 0.9242986440658569, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09377188980579376, + "rewards/margins": 0.01977616176009178, + "rewards/rejected": -0.11354805529117584, + "step": 3620 + }, + { + "epoch": 0.66, + "grad_norm": 0.8976321816444397, + "learning_rate": 5.944387829378366e-06, + "log_odds_chosen": 0.7812395095825195, + "log_odds_ratio": -0.5367813110351562, + "logits/chosen": -0.341721773147583, + "logits/rejected": -0.37966251373291016, + "logps/chosen": -0.9868513345718384, + "logps/rejected": -1.5086472034454346, + "loss": 0.962, + "nll_loss": 0.908337414264679, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0986851304769516, + "rewards/margins": 0.05217960476875305, + "rewards/rejected": -0.15086473524570465, + "step": 3630 + }, + { + "epoch": 0.66, + "grad_norm": 1.1400549411773682, + "learning_rate": 5.938564565438928e-06, + "log_odds_chosen": 0.7185646295547485, + "log_odds_ratio": -0.5341478586196899, + "logits/chosen": -0.31118088960647583, + "logits/rejected": -0.35552436113357544, + "logps/chosen": -0.9433846473693848, + "logps/rejected": -1.4317991733551025, + "loss": 1.0236, + "nll_loss": 0.9701422452926636, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09433845430612564, + "rewards/margins": 0.048841461539268494, + "rewards/rejected": -0.14317990839481354, + "step": 3640 + }, + { + "epoch": 0.66, + "grad_norm": 1.4526828527450562, + "learning_rate": 5.93274130149949e-06, + "log_odds_chosen": 0.673244297504425, + "log_odds_ratio": -0.5420977473258972, + "logits/chosen": -0.3347756266593933, + "logits/rejected": -0.3824438452720642, + "logps/chosen": -0.8603219985961914, + "logps/rejected": -1.2901854515075684, + "loss": 0.9376, + "nll_loss": 0.8833721280097961, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0860322043299675, + "rewards/margins": 0.04298635199666023, + "rewards/rejected": -0.12901854515075684, + "step": 3650 + }, + { + "epoch": 0.66, + "grad_norm": 1.4973233938217163, + "learning_rate": 5.926918037560052e-06, + "log_odds_chosen": 0.8371866345405579, + "log_odds_ratio": -0.5633835792541504, + "logits/chosen": -0.3284203112125397, + "logits/rejected": -0.38096609711647034, + "logps/chosen": -0.8801159858703613, + "logps/rejected": -1.420422077178955, + "loss": 0.9905, + "nll_loss": 0.9342048764228821, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08801160752773285, + "rewards/margins": 0.05403059720993042, + "rewards/rejected": -0.14204218983650208, + "step": 3660 + }, + { + "epoch": 0.66, + "grad_norm": 1.4614819288253784, + "learning_rate": 5.921094773620615e-06, + "log_odds_chosen": 0.9499984979629517, + "log_odds_ratio": -0.475381076335907, + "logits/chosen": -0.2903318405151367, + "logits/rejected": -0.35688871145248413, + "logps/chosen": -0.8208259344100952, + "logps/rejected": -1.4491088390350342, + "loss": 0.8832, + "nll_loss": 0.8356998562812805, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0820825919508934, + "rewards/margins": 0.06282828003168106, + "rewards/rejected": -0.14491088688373566, + "step": 3670 + }, + { + "epoch": 0.66, + "grad_norm": 1.2535169124603271, + "learning_rate": 5.915271509681176e-06, + "log_odds_chosen": 0.7504245638847351, + "log_odds_ratio": -0.5328863859176636, + "logits/chosen": -0.3805959224700928, + "logits/rejected": -0.3868086338043213, + "logps/chosen": -0.9830179214477539, + "logps/rejected": -1.489959478378296, + "loss": 0.9957, + "nll_loss": 0.9423898458480835, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09830178320407867, + "rewards/margins": 0.05069415643811226, + "rewards/rejected": -0.14899595081806183, + "step": 3680 + }, + { + "epoch": 0.67, + "grad_norm": 1.5583924055099487, + "learning_rate": 5.909448245741738e-06, + "log_odds_chosen": 0.6056820750236511, + "log_odds_ratio": -0.5717626810073853, + "logits/chosen": -0.38489967584609985, + "logits/rejected": -0.4191763401031494, + "logps/chosen": -1.0282983779907227, + "logps/rejected": -1.4288691282272339, + "loss": 0.9785, + "nll_loss": 0.9213641285896301, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10282983630895615, + "rewards/margins": 0.04005708545446396, + "rewards/rejected": -0.1428869366645813, + "step": 3690 + }, + { + "epoch": 0.67, + "grad_norm": 2.3320605754852295, + "learning_rate": 5.903624981802301e-06, + "log_odds_chosen": 1.079827904701233, + "log_odds_ratio": -0.4331550598144531, + "logits/chosen": -0.3357813060283661, + "logits/rejected": -0.3873363733291626, + "logps/chosen": -0.8198086023330688, + "logps/rejected": -1.5241756439208984, + "loss": 0.8674, + "nll_loss": 0.8240398168563843, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.081980861723423, + "rewards/margins": 0.07043670862913132, + "rewards/rejected": -0.15241757035255432, + "step": 3700 + }, + { + "epoch": 0.67, + "grad_norm": 1.4478205442428589, + "learning_rate": 5.897801717862861e-06, + "log_odds_chosen": 0.7121813893318176, + "log_odds_ratio": -0.5246703028678894, + "logits/chosen": -0.372341126203537, + "logits/rejected": -0.40585875511169434, + "logps/chosen": -0.9683243036270142, + "logps/rejected": -1.4463528394699097, + "loss": 0.9554, + "nll_loss": 0.9029725193977356, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09683243185281754, + "rewards/margins": 0.04780285805463791, + "rewards/rejected": -0.14463528990745544, + "step": 3710 + }, + { + "epoch": 0.67, + "grad_norm": 0.7791962623596191, + "learning_rate": 5.891978453923424e-06, + "log_odds_chosen": 0.6235748529434204, + "log_odds_ratio": -0.5931678414344788, + "logits/chosen": -0.37661951780319214, + "logits/rejected": -0.38163405656814575, + "logps/chosen": -0.9610217809677124, + "logps/rejected": -1.3512619733810425, + "loss": 0.948, + "nll_loss": 0.8886823654174805, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09610219299793243, + "rewards/margins": 0.03902401775121689, + "rewards/rejected": -0.13512620329856873, + "step": 3720 + }, + { + "epoch": 0.67, + "grad_norm": 1.0583852529525757, + "learning_rate": 5.886155189983986e-06, + "log_odds_chosen": 0.7444362640380859, + "log_odds_ratio": -0.5444598197937012, + "logits/chosen": -0.36561426520347595, + "logits/rejected": -0.42521706223487854, + "logps/chosen": -0.9446412324905396, + "logps/rejected": -1.4132436513900757, + "loss": 0.9795, + "nll_loss": 0.9250993728637695, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09446412324905396, + "rewards/margins": 0.04686024412512779, + "rewards/rejected": -0.14132437109947205, + "step": 3730 + }, + { + "epoch": 0.68, + "grad_norm": 1.1684253215789795, + "learning_rate": 5.880331926044547e-06, + "log_odds_chosen": 0.7757261395454407, + "log_odds_ratio": -0.5709885358810425, + "logits/chosen": -0.29772838950157166, + "logits/rejected": -0.292438268661499, + "logps/chosen": -0.9121503829956055, + "logps/rejected": -1.3928760290145874, + "loss": 0.9107, + "nll_loss": 0.8535849452018738, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09121503680944443, + "rewards/margins": 0.048072561621665955, + "rewards/rejected": -0.13928762078285217, + "step": 3740 + }, + { + "epoch": 0.68, + "grad_norm": 1.7918241024017334, + "learning_rate": 5.87450866210511e-06, + "log_odds_chosen": 0.6071350574493408, + "log_odds_ratio": -0.5676754117012024, + "logits/chosen": -0.37720435857772827, + "logits/rejected": -0.4086214601993561, + "logps/chosen": -0.9288979768753052, + "logps/rejected": -1.316955327987671, + "loss": 1.0195, + "nll_loss": 0.9627493023872375, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09288980066776276, + "rewards/margins": 0.03880572319030762, + "rewards/rejected": -0.13169552385807037, + "step": 3750 + }, + { + "epoch": 0.68, + "grad_norm": 0.7484256029129028, + "learning_rate": 5.868685398165672e-06, + "log_odds_chosen": 0.5376003384590149, + "log_odds_ratio": -0.6325076818466187, + "logits/chosen": -0.399244487285614, + "logits/rejected": -0.4125980734825134, + "logps/chosen": -0.9814577102661133, + "logps/rejected": -1.357806921005249, + "loss": 1.0426, + "nll_loss": 0.9793224334716797, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.09814576804637909, + "rewards/margins": 0.03763493150472641, + "rewards/rejected": -0.1357807070016861, + "step": 3760 + }, + { + "epoch": 0.68, + "grad_norm": 1.0059581995010376, + "learning_rate": 5.8628621342262335e-06, + "log_odds_chosen": 0.6811798810958862, + "log_odds_ratio": -0.5329106450080872, + "logits/chosen": -0.39979439973831177, + "logits/rejected": -0.429983913898468, + "logps/chosen": -0.9924589991569519, + "logps/rejected": -1.4579194784164429, + "loss": 0.953, + "nll_loss": 0.8996642231941223, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09924589097499847, + "rewards/margins": 0.046546053141355515, + "rewards/rejected": -0.1457919478416443, + "step": 3770 + }, + { + "epoch": 0.68, + "grad_norm": 0.6539117097854614, + "learning_rate": 5.857038870286796e-06, + "log_odds_chosen": 0.8818238973617554, + "log_odds_ratio": -0.5463408827781677, + "logits/chosen": -0.33649712800979614, + "logits/rejected": -0.3999442160129547, + "logps/chosen": -0.9039346575737, + "logps/rejected": -1.4749833345413208, + "loss": 1.0149, + "nll_loss": 0.9602839350700378, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09039346128702164, + "rewards/margins": 0.05710485577583313, + "rewards/rejected": -0.14749832451343536, + "step": 3780 + }, + { + "epoch": 0.68, + "grad_norm": 1.743882179260254, + "learning_rate": 5.851215606347357e-06, + "log_odds_chosen": 0.7480627298355103, + "log_odds_ratio": -0.5538538098335266, + "logits/chosen": -0.4183572232723236, + "logits/rejected": -0.4714382290840149, + "logps/chosen": -0.9981712102890015, + "logps/rejected": -1.461587905883789, + "loss": 0.9667, + "nll_loss": 0.9112657308578491, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09981712698936462, + "rewards/margins": 0.046341657638549805, + "rewards/rejected": -0.14615878462791443, + "step": 3790 + }, + { + "epoch": 0.69, + "grad_norm": 1.5963619947433472, + "learning_rate": 5.845392342407919e-06, + "log_odds_chosen": 0.4980127215385437, + "log_odds_ratio": -0.6052228808403015, + "logits/chosen": -0.4400199353694916, + "logits/rejected": -0.4431309103965759, + "logps/chosen": -0.9494178891181946, + "logps/rejected": -1.2823858261108398, + "loss": 0.9827, + "nll_loss": 0.9221373796463013, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09494178742170334, + "rewards/margins": 0.03329680114984512, + "rewards/rejected": -0.12823858857154846, + "step": 3800 + }, + { + "epoch": 0.69, + "grad_norm": 1.0663025379180908, + "learning_rate": 5.839569078468481e-06, + "log_odds_chosen": 0.6610610485076904, + "log_odds_ratio": -0.5737951993942261, + "logits/chosen": -0.40231895446777344, + "logits/rejected": -0.44721460342407227, + "logps/chosen": -0.9405794143676758, + "logps/rejected": -1.3562818765640259, + "loss": 0.9729, + "nll_loss": 0.9154725074768066, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09405793994665146, + "rewards/margins": 0.04157023876905441, + "rewards/rejected": -0.13562817871570587, + "step": 3810 + }, + { + "epoch": 0.69, + "grad_norm": 0.6941992044448853, + "learning_rate": 5.833745814529043e-06, + "log_odds_chosen": 0.3320769965648651, + "log_odds_ratio": -0.6687268018722534, + "logits/chosen": -0.4455398619174957, + "logits/rejected": -0.46363648772239685, + "logps/chosen": -1.063694715499878, + "logps/rejected": -1.2793588638305664, + "loss": 1.1192, + "nll_loss": 1.0523333549499512, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10636948049068451, + "rewards/margins": 0.02156640589237213, + "rewards/rejected": -0.12793588638305664, + "step": 3820 + }, + { + "epoch": 0.69, + "grad_norm": 0.7818502187728882, + "learning_rate": 5.827922550589605e-06, + "log_odds_chosen": 0.7564540505409241, + "log_odds_ratio": -0.5458071231842041, + "logits/chosen": -0.39144763350486755, + "logits/rejected": -0.43861907720565796, + "logps/chosen": -0.9263273477554321, + "logps/rejected": -1.4547199010849, + "loss": 0.9623, + "nll_loss": 0.9077495336532593, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0926327258348465, + "rewards/margins": 0.05283927172422409, + "rewards/rejected": -0.14547200500965118, + "step": 3830 + }, + { + "epoch": 0.69, + "grad_norm": 1.542531132698059, + "learning_rate": 5.822099286650167e-06, + "log_odds_chosen": 0.609904408454895, + "log_odds_ratio": -0.570465087890625, + "logits/chosen": -0.38601264357566833, + "logits/rejected": -0.45281514525413513, + "logps/chosen": -0.917966365814209, + "logps/rejected": -1.28914213180542, + "loss": 0.9971, + "nll_loss": 0.9401028752326965, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0917966365814209, + "rewards/margins": 0.03711757808923721, + "rewards/rejected": -0.12891420722007751, + "step": 3840 + }, + { + "epoch": 0.7, + "grad_norm": 0.7471659779548645, + "learning_rate": 5.816276022710729e-06, + "log_odds_chosen": 0.36103135347366333, + "log_odds_ratio": -0.6382339596748352, + "logits/chosen": -0.4501968026161194, + "logits/rejected": -0.46252304315567017, + "logps/chosen": -1.0752949714660645, + "logps/rejected": -1.3563129901885986, + "loss": 0.9825, + "nll_loss": 0.9186823964118958, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10752949863672256, + "rewards/margins": 0.028101811185479164, + "rewards/rejected": -0.13563130795955658, + "step": 3850 + }, + { + "epoch": 0.7, + "grad_norm": 0.9536011815071106, + "learning_rate": 5.810452758771292e-06, + "log_odds_chosen": 0.7206624746322632, + "log_odds_ratio": -0.5553077459335327, + "logits/chosen": -0.4182675778865814, + "logits/rejected": -0.45456624031066895, + "logps/chosen": -0.9347223043441772, + "logps/rejected": -1.4037220478057861, + "loss": 1.0241, + "nll_loss": 0.9685796499252319, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09347222745418549, + "rewards/margins": 0.04689997434616089, + "rewards/rejected": -0.14037221670150757, + "step": 3860 + }, + { + "epoch": 0.7, + "grad_norm": 1.2044615745544434, + "learning_rate": 5.804629494831853e-06, + "log_odds_chosen": 0.7299908399581909, + "log_odds_ratio": -0.5608175992965698, + "logits/chosen": -0.3640984892845154, + "logits/rejected": -0.4097954332828522, + "logps/chosen": -0.9941253662109375, + "logps/rejected": -1.464939832687378, + "loss": 0.9944, + "nll_loss": 0.9383255243301392, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09941253811120987, + "rewards/margins": 0.04708145186305046, + "rewards/rejected": -0.14649398624897003, + "step": 3870 + }, + { + "epoch": 0.7, + "grad_norm": 1.311955451965332, + "learning_rate": 5.798806230892415e-06, + "log_odds_chosen": 0.7198322415351868, + "log_odds_ratio": -0.5975719690322876, + "logits/chosen": -0.38058674335479736, + "logits/rejected": -0.42003631591796875, + "logps/chosen": -0.8995689153671265, + "logps/rejected": -1.3722004890441895, + "loss": 0.9498, + "nll_loss": 0.8900270462036133, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08995689451694489, + "rewards/margins": 0.047263167798519135, + "rewards/rejected": -0.13722005486488342, + "step": 3880 + }, + { + "epoch": 0.7, + "grad_norm": 1.3792704343795776, + "learning_rate": 5.792982966952977e-06, + "log_odds_chosen": 0.5147228837013245, + "log_odds_ratio": -0.6110295057296753, + "logits/chosen": -0.45408734679222107, + "logits/rejected": -0.4423491358757019, + "logps/chosen": -1.0131367444992065, + "logps/rejected": -1.3439712524414062, + "loss": 0.9864, + "nll_loss": 0.92534339427948, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10131368786096573, + "rewards/margins": 0.03308345377445221, + "rewards/rejected": -0.13439713418483734, + "step": 3890 + }, + { + "epoch": 0.7, + "grad_norm": 1.2134101390838623, + "learning_rate": 5.7871597030135384e-06, + "log_odds_chosen": 0.8219815492630005, + "log_odds_ratio": -0.5065579414367676, + "logits/chosen": -0.4033416211605072, + "logits/rejected": -0.4501563608646393, + "logps/chosen": -0.9055941700935364, + "logps/rejected": -1.4462974071502686, + "loss": 0.9623, + "nll_loss": 0.9116464853286743, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09055942296981812, + "rewards/margins": 0.054070331156253815, + "rewards/rejected": -0.14462974667549133, + "step": 3900 + }, + { + "epoch": 0.71, + "grad_norm": 1.2515047788619995, + "learning_rate": 5.781336439074101e-06, + "log_odds_chosen": 0.46673351526260376, + "log_odds_ratio": -0.627618134021759, + "logits/chosen": -0.3921314775943756, + "logits/rejected": -0.4186267852783203, + "logps/chosen": -1.0056527853012085, + "logps/rejected": -1.3170154094696045, + "loss": 1.0003, + "nll_loss": 0.9375259280204773, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.10056529194116592, + "rewards/margins": 0.031136253848671913, + "rewards/rejected": -0.1317015439271927, + "step": 3910 + }, + { + "epoch": 0.71, + "grad_norm": 2.3137080669403076, + "learning_rate": 5.775513175134663e-06, + "log_odds_chosen": 0.7130810618400574, + "log_odds_ratio": -0.5645673871040344, + "logits/chosen": -0.42148932814598083, + "logits/rejected": -0.44052475690841675, + "logps/chosen": -1.0017608404159546, + "logps/rejected": -1.5046265125274658, + "loss": 0.9761, + "nll_loss": 0.9195976257324219, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10017608106136322, + "rewards/margins": 0.05028656870126724, + "rewards/rejected": -0.15046265721321106, + "step": 3920 + }, + { + "epoch": 0.71, + "grad_norm": 1.1239081621170044, + "learning_rate": 5.7696899111952245e-06, + "log_odds_chosen": 0.6651091575622559, + "log_odds_ratio": -0.5374134182929993, + "logits/chosen": -0.3734278082847595, + "logits/rejected": -0.4400072693824768, + "logps/chosen": -0.8720897436141968, + "logps/rejected": -1.2871179580688477, + "loss": 0.9574, + "nll_loss": 0.9036323428153992, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08720897883176804, + "rewards/margins": 0.04150282219052315, + "rewards/rejected": -0.12871180474758148, + "step": 3930 + }, + { + "epoch": 0.71, + "grad_norm": 0.7715298533439636, + "learning_rate": 5.763866647255787e-06, + "log_odds_chosen": 0.5933562517166138, + "log_odds_ratio": -0.5951903462409973, + "logits/chosen": -0.38817816972732544, + "logits/rejected": -0.4285859167575836, + "logps/chosen": -0.9582163691520691, + "logps/rejected": -1.3706133365631104, + "loss": 0.9348, + "nll_loss": 0.8752536773681641, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09582163393497467, + "rewards/margins": 0.041239701211452484, + "rewards/rejected": -0.13706132769584656, + "step": 3940 + }, + { + "epoch": 0.71, + "grad_norm": 0.7734090685844421, + "learning_rate": 5.758043383316349e-06, + "log_odds_chosen": 0.8612390756607056, + "log_odds_ratio": -0.49259430170059204, + "logits/chosen": -0.41785088181495667, + "logits/rejected": -0.4185684323310852, + "logps/chosen": -0.8485938906669617, + "logps/rejected": -1.3895423412322998, + "loss": 0.9617, + "nll_loss": 0.9124394655227661, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08485939353704453, + "rewards/margins": 0.054094843566417694, + "rewards/rejected": -0.13895423710346222, + "step": 3950 + }, + { + "epoch": 0.72, + "grad_norm": 1.0429623126983643, + "learning_rate": 5.752220119376911e-06, + "log_odds_chosen": 0.9815561175346375, + "log_odds_ratio": -0.5164690017700195, + "logits/chosen": -0.3655704855918884, + "logits/rejected": -0.41669750213623047, + "logps/chosen": -0.8296697735786438, + "logps/rejected": -1.476696491241455, + "loss": 0.9442, + "nll_loss": 0.8925908803939819, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08296697586774826, + "rewards/margins": 0.06470267474651337, + "rewards/rejected": -0.14766964316368103, + "step": 3960 + }, + { + "epoch": 0.72, + "grad_norm": 1.7995119094848633, + "learning_rate": 5.746396855437472e-06, + "log_odds_chosen": 0.717779278755188, + "log_odds_ratio": -0.6176232695579529, + "logits/chosen": -0.4269459843635559, + "logits/rejected": -0.4877711832523346, + "logps/chosen": -1.0023086071014404, + "logps/rejected": -1.502467155456543, + "loss": 1.0947, + "nll_loss": 1.0329031944274902, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.1002308577299118, + "rewards/margins": 0.05001585930585861, + "rewards/rejected": -0.15024670958518982, + "step": 3970 + }, + { + "epoch": 0.72, + "grad_norm": 0.7775267362594604, + "learning_rate": 5.740573591498034e-06, + "log_odds_chosen": 0.66571444272995, + "log_odds_ratio": -0.605377733707428, + "logits/chosen": -0.33649665117263794, + "logits/rejected": -0.4037664532661438, + "logps/chosen": -0.9523887634277344, + "logps/rejected": -1.3614118099212646, + "loss": 0.9883, + "nll_loss": 0.9277440905570984, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09523888677358627, + "rewards/margins": 0.04090230539441109, + "rewards/rejected": -0.13614118099212646, + "step": 3980 + }, + { + "epoch": 0.72, + "grad_norm": 1.1453152894973755, + "learning_rate": 5.734750327558596e-06, + "log_odds_chosen": 0.6597369909286499, + "log_odds_ratio": -0.5555736422538757, + "logits/chosen": -0.38733386993408203, + "logits/rejected": -0.4213401675224304, + "logps/chosen": -0.920432448387146, + "logps/rejected": -1.325626015663147, + "loss": 0.9479, + "nll_loss": 0.8923455476760864, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09204325079917908, + "rewards/margins": 0.0405193530023098, + "rewards/rejected": -0.13256260752677917, + "step": 3990 + }, + { + "epoch": 0.72, + "grad_norm": 1.2393028736114502, + "learning_rate": 5.728927063619158e-06, + "log_odds_chosen": 0.7080034613609314, + "log_odds_ratio": -0.5932170748710632, + "logits/chosen": -0.32581627368927, + "logits/rejected": -0.392342209815979, + "logps/chosen": -0.9800984263420105, + "logps/rejected": -1.3934428691864014, + "loss": 0.9699, + "nll_loss": 0.9105931520462036, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09800984710454941, + "rewards/margins": 0.04133444279432297, + "rewards/rejected": -0.13934428989887238, + "step": 4000 + }, + { + "epoch": 0.72, + "grad_norm": 1.5815924406051636, + "learning_rate": 5.7231037996797204e-06, + "log_odds_chosen": 0.9281368255615234, + "log_odds_ratio": -0.4948226809501648, + "logits/chosen": -0.3796248733997345, + "logits/rejected": -0.42877936363220215, + "logps/chosen": -0.8506223559379578, + "logps/rejected": -1.4400184154510498, + "loss": 0.9755, + "nll_loss": 0.926009476184845, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08506224304437637, + "rewards/margins": 0.05893959850072861, + "rewards/rejected": -0.14400185644626617, + "step": 4010 + }, + { + "epoch": 0.73, + "grad_norm": 1.324011206626892, + "learning_rate": 5.717280535740282e-06, + "log_odds_chosen": 0.8670433759689331, + "log_odds_ratio": -0.5069810152053833, + "logits/chosen": -0.41438978910446167, + "logits/rejected": -0.4259106516838074, + "logps/chosen": -0.9058378338813782, + "logps/rejected": -1.4797426462173462, + "loss": 0.9619, + "nll_loss": 0.9111762046813965, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09058378636837006, + "rewards/margins": 0.0573904812335968, + "rewards/rejected": -0.14797425270080566, + "step": 4020 + }, + { + "epoch": 0.73, + "grad_norm": 1.286294937133789, + "learning_rate": 5.711457271800844e-06, + "log_odds_chosen": 0.7373972535133362, + "log_odds_ratio": -0.5619674921035767, + "logits/chosen": -0.3601471185684204, + "logits/rejected": -0.3947383463382721, + "logps/chosen": -0.9275285601615906, + "logps/rejected": -1.4038407802581787, + "loss": 0.9351, + "nll_loss": 0.8788579702377319, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0927528589963913, + "rewards/margins": 0.04763120785355568, + "rewards/rejected": -0.14038407802581787, + "step": 4030 + }, + { + "epoch": 0.73, + "grad_norm": 2.57619571685791, + "learning_rate": 5.7056340078614065e-06, + "log_odds_chosen": 0.5859150886535645, + "log_odds_ratio": -0.5528852343559265, + "logits/chosen": -0.3482866883277893, + "logits/rejected": -0.3774065375328064, + "logps/chosen": -1.0320050716400146, + "logps/rejected": -1.4059724807739258, + "loss": 0.9701, + "nll_loss": 0.9148612022399902, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10320049524307251, + "rewards/margins": 0.03739675134420395, + "rewards/rejected": -0.14059725403785706, + "step": 4040 + }, + { + "epoch": 0.73, + "grad_norm": 1.194467544555664, + "learning_rate": 5.699810743921968e-06, + "log_odds_chosen": 0.7288400530815125, + "log_odds_ratio": -0.5820309519767761, + "logits/chosen": -0.3455452024936676, + "logits/rejected": -0.38425126671791077, + "logps/chosen": -0.9070445895195007, + "logps/rejected": -1.4033013582229614, + "loss": 0.9193, + "nll_loss": 0.8611399531364441, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09070447087287903, + "rewards/margins": 0.04962567239999771, + "rewards/rejected": -0.14033015072345734, + "step": 4050 + }, + { + "epoch": 0.73, + "grad_norm": 1.142704725265503, + "learning_rate": 5.6939874799825295e-06, + "log_odds_chosen": 0.4837094843387604, + "log_odds_ratio": -0.6172317266464233, + "logits/chosen": -0.44991883635520935, + "logits/rejected": -0.4560603201389313, + "logps/chosen": -1.0034350156784058, + "logps/rejected": -1.2976691722869873, + "loss": 1.1288, + "nll_loss": 1.0670511722564697, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1003435030579567, + "rewards/margins": 0.0294234249740839, + "rewards/rejected": -0.12976691126823425, + "step": 4060 + }, + { + "epoch": 0.74, + "grad_norm": 1.461199402809143, + "learning_rate": 5.688164216043092e-06, + "log_odds_chosen": 0.6147937178611755, + "log_odds_ratio": -0.5649920701980591, + "logits/chosen": -0.3980977535247803, + "logits/rejected": -0.4219932556152344, + "logps/chosen": -0.9132669568061829, + "logps/rejected": -1.2988579273223877, + "loss": 0.9656, + "nll_loss": 0.9091387987136841, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09132669121026993, + "rewards/margins": 0.03855909779667854, + "rewards/rejected": -0.12988579273223877, + "step": 4070 + }, + { + "epoch": 0.74, + "grad_norm": 1.2351607084274292, + "learning_rate": 5.682340952103654e-06, + "log_odds_chosen": 0.5310704112052917, + "log_odds_ratio": -0.6117419004440308, + "logits/chosen": -0.3770817518234253, + "logits/rejected": -0.4008653163909912, + "logps/chosen": -0.9646285772323608, + "logps/rejected": -1.3340489864349365, + "loss": 0.9654, + "nll_loss": 0.9041979908943176, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09646286070346832, + "rewards/margins": 0.03694205358624458, + "rewards/rejected": -0.1334048956632614, + "step": 4080 + }, + { + "epoch": 0.74, + "grad_norm": 1.237679362297058, + "learning_rate": 5.6765176881642155e-06, + "log_odds_chosen": 0.8533763885498047, + "log_odds_ratio": -0.486391544342041, + "logits/chosen": -0.40168827772140503, + "logits/rejected": -0.4379729628562927, + "logps/chosen": -0.8621419072151184, + "logps/rejected": -1.3524459600448608, + "loss": 1.0422, + "nll_loss": 0.9935612678527832, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08621419221162796, + "rewards/margins": 0.04903041943907738, + "rewards/rejected": -0.13524460792541504, + "step": 4090 + }, + { + "epoch": 0.74, + "grad_norm": 0.5430986285209656, + "learning_rate": 5.670694424224778e-06, + "log_odds_chosen": 0.46620744466781616, + "log_odds_ratio": -0.6109627485275269, + "logits/chosen": -0.4185555577278137, + "logits/rejected": -0.44289064407348633, + "logps/chosen": -0.9953963160514832, + "logps/rejected": -1.2747821807861328, + "loss": 1.0714, + "nll_loss": 1.0102908611297607, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09953964501619339, + "rewards/margins": 0.02793857827782631, + "rewards/rejected": -0.1274782121181488, + "step": 4100 + }, + { + "epoch": 0.74, + "grad_norm": 0.8633012175559998, + "learning_rate": 5.66487116028534e-06, + "log_odds_chosen": 0.6167644262313843, + "log_odds_ratio": -0.5935536623001099, + "logits/chosen": -0.34687721729278564, + "logits/rejected": -0.3849531412124634, + "logps/chosen": -0.8814305067062378, + "logps/rejected": -1.2702577114105225, + "loss": 0.9524, + "nll_loss": 0.892997145652771, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08814306557178497, + "rewards/margins": 0.03888271749019623, + "rewards/rejected": -0.12702576816082, + "step": 4110 + }, + { + "epoch": 0.74, + "grad_norm": 1.4002399444580078, + "learning_rate": 5.659047896345902e-06, + "log_odds_chosen": 0.5371646285057068, + "log_odds_ratio": -0.5932449698448181, + "logits/chosen": -0.40844884514808655, + "logits/rejected": -0.4111505150794983, + "logps/chosen": -1.0008736848831177, + "logps/rejected": -1.3484508991241455, + "loss": 0.9959, + "nll_loss": 0.9365564584732056, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10008736699819565, + "rewards/margins": 0.03475772216916084, + "rewards/rejected": -0.1348450928926468, + "step": 4120 + }, + { + "epoch": 0.75, + "grad_norm": 1.1915359497070312, + "learning_rate": 5.653224632406464e-06, + "log_odds_chosen": 0.5471753478050232, + "log_odds_ratio": -0.5521697998046875, + "logits/chosen": -0.4373193681240082, + "logits/rejected": -0.48634281754493713, + "logps/chosen": -1.02994966506958, + "logps/rejected": -1.420654058456421, + "loss": 1.0112, + "nll_loss": 0.9560235738754272, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10299496352672577, + "rewards/margins": 0.03907042369246483, + "rewards/rejected": -0.1420653760433197, + "step": 4130 + }, + { + "epoch": 0.75, + "grad_norm": 0.7222558856010437, + "learning_rate": 5.647401368467026e-06, + "log_odds_chosen": 0.4993858337402344, + "log_odds_ratio": -0.641208827495575, + "logits/chosen": -0.37473541498184204, + "logits/rejected": -0.4002726078033447, + "logps/chosen": -1.0152702331542969, + "logps/rejected": -1.339428186416626, + "loss": 0.9919, + "nll_loss": 0.9277341961860657, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.10152701288461685, + "rewards/margins": 0.032415807247161865, + "rewards/rejected": -0.13394282758235931, + "step": 4140 + }, + { + "epoch": 0.75, + "grad_norm": 1.2790604829788208, + "learning_rate": 5.641578104527587e-06, + "log_odds_chosen": 0.4182191789150238, + "log_odds_ratio": -0.6287438273429871, + "logits/chosen": -0.45408788323402405, + "logits/rejected": -0.4832921624183655, + "logps/chosen": -1.0568732023239136, + "logps/rejected": -1.3412824869155884, + "loss": 1.0578, + "nll_loss": 0.9948795437812805, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10568732023239136, + "rewards/margins": 0.028440916910767555, + "rewards/rejected": -0.13412824273109436, + "step": 4150 + }, + { + "epoch": 0.75, + "grad_norm": 1.3140400648117065, + "learning_rate": 5.635754840588149e-06, + "log_odds_chosen": 0.6435329914093018, + "log_odds_ratio": -0.6096024513244629, + "logits/chosen": -0.33399510383605957, + "logits/rejected": -0.36445289850234985, + "logps/chosen": -0.8361338376998901, + "logps/rejected": -1.2639483213424683, + "loss": 0.9501, + "nll_loss": 0.8891298174858093, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08361340314149857, + "rewards/margins": 0.042781438678503036, + "rewards/rejected": -0.1263948380947113, + "step": 4160 + }, + { + "epoch": 0.75, + "grad_norm": 1.5185847282409668, + "learning_rate": 5.6299315766487115e-06, + "log_odds_chosen": 0.6422057747840881, + "log_odds_ratio": -0.5717750787734985, + "logits/chosen": -0.4139169156551361, + "logits/rejected": -0.49386876821517944, + "logps/chosen": -0.9835192561149597, + "logps/rejected": -1.4217901229858398, + "loss": 1.0613, + "nll_loss": 1.0041275024414062, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09835191816091537, + "rewards/margins": 0.04382709413766861, + "rewards/rejected": -0.14217904210090637, + "step": 4170 + }, + { + "epoch": 0.76, + "grad_norm": 0.852863609790802, + "learning_rate": 5.624108312709273e-06, + "log_odds_chosen": 0.5467121005058289, + "log_odds_ratio": -0.5848273038864136, + "logits/chosen": -0.4137410521507263, + "logits/rejected": -0.4252719283103943, + "logps/chosen": -0.9758207201957703, + "logps/rejected": -1.328446626663208, + "loss": 0.9739, + "nll_loss": 0.9153772592544556, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09758206456899643, + "rewards/margins": 0.035262592136859894, + "rewards/rejected": -0.13284465670585632, + "step": 4180 + }, + { + "epoch": 0.76, + "grad_norm": 1.452010154724121, + "learning_rate": 5.618285048769835e-06, + "log_odds_chosen": 0.8447461128234863, + "log_odds_ratio": -0.5126752853393555, + "logits/chosen": -0.3651435971260071, + "logits/rejected": -0.44532880187034607, + "logps/chosen": -0.940411388874054, + "logps/rejected": -1.488644003868103, + "loss": 0.9913, + "nll_loss": 0.9400469064712524, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09404114633798599, + "rewards/margins": 0.05482326075434685, + "rewards/rejected": -0.14886441826820374, + "step": 4190 + }, + { + "epoch": 0.76, + "grad_norm": 1.0430749654769897, + "learning_rate": 5.6124617848303976e-06, + "log_odds_chosen": 0.7600489854812622, + "log_odds_ratio": -0.547897219657898, + "logits/chosen": -0.40632161498069763, + "logits/rejected": -0.4137774407863617, + "logps/chosen": -0.9630746841430664, + "logps/rejected": -1.4354467391967773, + "loss": 1.0375, + "nll_loss": 0.9827224612236023, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09630747139453888, + "rewards/margins": 0.04723720625042915, + "rewards/rejected": -0.14354465901851654, + "step": 4200 + }, + { + "epoch": 0.76, + "grad_norm": 0.9618161916732788, + "learning_rate": 5.606638520890959e-06, + "log_odds_chosen": 0.821193516254425, + "log_odds_ratio": -0.5814388990402222, + "logits/chosen": -0.36870136857032776, + "logits/rejected": -0.40048137307167053, + "logps/chosen": -0.8505674600601196, + "logps/rejected": -1.3683149814605713, + "loss": 0.9269, + "nll_loss": 0.8687930107116699, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08505673706531525, + "rewards/margins": 0.051774751394987106, + "rewards/rejected": -0.13683149218559265, + "step": 4210 + }, + { + "epoch": 0.76, + "grad_norm": 1.4219672679901123, + "learning_rate": 5.600815256951521e-06, + "log_odds_chosen": 0.8768616914749146, + "log_odds_ratio": -0.5199444890022278, + "logits/chosen": -0.3847038149833679, + "logits/rejected": -0.4094681739807129, + "logps/chosen": -0.9236054420471191, + "logps/rejected": -1.48633873462677, + "loss": 0.9239, + "nll_loss": 0.8718730211257935, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09236054867506027, + "rewards/margins": 0.05627333000302315, + "rewards/rejected": -0.14863386750221252, + "step": 4220 + }, + { + "epoch": 0.76, + "grad_norm": 1.4613364934921265, + "learning_rate": 5.594991993012084e-06, + "log_odds_chosen": 0.692061185836792, + "log_odds_ratio": -0.5662203431129456, + "logits/chosen": -0.36791688203811646, + "logits/rejected": -0.4184791147708893, + "logps/chosen": -0.9626785516738892, + "logps/rejected": -1.4185993671417236, + "loss": 0.9929, + "nll_loss": 0.9362456202507019, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09626786410808563, + "rewards/margins": 0.04559207707643509, + "rewards/rejected": -0.14185991883277893, + "step": 4230 + }, + { + "epoch": 0.77, + "grad_norm": 1.1804014444351196, + "learning_rate": 5.589168729072644e-06, + "log_odds_chosen": 1.0438271760940552, + "log_odds_ratio": -0.5010775327682495, + "logits/chosen": -0.267377644777298, + "logits/rejected": -0.3655751347541809, + "logps/chosen": -0.9364291429519653, + "logps/rejected": -1.6218111515045166, + "loss": 0.9331, + "nll_loss": 0.8829880952835083, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09364292025566101, + "rewards/margins": 0.06853820383548737, + "rewards/rejected": -0.16218113899230957, + "step": 4240 + }, + { + "epoch": 0.77, + "grad_norm": 1.9332170486450195, + "learning_rate": 5.5833454651332066e-06, + "log_odds_chosen": 0.83074951171875, + "log_odds_ratio": -0.5328481197357178, + "logits/chosen": -0.4035532474517822, + "logits/rejected": -0.44083279371261597, + "logps/chosen": -0.8890374302864075, + "logps/rejected": -1.4090745449066162, + "loss": 0.969, + "nll_loss": 0.9156768918037415, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08890374004840851, + "rewards/margins": 0.052003733813762665, + "rewards/rejected": -0.14090748131275177, + "step": 4250 + }, + { + "epoch": 0.77, + "grad_norm": 1.3763980865478516, + "learning_rate": 5.577522201193769e-06, + "log_odds_chosen": 0.5008147954940796, + "log_odds_ratio": -0.6271563768386841, + "logits/chosen": -0.40320873260498047, + "logits/rejected": -0.412767231464386, + "logps/chosen": -0.9467431306838989, + "logps/rejected": -1.296858310699463, + "loss": 0.9401, + "nll_loss": 0.8774242401123047, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09467431157827377, + "rewards/margins": 0.03501152992248535, + "rewards/rejected": -0.12968584895133972, + "step": 4260 + }, + { + "epoch": 0.77, + "grad_norm": 0.9446521401405334, + "learning_rate": 5.57169893725433e-06, + "log_odds_chosen": 1.1107661724090576, + "log_odds_ratio": -0.4330429136753082, + "logits/chosen": -0.31322717666625977, + "logits/rejected": -0.38749533891677856, + "logps/chosen": -0.8513630628585815, + "logps/rejected": -1.5474624633789062, + "loss": 0.8809, + "nll_loss": 0.8376407623291016, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0851363092660904, + "rewards/margins": 0.06960994750261307, + "rewards/rejected": -0.15474626421928406, + "step": 4270 + }, + { + "epoch": 0.77, + "grad_norm": 1.3432685136795044, + "learning_rate": 5.565875673314893e-06, + "log_odds_chosen": 0.8585469126701355, + "log_odds_ratio": -0.5709123015403748, + "logits/chosen": -0.32336562871932983, + "logits/rejected": -0.38996315002441406, + "logps/chosen": -0.8784129023551941, + "logps/rejected": -1.417546272277832, + "loss": 0.967, + "nll_loss": 0.9099494814872742, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08784128725528717, + "rewards/margins": 0.053913332521915436, + "rewards/rejected": -0.1417546272277832, + "step": 4280 + }, + { + "epoch": 0.77, + "grad_norm": 1.2538154125213623, + "learning_rate": 5.560052409375455e-06, + "log_odds_chosen": 0.7539855241775513, + "log_odds_ratio": -0.5314663052558899, + "logits/chosen": -0.40349698066711426, + "logits/rejected": -0.419721782207489, + "logps/chosen": -0.9041644334793091, + "logps/rejected": -1.377606987953186, + "loss": 0.9484, + "nll_loss": 0.8953009843826294, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09041643887758255, + "rewards/margins": 0.04734424501657486, + "rewards/rejected": -0.1377606987953186, + "step": 4290 + }, + { + "epoch": 0.78, + "grad_norm": 2.1267170906066895, + "learning_rate": 5.554229145436017e-06, + "log_odds_chosen": 0.7543531656265259, + "log_odds_ratio": -0.5795196294784546, + "logits/chosen": -0.4250953793525696, + "logits/rejected": -0.4518701434135437, + "logps/chosen": -1.0713355541229248, + "logps/rejected": -1.5738701820373535, + "loss": 0.9899, + "nll_loss": 0.931910514831543, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10713355243206024, + "rewards/margins": 0.05025345832109451, + "rewards/rejected": -0.15738701820373535, + "step": 4300 + }, + { + "epoch": 0.78, + "grad_norm": 1.9224661588668823, + "learning_rate": 5.548405881496579e-06, + "log_odds_chosen": 0.6704081296920776, + "log_odds_ratio": -0.5380033850669861, + "logits/chosen": -0.44478529691696167, + "logits/rejected": -0.4515528678894043, + "logps/chosen": -0.9250621795654297, + "logps/rejected": -1.3818514347076416, + "loss": 0.9657, + "nll_loss": 0.9119402170181274, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09250621497631073, + "rewards/margins": 0.04567892104387283, + "rewards/rejected": -0.13818514347076416, + "step": 4310 + }, + { + "epoch": 0.78, + "grad_norm": 1.2206400632858276, + "learning_rate": 5.542582617557141e-06, + "log_odds_chosen": 0.5698705315589905, + "log_odds_ratio": -0.5757175087928772, + "logits/chosen": -0.4712826609611511, + "logits/rejected": -0.48210567235946655, + "logps/chosen": -0.9412860870361328, + "logps/rejected": -1.3091002702713013, + "loss": 0.9899, + "nll_loss": 0.9323747754096985, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09412859380245209, + "rewards/margins": 0.036781422793865204, + "rewards/rejected": -0.1309100240468979, + "step": 4320 + }, + { + "epoch": 0.78, + "grad_norm": 1.0580228567123413, + "learning_rate": 5.5367593536177025e-06, + "log_odds_chosen": 0.6712430119514465, + "log_odds_ratio": -0.5941036939620972, + "logits/chosen": -0.44383248686790466, + "logits/rejected": -0.4895065426826477, + "logps/chosen": -0.9147260785102844, + "logps/rejected": -1.3670275211334229, + "loss": 0.9959, + "nll_loss": 0.9365051984786987, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09147261083126068, + "rewards/margins": 0.04523014649748802, + "rewards/rejected": -0.136702761054039, + "step": 4330 + }, + { + "epoch": 0.78, + "grad_norm": 0.9036986827850342, + "learning_rate": 5.530936089678264e-06, + "log_odds_chosen": 0.5545127987861633, + "log_odds_ratio": -0.59429532289505, + "logits/chosen": -0.42196816205978394, + "logits/rejected": -0.4064735472202301, + "logps/chosen": -0.999508261680603, + "logps/rejected": -1.366876482963562, + "loss": 0.9843, + "nll_loss": 0.9248775243759155, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09995082765817642, + "rewards/margins": 0.03673681244254112, + "rewards/rejected": -0.13668763637542725, + "step": 4340 + }, + { + "epoch": 0.79, + "grad_norm": 0.7505858540534973, + "learning_rate": 5.525112825738826e-06, + "log_odds_chosen": 0.668211817741394, + "log_odds_ratio": -0.557309091091156, + "logits/chosen": -0.41097337007522583, + "logits/rejected": -0.3994588553905487, + "logps/chosen": -0.9319581985473633, + "logps/rejected": -1.3750684261322021, + "loss": 1.0205, + "nll_loss": 0.9647325277328491, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09319581091403961, + "rewards/margins": 0.0443110391497612, + "rewards/rejected": -0.1375068575143814, + "step": 4350 + }, + { + "epoch": 0.79, + "grad_norm": 1.393135666847229, + "learning_rate": 5.519289561799389e-06, + "log_odds_chosen": 1.0365409851074219, + "log_odds_ratio": -0.4862661361694336, + "logits/chosen": -0.36097291111946106, + "logits/rejected": -0.4341823160648346, + "logps/chosen": -0.850311279296875, + "logps/rejected": -1.543900966644287, + "loss": 0.933, + "nll_loss": 0.884366512298584, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08503112196922302, + "rewards/margins": 0.06935896724462509, + "rewards/rejected": -0.15439006686210632, + "step": 4360 + }, + { + "epoch": 0.79, + "grad_norm": 1.2937564849853516, + "learning_rate": 5.51346629785995e-06, + "log_odds_chosen": 0.7271625399589539, + "log_odds_ratio": -0.5418993830680847, + "logits/chosen": -0.408609539270401, + "logits/rejected": -0.4532496929168701, + "logps/chosen": -0.9302921295166016, + "logps/rejected": -1.426904320716858, + "loss": 1.0401, + "nll_loss": 0.985892117023468, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0930292084813118, + "rewards/margins": 0.04966122657060623, + "rewards/rejected": -0.14269044995307922, + "step": 4370 + }, + { + "epoch": 0.79, + "grad_norm": 1.5124766826629639, + "learning_rate": 5.507643033920512e-06, + "log_odds_chosen": 0.8994364738464355, + "log_odds_ratio": -0.5436853170394897, + "logits/chosen": -0.3987227976322174, + "logits/rejected": -0.426923930644989, + "logps/chosen": -0.857147216796875, + "logps/rejected": -1.457137107849121, + "loss": 0.9784, + "nll_loss": 0.9239856600761414, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08571472018957138, + "rewards/margins": 0.059999000281095505, + "rewards/rejected": -0.1457137167453766, + "step": 4380 + }, + { + "epoch": 0.79, + "grad_norm": 0.9244928956031799, + "learning_rate": 5.501819769981075e-06, + "log_odds_chosen": 0.7180790305137634, + "log_odds_ratio": -0.595329225063324, + "logits/chosen": -0.3936954140663147, + "logits/rejected": -0.421609103679657, + "logps/chosen": -1.0266858339309692, + "logps/rejected": -1.4452166557312012, + "loss": 1.0865, + "nll_loss": 1.0269358158111572, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10266858339309692, + "rewards/margins": 0.041853077709674835, + "rewards/rejected": -0.14452166855335236, + "step": 4390 + }, + { + "epoch": 0.79, + "grad_norm": 1.1748363971710205, + "learning_rate": 5.495996506041636e-06, + "log_odds_chosen": 0.8399966955184937, + "log_odds_ratio": -0.559502363204956, + "logits/chosen": -0.3925052881240845, + "logits/rejected": -0.40640386939048767, + "logps/chosen": -0.8787053823471069, + "logps/rejected": -1.4464792013168335, + "loss": 0.9667, + "nll_loss": 0.9107036590576172, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08787054568529129, + "rewards/margins": 0.05677737668156624, + "rewards/rejected": -0.14464792609214783, + "step": 4400 + }, + { + "epoch": 0.8, + "grad_norm": 1.6740537881851196, + "learning_rate": 5.490173242102198e-06, + "log_odds_chosen": 0.583748459815979, + "log_odds_ratio": -0.6321566700935364, + "logits/chosen": -0.4121069014072418, + "logits/rejected": -0.44561561942100525, + "logps/chosen": -1.0274041891098022, + "logps/rejected": -1.407701849937439, + "loss": 1.047, + "nll_loss": 0.9837868809700012, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10274042934179306, + "rewards/margins": 0.03802977129817009, + "rewards/rejected": -0.14077021181583405, + "step": 4410 + }, + { + "epoch": 0.8, + "grad_norm": 1.3400599956512451, + "learning_rate": 5.48434997816276e-06, + "log_odds_chosen": 0.5425913333892822, + "log_odds_ratio": -0.6277004480361938, + "logits/chosen": -0.44407615065574646, + "logits/rejected": -0.46227678656578064, + "logps/chosen": -1.0416014194488525, + "logps/rejected": -1.4123096466064453, + "loss": 1.0788, + "nll_loss": 1.0160634517669678, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10416014492511749, + "rewards/margins": 0.03707083314657211, + "rewards/rejected": -0.141230970621109, + "step": 4420 + }, + { + "epoch": 0.8, + "grad_norm": 5.0677666664123535, + "learning_rate": 5.478526714223321e-06, + "log_odds_chosen": 0.8549364805221558, + "log_odds_ratio": -0.47799697518348694, + "logits/chosen": -0.3967309594154358, + "logits/rejected": -0.4370170533657074, + "logps/chosen": -0.9174606204032898, + "logps/rejected": -1.5248515605926514, + "loss": 1.0526, + "nll_loss": 1.0048235654830933, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09174606949090958, + "rewards/margins": 0.06073909252882004, + "rewards/rejected": -0.15248516201972961, + "step": 4430 + }, + { + "epoch": 0.8, + "grad_norm": 1.2501968145370483, + "learning_rate": 5.472703450283884e-06, + "log_odds_chosen": 0.5208200216293335, + "log_odds_ratio": -0.6153031587600708, + "logits/chosen": -0.36386433243751526, + "logits/rejected": -0.3963198661804199, + "logps/chosen": -0.9649707674980164, + "logps/rejected": -1.306401252746582, + "loss": 0.9706, + "nll_loss": 0.9090933799743652, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09649708122015, + "rewards/margins": 0.03414304554462433, + "rewards/rejected": -0.13064011931419373, + "step": 4440 + }, + { + "epoch": 0.8, + "grad_norm": 1.1581801176071167, + "learning_rate": 5.466880186344446e-06, + "log_odds_chosen": 0.9284378886222839, + "log_odds_ratio": -0.49612656235694885, + "logits/chosen": -0.3325726389884949, + "logits/rejected": -0.4070536494255066, + "logps/chosen": -0.8295499682426453, + "logps/rejected": -1.4266316890716553, + "loss": 0.9135, + "nll_loss": 0.8638966679573059, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08295499533414841, + "rewards/margins": 0.059708189219236374, + "rewards/rejected": -0.14266319572925568, + "step": 4450 + }, + { + "epoch": 0.81, + "grad_norm": 1.7593944072723389, + "learning_rate": 5.4610569224050074e-06, + "log_odds_chosen": 0.684478223323822, + "log_odds_ratio": -0.5533784627914429, + "logits/chosen": -0.4242308735847473, + "logits/rejected": -0.44120240211486816, + "logps/chosen": -0.8975669145584106, + "logps/rejected": -1.3506085872650146, + "loss": 1.0322, + "nll_loss": 0.9768352508544922, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08975669741630554, + "rewards/margins": 0.04530417546629906, + "rewards/rejected": -0.1350608766078949, + "step": 4460 + }, + { + "epoch": 0.81, + "grad_norm": 0.8792012333869934, + "learning_rate": 5.45523365846557e-06, + "log_odds_chosen": 0.9008004069328308, + "log_odds_ratio": -0.5196114778518677, + "logits/chosen": -0.35840699076652527, + "logits/rejected": -0.42450451850891113, + "logps/chosen": -0.9610995054244995, + "logps/rejected": -1.565719485282898, + "loss": 0.9387, + "nll_loss": 0.8867459297180176, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09610994905233383, + "rewards/margins": 0.06046200543642044, + "rewards/rejected": -0.15657195448875427, + "step": 4470 + }, + { + "epoch": 0.81, + "grad_norm": 1.587249755859375, + "learning_rate": 5.449410394526132e-06, + "log_odds_chosen": 0.661650538444519, + "log_odds_ratio": -0.578790545463562, + "logits/chosen": -0.4225030541419983, + "logits/rejected": -0.4778100848197937, + "logps/chosen": -0.9953657984733582, + "logps/rejected": -1.4192094802856445, + "loss": 1.0387, + "nll_loss": 0.9808080792427063, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09953657537698746, + "rewards/margins": 0.042384374886751175, + "rewards/rejected": -0.14192095398902893, + "step": 4480 + }, + { + "epoch": 0.81, + "grad_norm": 1.2669217586517334, + "learning_rate": 5.4435871305866935e-06, + "log_odds_chosen": 0.9236847758293152, + "log_odds_ratio": -0.5020954012870789, + "logits/chosen": -0.382222443819046, + "logits/rejected": -0.4366540014743805, + "logps/chosen": -0.8376493453979492, + "logps/rejected": -1.4251618385314941, + "loss": 0.9316, + "nll_loss": 0.8813702464103699, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0837649330496788, + "rewards/margins": 0.05875125527381897, + "rewards/rejected": -0.14251618087291718, + "step": 4490 + }, + { + "epoch": 0.81, + "grad_norm": 1.1096593141555786, + "learning_rate": 5.437763866647255e-06, + "log_odds_chosen": 0.9368969798088074, + "log_odds_ratio": -0.49079108238220215, + "logits/chosen": -0.3573613166809082, + "logits/rejected": -0.39419031143188477, + "logps/chosen": -0.8301242589950562, + "logps/rejected": -1.4331512451171875, + "loss": 0.9555, + "nll_loss": 0.9064178466796875, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08301243185997009, + "rewards/margins": 0.06030268594622612, + "rewards/rejected": -0.1433151364326477, + "step": 4500 + }, + { + "epoch": 0.81, + "grad_norm": 1.9662997722625732, + "learning_rate": 5.431940602707817e-06, + "log_odds_chosen": 0.9530878067016602, + "log_odds_ratio": -0.5022386312484741, + "logits/chosen": -0.3724839687347412, + "logits/rejected": -0.3958912193775177, + "logps/chosen": -0.9541193842887878, + "logps/rejected": -1.603494644165039, + "loss": 0.9589, + "nll_loss": 0.908686637878418, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09541194140911102, + "rewards/margins": 0.0649375170469284, + "rewards/rejected": -0.16034945845603943, + "step": 4510 + }, + { + "epoch": 0.82, + "grad_norm": 0.777763307094574, + "learning_rate": 5.42611733876838e-06, + "log_odds_chosen": 0.724637508392334, + "log_odds_ratio": -0.5301684141159058, + "logits/chosen": -0.4204856753349304, + "logits/rejected": -0.42652568221092224, + "logps/chosen": -0.8927604556083679, + "logps/rejected": -1.378150463104248, + "loss": 0.9923, + "nll_loss": 0.9392973780632019, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08927605301141739, + "rewards/margins": 0.048538997769355774, + "rewards/rejected": -0.13781504333019257, + "step": 4520 + }, + { + "epoch": 0.82, + "grad_norm": 0.9448487162590027, + "learning_rate": 5.420294074828941e-06, + "log_odds_chosen": 0.650304913520813, + "log_odds_ratio": -0.5700558423995972, + "logits/chosen": -0.42849674820899963, + "logits/rejected": -0.476339727640152, + "logps/chosen": -0.9701536893844604, + "logps/rejected": -1.434536337852478, + "loss": 1.0054, + "nll_loss": 0.9483936429023743, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.097015380859375, + "rewards/margins": 0.046438273042440414, + "rewards/rejected": -0.14345364272594452, + "step": 4530 + }, + { + "epoch": 0.82, + "grad_norm": 0.9929624199867249, + "learning_rate": 5.414470810889503e-06, + "log_odds_chosen": 0.7134051322937012, + "log_odds_ratio": -0.5507954359054565, + "logits/chosen": -0.379250705242157, + "logits/rejected": -0.3842385709285736, + "logps/chosen": -0.9411032795906067, + "logps/rejected": -1.4669625759124756, + "loss": 0.8943, + "nll_loss": 0.8392144441604614, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09411032497882843, + "rewards/margins": 0.05258594825863838, + "rewards/rejected": -0.1466962844133377, + "step": 4540 + }, + { + "epoch": 0.82, + "grad_norm": 1.1224552392959595, + "learning_rate": 5.408647546950066e-06, + "log_odds_chosen": 0.8553325533866882, + "log_odds_ratio": -0.5414713621139526, + "logits/chosen": -0.3530065715312958, + "logits/rejected": -0.38633108139038086, + "logps/chosen": -0.829110324382782, + "logps/rejected": -1.3547379970550537, + "loss": 0.936, + "nll_loss": 0.8818100690841675, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08291102945804596, + "rewards/margins": 0.052562762051820755, + "rewards/rejected": -0.1354738026857376, + "step": 4550 + }, + { + "epoch": 0.82, + "grad_norm": 0.7922306060791016, + "learning_rate": 5.402824283010627e-06, + "log_odds_chosen": 0.8722979426383972, + "log_odds_ratio": -0.5566241145133972, + "logits/chosen": -0.43379703164100647, + "logits/rejected": -0.4889557957649231, + "logps/chosen": -0.975425124168396, + "logps/rejected": -1.6006053686141968, + "loss": 1.0169, + "nll_loss": 0.9612849354743958, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09754250943660736, + "rewards/margins": 0.06251802295446396, + "rewards/rejected": -0.1600605547428131, + "step": 4560 + }, + { + "epoch": 0.83, + "grad_norm": 1.6481794118881226, + "learning_rate": 5.3970010190711895e-06, + "log_odds_chosen": 0.7812891602516174, + "log_odds_ratio": -0.5631781816482544, + "logits/chosen": -0.37542352080345154, + "logits/rejected": -0.41719841957092285, + "logps/chosen": -0.8898963928222656, + "logps/rejected": -1.4438140392303467, + "loss": 0.9596, + "nll_loss": 0.9033276438713074, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08898963034152985, + "rewards/margins": 0.05539176985621452, + "rewards/rejected": -0.14438140392303467, + "step": 4570 + }, + { + "epoch": 0.83, + "grad_norm": 1.1498469114303589, + "learning_rate": 5.391177755131752e-06, + "log_odds_chosen": 1.0272377729415894, + "log_odds_ratio": -0.4711379408836365, + "logits/chosen": -0.3855094313621521, + "logits/rejected": -0.4190591871738434, + "logps/chosen": -0.9228911399841309, + "logps/rejected": -1.5633227825164795, + "loss": 0.9365, + "nll_loss": 0.8894192576408386, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.09228911995887756, + "rewards/margins": 0.06404314935207367, + "rewards/rejected": -0.15633228421211243, + "step": 4580 + }, + { + "epoch": 0.83, + "grad_norm": 1.1245613098144531, + "learning_rate": 5.385354491192312e-06, + "log_odds_chosen": 0.9771651029586792, + "log_odds_ratio": -0.4908887445926666, + "logits/chosen": -0.42949455976486206, + "logits/rejected": -0.5072841048240662, + "logps/chosen": -0.8424657583236694, + "logps/rejected": -1.5049989223480225, + "loss": 0.9413, + "nll_loss": 0.8921648263931274, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08424657583236694, + "rewards/margins": 0.06625331938266754, + "rewards/rejected": -0.15049989521503448, + "step": 4590 + }, + { + "epoch": 0.83, + "grad_norm": 1.0176246166229248, + "learning_rate": 5.379531227252875e-06, + "log_odds_chosen": 0.9292768239974976, + "log_odds_ratio": -0.48836570978164673, + "logits/chosen": -0.46856871247291565, + "logits/rejected": -0.49828463792800903, + "logps/chosen": -1.0002449750900269, + "logps/rejected": -1.6327403783798218, + "loss": 0.9623, + "nll_loss": 0.913472056388855, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.1000244989991188, + "rewards/margins": 0.06324952840805054, + "rewards/rejected": -0.16327403485774994, + "step": 4600 + }, + { + "epoch": 0.83, + "grad_norm": 1.0745056867599487, + "learning_rate": 5.373707963313437e-06, + "log_odds_chosen": 0.6398543119430542, + "log_odds_ratio": -0.6533128619194031, + "logits/chosen": -0.4060142934322357, + "logits/rejected": -0.45436620712280273, + "logps/chosen": -0.9138556718826294, + "logps/rejected": -1.3905316591262817, + "loss": 0.9979, + "nll_loss": 0.9325958490371704, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09138555824756622, + "rewards/margins": 0.04766761511564255, + "rewards/rejected": -0.13905318081378937, + "step": 4610 + }, + { + "epoch": 0.83, + "grad_norm": 1.0568596124649048, + "learning_rate": 5.3678846993739985e-06, + "log_odds_chosen": 0.8728116750717163, + "log_odds_ratio": -0.48789605498313904, + "logits/chosen": -0.4428630471229553, + "logits/rejected": -0.493156760931015, + "logps/chosen": -0.9080031514167786, + "logps/rejected": -1.4789577722549438, + "loss": 0.9531, + "nll_loss": 0.9042849540710449, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09080030024051666, + "rewards/margins": 0.057095468044281006, + "rewards/rejected": -0.14789576828479767, + "step": 4620 + }, + { + "epoch": 0.84, + "grad_norm": 0.9505348205566406, + "learning_rate": 5.362061435434561e-06, + "log_odds_chosen": 0.7823434472084045, + "log_odds_ratio": -0.549795925617218, + "logits/chosen": -0.4028090834617615, + "logits/rejected": -0.42478498816490173, + "logps/chosen": -0.8729179501533508, + "logps/rejected": -1.3873783349990845, + "loss": 0.9906, + "nll_loss": 0.9356420636177063, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08729179203510284, + "rewards/margins": 0.051446039229631424, + "rewards/rejected": -0.13873784244060516, + "step": 4630 + }, + { + "epoch": 0.84, + "grad_norm": 1.7304534912109375, + "learning_rate": 5.356238171495123e-06, + "log_odds_chosen": 0.6297367215156555, + "log_odds_ratio": -0.5629245638847351, + "logits/chosen": -0.444740355014801, + "logits/rejected": -0.48241329193115234, + "logps/chosen": -0.9954794049263, + "logps/rejected": -1.4082391262054443, + "loss": 0.985, + "nll_loss": 0.9287381172180176, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09954794496297836, + "rewards/margins": 0.04127597063779831, + "rewards/rejected": -0.14082391560077667, + "step": 4640 + }, + { + "epoch": 0.84, + "grad_norm": 0.9871335029602051, + "learning_rate": 5.3504149075556846e-06, + "log_odds_chosen": 0.8629505038261414, + "log_odds_ratio": -0.4875113368034363, + "logits/chosen": -0.36363014578819275, + "logits/rejected": -0.4274236559867859, + "logps/chosen": -0.9839668273925781, + "logps/rejected": -1.553021788597107, + "loss": 0.9214, + "nll_loss": 0.8726351857185364, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0983966737985611, + "rewards/margins": 0.05690549686551094, + "rewards/rejected": -0.15530219674110413, + "step": 4650 + }, + { + "epoch": 0.84, + "grad_norm": 1.7025538682937622, + "learning_rate": 5.344591643616247e-06, + "log_odds_chosen": 0.7422502636909485, + "log_odds_ratio": -0.564644455909729, + "logits/chosen": -0.3728085458278656, + "logits/rejected": -0.4222579896450043, + "logps/chosen": -0.9774104952812195, + "logps/rejected": -1.4680044651031494, + "loss": 0.9695, + "nll_loss": 0.9130756258964539, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09774105250835419, + "rewards/margins": 0.04905937984585762, + "rewards/rejected": -0.1468004435300827, + "step": 4660 + }, + { + "epoch": 0.84, + "grad_norm": 1.6551975011825562, + "learning_rate": 5.338768379676809e-06, + "log_odds_chosen": 0.8004302978515625, + "log_odds_ratio": -0.5492068529129028, + "logits/chosen": -0.4279244840145111, + "logits/rejected": -0.44948825240135193, + "logps/chosen": -0.9123814702033997, + "logps/rejected": -1.4314771890640259, + "loss": 0.931, + "nll_loss": 0.8761026263237, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09123813360929489, + "rewards/margins": 0.05190957337617874, + "rewards/rejected": -0.14314770698547363, + "step": 4670 + }, + { + "epoch": 0.85, + "grad_norm": 1.4915745258331299, + "learning_rate": 5.33294511573737e-06, + "log_odds_chosen": 0.8939487338066101, + "log_odds_ratio": -0.5288721323013306, + "logits/chosen": -0.4202663004398346, + "logits/rejected": -0.44463270902633667, + "logps/chosen": -0.905720591545105, + "logps/rejected": -1.4950666427612305, + "loss": 0.9727, + "nll_loss": 0.9198009371757507, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0905720591545105, + "rewards/margins": 0.058934617787599564, + "rewards/rejected": -0.14950667321681976, + "step": 4680 + }, + { + "epoch": 0.85, + "grad_norm": 1.170964241027832, + "learning_rate": 5.327121851797932e-06, + "log_odds_chosen": 0.7633172869682312, + "log_odds_ratio": -0.578994870185852, + "logits/chosen": -0.4211356043815613, + "logits/rejected": -0.4459422528743744, + "logps/chosen": -0.9920495748519897, + "logps/rejected": -1.4625046253204346, + "loss": 0.9584, + "nll_loss": 0.9004647135734558, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09920495003461838, + "rewards/margins": 0.0470455177128315, + "rewards/rejected": -0.14625045657157898, + "step": 4690 + }, + { + "epoch": 0.85, + "grad_norm": 1.534651756286621, + "learning_rate": 5.321298587858494e-06, + "log_odds_chosen": 0.7604216933250427, + "log_odds_ratio": -0.6054350733757019, + "logits/chosen": -0.39500564336776733, + "logits/rejected": -0.43382158875465393, + "logps/chosen": -0.9889154434204102, + "logps/rejected": -1.4807031154632568, + "loss": 0.9964, + "nll_loss": 0.9358501434326172, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09889154881238937, + "rewards/margins": 0.049178753048181534, + "rewards/rejected": -0.1480703055858612, + "step": 4700 + }, + { + "epoch": 0.85, + "grad_norm": 1.5456305742263794, + "learning_rate": 5.315475323919056e-06, + "log_odds_chosen": 0.44413527846336365, + "log_odds_ratio": -0.6286059617996216, + "logits/chosen": -0.4555909037590027, + "logits/rejected": -0.48065558075904846, + "logps/chosen": -1.0448988676071167, + "logps/rejected": -1.3062857389450073, + "loss": 1.0519, + "nll_loss": 0.9890422821044922, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10448990762233734, + "rewards/margins": 0.026138659566640854, + "rewards/rejected": -0.1306285560131073, + "step": 4710 + }, + { + "epoch": 0.85, + "grad_norm": 1.510901689529419, + "learning_rate": 5.309652059979618e-06, + "log_odds_chosen": 1.0771484375, + "log_odds_ratio": -0.49190235137939453, + "logits/chosen": -0.38036924600601196, + "logits/rejected": -0.4628377854824066, + "logps/chosen": -0.8989608883857727, + "logps/rejected": -1.5778999328613281, + "loss": 0.8991, + "nll_loss": 0.8498629331588745, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08989609032869339, + "rewards/margins": 0.06789391487836838, + "rewards/rejected": -0.15779002010822296, + "step": 4720 + }, + { + "epoch": 0.85, + "grad_norm": 1.0319769382476807, + "learning_rate": 5.3038287960401805e-06, + "log_odds_chosen": 0.6651164889335632, + "log_odds_ratio": -0.5910425782203674, + "logits/chosen": -0.42693108320236206, + "logits/rejected": -0.470429003238678, + "logps/chosen": -0.8990481495857239, + "logps/rejected": -1.3087271451950073, + "loss": 1.007, + "nll_loss": 0.9479179382324219, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08990481495857239, + "rewards/margins": 0.0409679040312767, + "rewards/rejected": -0.1308727115392685, + "step": 4730 + }, + { + "epoch": 0.86, + "grad_norm": 0.9942054748535156, + "learning_rate": 5.298005532100743e-06, + "log_odds_chosen": 0.5316375494003296, + "log_odds_ratio": -0.6433253288269043, + "logits/chosen": -0.4612106382846832, + "logits/rejected": -0.45017772912979126, + "logps/chosen": -1.0055973529815674, + "logps/rejected": -1.338026762008667, + "loss": 1.0462, + "nll_loss": 0.981835663318634, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10055973380804062, + "rewards/margins": 0.03324293717741966, + "rewards/rejected": -0.13380268216133118, + "step": 4740 + }, + { + "epoch": 0.86, + "grad_norm": 1.2705566883087158, + "learning_rate": 5.292182268161304e-06, + "log_odds_chosen": 1.0579116344451904, + "log_odds_ratio": -0.4778444170951843, + "logits/chosen": -0.3648456037044525, + "logits/rejected": -0.42490434646606445, + "logps/chosen": -0.8988125920295715, + "logps/rejected": -1.6541303396224976, + "loss": 0.9395, + "nll_loss": 0.8917468786239624, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08988126367330551, + "rewards/margins": 0.07553178071975708, + "rewards/rejected": -0.1654130518436432, + "step": 4750 + }, + { + "epoch": 0.86, + "grad_norm": 1.1533735990524292, + "learning_rate": 5.2863590042218666e-06, + "log_odds_chosen": 0.6809478998184204, + "log_odds_ratio": -0.5645762085914612, + "logits/chosen": -0.4856683611869812, + "logits/rejected": -0.46850576996803284, + "logps/chosen": -1.0219817161560059, + "logps/rejected": -1.4996047019958496, + "loss": 1.02, + "nll_loss": 0.9635196924209595, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.10219816863536835, + "rewards/margins": 0.04776228964328766, + "rewards/rejected": -0.149960458278656, + "step": 4760 + }, + { + "epoch": 0.86, + "grad_norm": 1.5749801397323608, + "learning_rate": 5.280535740282428e-06, + "log_odds_chosen": 0.7470858693122864, + "log_odds_ratio": -0.5543025732040405, + "logits/chosen": -0.4672362804412842, + "logits/rejected": -0.46102237701416016, + "logps/chosen": -0.8804488182067871, + "logps/rejected": -1.3649325370788574, + "loss": 0.9422, + "nll_loss": 0.8867942690849304, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08804487437009811, + "rewards/margins": 0.048448383808135986, + "rewards/rejected": -0.1364932507276535, + "step": 4770 + }, + { + "epoch": 0.86, + "grad_norm": 1.3381456136703491, + "learning_rate": 5.2747124763429895e-06, + "log_odds_chosen": 0.8641374707221985, + "log_odds_ratio": -0.5256321430206299, + "logits/chosen": -0.41251951456069946, + "logits/rejected": -0.44938522577285767, + "logps/chosen": -0.9954856038093567, + "logps/rejected": -1.5450937747955322, + "loss": 1.0149, + "nll_loss": 0.9623681902885437, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09954856336116791, + "rewards/margins": 0.05496079847216606, + "rewards/rejected": -0.15450936555862427, + "step": 4780 + }, + { + "epoch": 0.87, + "grad_norm": 1.0874555110931396, + "learning_rate": 5.268889212403552e-06, + "log_odds_chosen": 0.7685006260871887, + "log_odds_ratio": -0.5665751695632935, + "logits/chosen": -0.4145042300224304, + "logits/rejected": -0.471210241317749, + "logps/chosen": -1.0652424097061157, + "logps/rejected": -1.5898144245147705, + "loss": 1.087, + "nll_loss": 1.0303508043289185, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10652424395084381, + "rewards/margins": 0.052457213401794434, + "rewards/rejected": -0.15898147225379944, + "step": 4790 + }, + { + "epoch": 0.87, + "grad_norm": 1.6360236406326294, + "learning_rate": 5.263065948464114e-06, + "log_odds_chosen": 0.9018100500106812, + "log_odds_ratio": -0.4937785267829895, + "logits/chosen": -0.39242830872535706, + "logits/rejected": -0.43380218744277954, + "logps/chosen": -0.9768654108047485, + "logps/rejected": -1.5783965587615967, + "loss": 0.9547, + "nll_loss": 0.905341625213623, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09768654406070709, + "rewards/margins": 0.060153115540742874, + "rewards/rejected": -0.15783965587615967, + "step": 4800 + }, + { + "epoch": 0.87, + "grad_norm": 1.1695739030838013, + "learning_rate": 5.257242684524676e-06, + "log_odds_chosen": 0.7925978899002075, + "log_odds_ratio": -0.5527957677841187, + "logits/chosen": -0.40756258368492126, + "logits/rejected": -0.40037378668785095, + "logps/chosen": -0.9216617345809937, + "logps/rejected": -1.4269583225250244, + "loss": 0.9425, + "nll_loss": 0.8872434496879578, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09216617792844772, + "rewards/margins": 0.050529658794403076, + "rewards/rejected": -0.1426958292722702, + "step": 4810 + }, + { + "epoch": 0.87, + "grad_norm": 1.5395045280456543, + "learning_rate": 5.251419420585238e-06, + "log_odds_chosen": 0.7387697100639343, + "log_odds_ratio": -0.6026879549026489, + "logits/chosen": -0.505529522895813, + "logits/rejected": -0.5170288681983948, + "logps/chosen": -1.066630482673645, + "logps/rejected": -1.5880917310714722, + "loss": 1.0232, + "nll_loss": 0.9629791378974915, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.1066630631685257, + "rewards/margins": 0.052146125584840775, + "rewards/rejected": -0.15880918502807617, + "step": 4820 + }, + { + "epoch": 0.87, + "grad_norm": 1.0072517395019531, + "learning_rate": 5.2455961566458e-06, + "log_odds_chosen": 0.978954017162323, + "log_odds_ratio": -0.4921804368495941, + "logits/chosen": -0.4218106269836426, + "logits/rejected": -0.4727579951286316, + "logps/chosen": -0.8569656610488892, + "logps/rejected": -1.5145273208618164, + "loss": 0.9357, + "nll_loss": 0.886450469493866, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08569655567407608, + "rewards/margins": 0.0657561793923378, + "rewards/rejected": -0.15145273506641388, + "step": 4830 + }, + { + "epoch": 0.87, + "grad_norm": 1.023287057876587, + "learning_rate": 5.239772892706362e-06, + "log_odds_chosen": 0.9301230311393738, + "log_odds_ratio": -0.5898328423500061, + "logits/chosen": -0.4207271933555603, + "logits/rejected": -0.439255952835083, + "logps/chosen": -0.932141125202179, + "logps/rejected": -1.6405792236328125, + "loss": 0.9573, + "nll_loss": 0.898327648639679, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09321411699056625, + "rewards/margins": 0.07084380090236664, + "rewards/rejected": -0.1640579104423523, + "step": 4840 + }, + { + "epoch": 0.88, + "grad_norm": 0.7330833077430725, + "learning_rate": 5.233949628766924e-06, + "log_odds_chosen": 0.7396227717399597, + "log_odds_ratio": -0.5504485368728638, + "logits/chosen": -0.3369649052619934, + "logits/rejected": -0.42328447103500366, + "logps/chosen": -0.8521198034286499, + "logps/rejected": -1.316091775894165, + "loss": 0.9385, + "nll_loss": 0.8834612965583801, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08521198481321335, + "rewards/margins": 0.046397190541028976, + "rewards/rejected": -0.13160917162895203, + "step": 4850 + }, + { + "epoch": 0.88, + "grad_norm": 1.677101969718933, + "learning_rate": 5.2281263648274854e-06, + "log_odds_chosen": 1.0122201442718506, + "log_odds_ratio": -0.5119179487228394, + "logits/chosen": -0.43187013268470764, + "logits/rejected": -0.4635971188545227, + "logps/chosen": -0.9174407124519348, + "logps/rejected": -1.548681616783142, + "loss": 1.0155, + "nll_loss": 0.9643124341964722, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0917440727353096, + "rewards/margins": 0.06312408298254013, + "rewards/rejected": -0.15486815571784973, + "step": 4860 + }, + { + "epoch": 0.88, + "grad_norm": 1.1978373527526855, + "learning_rate": 5.222303100888047e-06, + "log_odds_chosen": 0.5320814251899719, + "log_odds_ratio": -0.6548636555671692, + "logits/chosen": -0.38169607520103455, + "logits/rejected": -0.3945736885070801, + "logps/chosen": -0.9942175149917603, + "logps/rejected": -1.326147198677063, + "loss": 1.0518, + "nll_loss": 0.9862810969352722, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09942174702882767, + "rewards/margins": 0.03319296985864639, + "rewards/rejected": -0.13261471688747406, + "step": 4870 + }, + { + "epoch": 0.88, + "grad_norm": 0.9683055281639099, + "learning_rate": 5.216479836948609e-06, + "log_odds_chosen": 0.6372218728065491, + "log_odds_ratio": -0.559242844581604, + "logits/chosen": -0.4606989324092865, + "logits/rejected": -0.4654863476753235, + "logps/chosen": -0.9736550450325012, + "logps/rejected": -1.3879189491271973, + "loss": 1.0846, + "nll_loss": 1.0286335945129395, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09736550599336624, + "rewards/margins": 0.0414263978600502, + "rewards/rejected": -0.13879190385341644, + "step": 4880 + }, + { + "epoch": 0.88, + "grad_norm": 0.8117696046829224, + "learning_rate": 5.2106565730091715e-06, + "log_odds_chosen": 0.6601318120956421, + "log_odds_ratio": -0.5823066234588623, + "logits/chosen": -0.41589921712875366, + "logits/rejected": -0.45476651191711426, + "logps/chosen": -0.9463253021240234, + "logps/rejected": -1.3839843273162842, + "loss": 0.991, + "nll_loss": 0.9328001737594604, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09463252872228622, + "rewards/margins": 0.043765921145677567, + "rewards/rejected": -0.1383984386920929, + "step": 4890 + }, + { + "epoch": 0.89, + "grad_norm": 0.7774304747581482, + "learning_rate": 5.204833309069733e-06, + "log_odds_chosen": 0.5918189287185669, + "log_odds_ratio": -0.5761577486991882, + "logits/chosen": -0.4116500914096832, + "logits/rejected": -0.44521966576576233, + "logps/chosen": -0.9466740489006042, + "logps/rejected": -1.3614647388458252, + "loss": 0.9578, + "nll_loss": 0.90022212266922, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09466739743947983, + "rewards/margins": 0.041479066014289856, + "rewards/rejected": -0.13614647090435028, + "step": 4900 + }, + { + "epoch": 0.89, + "grad_norm": 1.037978172302246, + "learning_rate": 5.199010045130295e-06, + "log_odds_chosen": 0.686073899269104, + "log_odds_ratio": -0.5590696930885315, + "logits/chosen": -0.42658406496047974, + "logits/rejected": -0.4590669572353363, + "logps/chosen": -0.9395986795425415, + "logps/rejected": -1.4080203771591187, + "loss": 0.986, + "nll_loss": 0.9300875663757324, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09395986050367355, + "rewards/margins": 0.04684218019247055, + "rewards/rejected": -0.1408020555973053, + "step": 4910 + }, + { + "epoch": 0.89, + "grad_norm": 1.1430654525756836, + "learning_rate": 5.193186781190858e-06, + "log_odds_chosen": 0.8383340835571289, + "log_odds_ratio": -0.5327781438827515, + "logits/chosen": -0.4290473461151123, + "logits/rejected": -0.4340090751647949, + "logps/chosen": -0.8470977544784546, + "logps/rejected": -1.4011995792388916, + "loss": 0.9516, + "nll_loss": 0.8983678817749023, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0847097784280777, + "rewards/margins": 0.055410176515579224, + "rewards/rejected": -0.14011994004249573, + "step": 4920 + }, + { + "epoch": 0.89, + "grad_norm": 1.4240922927856445, + "learning_rate": 5.18736351725142e-06, + "log_odds_chosen": 0.6269794702529907, + "log_odds_ratio": -0.5663076639175415, + "logits/chosen": -0.44721508026123047, + "logits/rejected": -0.49740925431251526, + "logps/chosen": -0.9577927589416504, + "logps/rejected": -1.383522629737854, + "loss": 0.9916, + "nll_loss": 0.9349943399429321, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09577927738428116, + "rewards/margins": 0.0425729937851429, + "rewards/rejected": -0.13835227489471436, + "step": 4930 + }, + { + "epoch": 0.89, + "grad_norm": 2.0192978382110596, + "learning_rate": 5.181540253311981e-06, + "log_odds_chosen": 0.7749100923538208, + "log_odds_ratio": -0.5209966897964478, + "logits/chosen": -0.43163347244262695, + "logits/rejected": -0.4931555688381195, + "logps/chosen": -0.9455229043960571, + "logps/rejected": -1.4639647006988525, + "loss": 1.0147, + "nll_loss": 0.962630569934845, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09455229341983795, + "rewards/margins": 0.051844190806150436, + "rewards/rejected": -0.1463964879512787, + "step": 4940 + }, + { + "epoch": 0.89, + "grad_norm": 2.5260753631591797, + "learning_rate": 5.175716989372543e-06, + "log_odds_chosen": 0.7062619924545288, + "log_odds_ratio": -0.5886339545249939, + "logits/chosen": -0.3936625123023987, + "logits/rejected": -0.4544674754142761, + "logps/chosen": -1.0079313516616821, + "logps/rejected": -1.518587350845337, + "loss": 0.9657, + "nll_loss": 0.9068438410758972, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10079314559698105, + "rewards/margins": 0.051065593957901, + "rewards/rejected": -0.15185873210430145, + "step": 4950 + }, + { + "epoch": 0.9, + "grad_norm": 1.9594887495040894, + "learning_rate": 5.169893725433105e-06, + "log_odds_chosen": 0.9933183789253235, + "log_odds_ratio": -0.5047029256820679, + "logits/chosen": -0.42144671082496643, + "logits/rejected": -0.4574872851371765, + "logps/chosen": -0.9184238314628601, + "logps/rejected": -1.5921399593353271, + "loss": 1.0062, + "nll_loss": 0.9557191133499146, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09184238314628601, + "rewards/margins": 0.06737162172794342, + "rewards/rejected": -0.15921400487422943, + "step": 4960 + }, + { + "epoch": 0.9, + "grad_norm": 2.419018268585205, + "learning_rate": 5.164070461493667e-06, + "log_odds_chosen": 0.42638593912124634, + "log_odds_ratio": -0.5967411994934082, + "logits/chosen": -0.4372042119503021, + "logits/rejected": -0.4627193510532379, + "logps/chosen": -1.0391814708709717, + "logps/rejected": -1.3309781551361084, + "loss": 0.9981, + "nll_loss": 0.938385009765625, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10391815751791, + "rewards/margins": 0.02917967364192009, + "rewards/rejected": -0.1330978125333786, + "step": 4970 + }, + { + "epoch": 0.9, + "grad_norm": 0.9298260807991028, + "learning_rate": 5.158247197554229e-06, + "log_odds_chosen": 0.641968846321106, + "log_odds_ratio": -0.5857947468757629, + "logits/chosen": -0.3784220218658447, + "logits/rejected": -0.45632854104042053, + "logps/chosen": -0.8558870553970337, + "logps/rejected": -1.2764778137207031, + "loss": 1.0052, + "nll_loss": 0.9466080665588379, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08558870851993561, + "rewards/margins": 0.04205907881259918, + "rewards/rejected": -0.1276477873325348, + "step": 4980 + }, + { + "epoch": 0.9, + "grad_norm": 1.8054282665252686, + "learning_rate": 5.152423933614791e-06, + "log_odds_chosen": 0.6764596700668335, + "log_odds_ratio": -0.5754591822624207, + "logits/chosen": -0.4195118546485901, + "logits/rejected": -0.46894583106040955, + "logps/chosen": -0.9912070035934448, + "logps/rejected": -1.4474741220474243, + "loss": 0.9957, + "nll_loss": 0.9381793141365051, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09912069886922836, + "rewards/margins": 0.04562670364975929, + "rewards/rejected": -0.14474740624427795, + "step": 4990 + }, + { + "epoch": 0.9, + "grad_norm": 0.8131672739982605, + "learning_rate": 5.146600669675353e-06, + "log_odds_chosen": 0.765887439250946, + "log_odds_ratio": -0.5449743866920471, + "logits/chosen": -0.383352130651474, + "logits/rejected": -0.4293951392173767, + "logps/chosen": -0.825584888458252, + "logps/rejected": -1.3375442028045654, + "loss": 0.9055, + "nll_loss": 0.8509842753410339, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08255849033594131, + "rewards/margins": 0.05119592696428299, + "rewards/rejected": -0.1337544023990631, + "step": 5000 + }, + { + "epoch": 0.91, + "grad_norm": 1.0602699518203735, + "learning_rate": 5.140777405735915e-06, + "log_odds_chosen": 0.7626697421073914, + "log_odds_ratio": -0.5582002401351929, + "logits/chosen": -0.4061199724674225, + "logits/rejected": -0.4781287610530853, + "logps/chosen": -0.942223072052002, + "logps/rejected": -1.4417130947113037, + "loss": 0.9715, + "nll_loss": 0.9156550168991089, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.094222292304039, + "rewards/margins": 0.04994902387261391, + "rewards/rejected": -0.1441713124513626, + "step": 5010 + }, + { + "epoch": 0.91, + "grad_norm": 0.9965958595275879, + "learning_rate": 5.134954141796477e-06, + "log_odds_chosen": 0.5501303672790527, + "log_odds_ratio": -0.5543341636657715, + "logits/chosen": -0.3945973217487335, + "logits/rejected": -0.4275182783603668, + "logps/chosen": -0.8684164881706238, + "logps/rejected": -1.2159730195999146, + "loss": 0.9075, + "nll_loss": 0.8520703315734863, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0868416577577591, + "rewards/margins": 0.0347556471824646, + "rewards/rejected": -0.1215972900390625, + "step": 5020 + }, + { + "epoch": 0.91, + "grad_norm": 1.8370288610458374, + "learning_rate": 5.129130877857039e-06, + "log_odds_chosen": 0.9271780848503113, + "log_odds_ratio": -0.49825724959373474, + "logits/chosen": -0.383684366941452, + "logits/rejected": -0.44511160254478455, + "logps/chosen": -0.8650614619255066, + "logps/rejected": -1.4556103944778442, + "loss": 1.0062, + "nll_loss": 0.9563736915588379, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08650614321231842, + "rewards/margins": 0.059054892510175705, + "rewards/rejected": -0.14556102454662323, + "step": 5030 + }, + { + "epoch": 0.91, + "grad_norm": 1.0370079278945923, + "learning_rate": 5.1233076139176e-06, + "log_odds_chosen": 0.5564397573471069, + "log_odds_ratio": -0.6237277388572693, + "logits/chosen": -0.46575039625167847, + "logits/rejected": -0.4472903609275818, + "logps/chosen": -0.9088039398193359, + "logps/rejected": -1.2560622692108154, + "loss": 1.0217, + "nll_loss": 0.9593534469604492, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0908803939819336, + "rewards/margins": 0.03472583740949631, + "rewards/rejected": -0.1256062239408493, + "step": 5040 + }, + { + "epoch": 0.91, + "grad_norm": 1.349543571472168, + "learning_rate": 5.1174843499781625e-06, + "log_odds_chosen": 0.8041173219680786, + "log_odds_ratio": -0.5334728956222534, + "logits/chosen": -0.5147528052330017, + "logits/rejected": -0.528853714466095, + "logps/chosen": -1.0080214738845825, + "logps/rejected": -1.5837531089782715, + "loss": 1.0237, + "nll_loss": 0.9703797101974487, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10080213844776154, + "rewards/margins": 0.05757317692041397, + "rewards/rejected": -0.1583753228187561, + "step": 5050 + }, + { + "epoch": 0.91, + "grad_norm": 0.9374119639396667, + "learning_rate": 5.111661086038724e-06, + "log_odds_chosen": 0.588254988193512, + "log_odds_ratio": -0.6173927187919617, + "logits/chosen": -0.3634551167488098, + "logits/rejected": -0.4189843237400055, + "logps/chosen": -0.9690176248550415, + "logps/rejected": -1.4033445119857788, + "loss": 0.9763, + "nll_loss": 0.9145170450210571, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.09690175950527191, + "rewards/margins": 0.04343269020318985, + "rewards/rejected": -0.14033445715904236, + "step": 5060 + }, + { + "epoch": 0.92, + "grad_norm": 1.4300905466079712, + "learning_rate": 5.105837822099286e-06, + "log_odds_chosen": 0.9700382351875305, + "log_odds_ratio": -0.5175814032554626, + "logits/chosen": -0.417670875787735, + "logits/rejected": -0.43058285117149353, + "logps/chosen": -0.8614059686660767, + "logps/rejected": -1.5537515878677368, + "loss": 0.9059, + "nll_loss": 0.8541040420532227, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08614059537649155, + "rewards/margins": 0.06923457235097885, + "rewards/rejected": -0.1553751528263092, + "step": 5070 + }, + { + "epoch": 0.92, + "grad_norm": 1.2038459777832031, + "learning_rate": 5.100014558159849e-06, + "log_odds_chosen": 0.8242164850234985, + "log_odds_ratio": -0.5438815355300903, + "logits/chosen": -0.4557031989097595, + "logits/rejected": -0.47849076986312866, + "logps/chosen": -1.0131621360778809, + "logps/rejected": -1.5966014862060547, + "loss": 0.9387, + "nll_loss": 0.8842656016349792, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10131619870662689, + "rewards/margins": 0.05834393948316574, + "rewards/rejected": -0.15966013073921204, + "step": 5080 + }, + { + "epoch": 0.92, + "grad_norm": 1.4084062576293945, + "learning_rate": 5.09419129422041e-06, + "log_odds_chosen": 0.8046883344650269, + "log_odds_ratio": -0.5245261192321777, + "logits/chosen": -0.39046531915664673, + "logits/rejected": -0.42180243134498596, + "logps/chosen": -1.027181625366211, + "logps/rejected": -1.5842394828796387, + "loss": 1.0032, + "nll_loss": 0.950783371925354, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10271817445755005, + "rewards/margins": 0.055705778300762177, + "rewards/rejected": -0.15842394530773163, + "step": 5090 + }, + { + "epoch": 0.92, + "grad_norm": 1.1398626565933228, + "learning_rate": 5.088368030280972e-06, + "log_odds_chosen": 0.7871803045272827, + "log_odds_ratio": -0.5711994767189026, + "logits/chosen": -0.42440158128738403, + "logits/rejected": -0.42357882857322693, + "logps/chosen": -0.8840295076370239, + "logps/rejected": -1.4321167469024658, + "loss": 0.9717, + "nll_loss": 0.9145844578742981, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08840294927358627, + "rewards/margins": 0.05480872467160225, + "rewards/rejected": -0.14321167767047882, + "step": 5100 + }, + { + "epoch": 0.92, + "grad_norm": 1.2491414546966553, + "learning_rate": 5.082544766341535e-06, + "log_odds_chosen": 0.7577713131904602, + "log_odds_ratio": -0.5364837646484375, + "logits/chosen": -0.4503016471862793, + "logits/rejected": -0.4818963408470154, + "logps/chosen": -0.9299535751342773, + "logps/rejected": -1.4110002517700195, + "loss": 0.9741, + "nll_loss": 0.9204285740852356, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09299536049365997, + "rewards/margins": 0.0481046661734581, + "rewards/rejected": -0.14110003411769867, + "step": 5110 + }, + { + "epoch": 0.92, + "grad_norm": 1.3868515491485596, + "learning_rate": 5.076721502402095e-06, + "log_odds_chosen": 0.9328304529190063, + "log_odds_ratio": -0.5016459226608276, + "logits/chosen": -0.47459521889686584, + "logits/rejected": -0.468188613653183, + "logps/chosen": -0.9387983083724976, + "logps/rejected": -1.6026138067245483, + "loss": 1.0102, + "nll_loss": 0.9600510597229004, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09387984126806259, + "rewards/margins": 0.06638153642416, + "rewards/rejected": -0.1602613925933838, + "step": 5120 + }, + { + "epoch": 0.93, + "grad_norm": 1.8675638437271118, + "learning_rate": 5.070898238462658e-06, + "log_odds_chosen": 0.8587830662727356, + "log_odds_ratio": -0.5065292119979858, + "logits/chosen": -0.4888245165348053, + "logits/rejected": -0.501750648021698, + "logps/chosen": -0.9960910081863403, + "logps/rejected": -1.5872774124145508, + "loss": 1.0089, + "nll_loss": 0.9582953453063965, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09960909932851791, + "rewards/margins": 0.05911865830421448, + "rewards/rejected": -0.158727765083313, + "step": 5130 + }, + { + "epoch": 0.93, + "grad_norm": 1.1439307928085327, + "learning_rate": 5.06507497452322e-06, + "log_odds_chosen": 0.7081592679023743, + "log_odds_ratio": -0.5490130186080933, + "logits/chosen": -0.4350952208042145, + "logits/rejected": -0.46510592103004456, + "logps/chosen": -0.8627880215644836, + "logps/rejected": -1.3226877450942993, + "loss": 0.9911, + "nll_loss": 0.936205267906189, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08627880364656448, + "rewards/margins": 0.04598996788263321, + "rewards/rejected": -0.1322687715291977, + "step": 5140 + }, + { + "epoch": 0.93, + "grad_norm": 1.0725418329238892, + "learning_rate": 5.059251710583782e-06, + "log_odds_chosen": 0.5310506224632263, + "log_odds_ratio": -0.6339167356491089, + "logits/chosen": -0.5025271773338318, + "logits/rejected": -0.525850236415863, + "logps/chosen": -1.002502679824829, + "logps/rejected": -1.3719778060913086, + "loss": 1.0611, + "nll_loss": 0.997689425945282, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10025026649236679, + "rewards/margins": 0.03694751486182213, + "rewards/rejected": -0.13719777762889862, + "step": 5150 + }, + { + "epoch": 0.93, + "grad_norm": 2.5402228832244873, + "learning_rate": 5.053428446644344e-06, + "log_odds_chosen": 0.5080104470252991, + "log_odds_ratio": -0.6327738165855408, + "logits/chosen": -0.4841841757297516, + "logits/rejected": -0.4749310612678528, + "logps/chosen": -0.9421554803848267, + "logps/rejected": -1.2996408939361572, + "loss": 0.9475, + "nll_loss": 0.8842074275016785, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09421554952859879, + "rewards/margins": 0.03574854135513306, + "rewards/rejected": -0.12996408343315125, + "step": 5160 + }, + { + "epoch": 0.93, + "grad_norm": 1.4586751461029053, + "learning_rate": 5.047605182704906e-06, + "log_odds_chosen": 0.6196699738502502, + "log_odds_ratio": -0.5867195129394531, + "logits/chosen": -0.4862591624259949, + "logits/rejected": -0.47595709562301636, + "logps/chosen": -1.0899220705032349, + "logps/rejected": -1.5220587253570557, + "loss": 1.0038, + "nll_loss": 0.9451197385787964, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10899219661951065, + "rewards/margins": 0.043213676661252975, + "rewards/rejected": -0.15220588445663452, + "step": 5170 + }, + { + "epoch": 0.94, + "grad_norm": 1.0485233068466187, + "learning_rate": 5.041781918765468e-06, + "log_odds_chosen": 0.7615898847579956, + "log_odds_ratio": -0.5628899335861206, + "logits/chosen": -0.4379093647003174, + "logits/rejected": -0.4398167133331299, + "logps/chosen": -0.9211156964302063, + "logps/rejected": -1.4053175449371338, + "loss": 1.0219, + "nll_loss": 0.9656468629837036, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09211156517267227, + "rewards/margins": 0.04842020198702812, + "rewards/rejected": -0.1405317485332489, + "step": 5180 + }, + { + "epoch": 0.94, + "grad_norm": 1.5329257249832153, + "learning_rate": 5.03595865482603e-06, + "log_odds_chosen": 0.6447916030883789, + "log_odds_ratio": -0.5756027698516846, + "logits/chosen": -0.3828571140766144, + "logits/rejected": -0.4486420750617981, + "logps/chosen": -0.9009901881217957, + "logps/rejected": -1.3342673778533936, + "loss": 1.0371, + "nll_loss": 0.9795511960983276, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0900990217924118, + "rewards/margins": 0.04332772642374039, + "rewards/rejected": -0.1334267556667328, + "step": 5190 + }, + { + "epoch": 0.94, + "grad_norm": 1.021886944770813, + "learning_rate": 5.030135390886592e-06, + "log_odds_chosen": 1.0003609657287598, + "log_odds_ratio": -0.5113279819488525, + "logits/chosen": -0.38337117433547974, + "logits/rejected": -0.4203677177429199, + "logps/chosen": -0.812484860420227, + "logps/rejected": -1.4831180572509766, + "loss": 0.8823, + "nll_loss": 0.8311184048652649, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08124849200248718, + "rewards/margins": 0.06706332415342331, + "rewards/rejected": -0.1483118236064911, + "step": 5200 + }, + { + "epoch": 0.94, + "grad_norm": 1.1141126155853271, + "learning_rate": 5.024312126947154e-06, + "log_odds_chosen": 1.0493788719177246, + "log_odds_ratio": -0.5095570683479309, + "logits/chosen": -0.3903278708457947, + "logits/rejected": -0.45845937728881836, + "logps/chosen": -0.9482452273368835, + "logps/rejected": -1.6919740438461304, + "loss": 0.9605, + "nll_loss": 0.9095357656478882, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09482452273368835, + "rewards/margins": 0.07437288761138916, + "rewards/rejected": -0.16919739544391632, + "step": 5210 + }, + { + "epoch": 0.94, + "grad_norm": 1.2961907386779785, + "learning_rate": 5.018488863007715e-06, + "log_odds_chosen": 0.9118143320083618, + "log_odds_ratio": -0.4949149489402771, + "logits/chosen": -0.42776647210121155, + "logits/rejected": -0.49795588850975037, + "logps/chosen": -1.0686237812042236, + "logps/rejected": -1.7282140254974365, + "loss": 1.0082, + "nll_loss": 0.9587093591690063, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1068623811006546, + "rewards/margins": 0.06595902144908905, + "rewards/rejected": -0.17282140254974365, + "step": 5220 + }, + { + "epoch": 0.94, + "grad_norm": 1.5598081350326538, + "learning_rate": 5.012665599068277e-06, + "log_odds_chosen": 0.5592025518417358, + "log_odds_ratio": -0.583517849445343, + "logits/chosen": -0.4597102105617523, + "logits/rejected": -0.49349212646484375, + "logps/chosen": -0.9325317144393921, + "logps/rejected": -1.2962085008621216, + "loss": 1.0022, + "nll_loss": 0.9438241720199585, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09325318038463593, + "rewards/margins": 0.03636767342686653, + "rewards/rejected": -0.12962085008621216, + "step": 5230 + }, + { + "epoch": 0.95, + "grad_norm": 1.3146644830703735, + "learning_rate": 5.00684233512884e-06, + "log_odds_chosen": 0.7231449484825134, + "log_odds_ratio": -0.5562411546707153, + "logits/chosen": -0.47616177797317505, + "logits/rejected": -0.5005732178688049, + "logps/chosen": -0.9185335040092468, + "logps/rejected": -1.388819932937622, + "loss": 1.0186, + "nll_loss": 0.9629395604133606, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09185335785150528, + "rewards/margins": 0.047028638422489166, + "rewards/rejected": -0.13888199627399445, + "step": 5240 + }, + { + "epoch": 0.95, + "grad_norm": 1.8377481698989868, + "learning_rate": 5.001019071189401e-06, + "log_odds_chosen": 0.6257731318473816, + "log_odds_ratio": -0.5724449157714844, + "logits/chosen": -0.549246609210968, + "logits/rejected": -0.5497399568557739, + "logps/chosen": -0.963038444519043, + "logps/rejected": -1.3778165578842163, + "loss": 1.0246, + "nll_loss": 0.9673110842704773, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09630385041236877, + "rewards/margins": 0.04147782921791077, + "rewards/rejected": -0.13778167963027954, + "step": 5250 + }, + { + "epoch": 0.95, + "grad_norm": 1.3203247785568237, + "learning_rate": 4.9951958072499634e-06, + "log_odds_chosen": 0.8214631080627441, + "log_odds_ratio": -0.5461077094078064, + "logits/chosen": -0.4012463092803955, + "logits/rejected": -0.4272507131099701, + "logps/chosen": -0.8835545778274536, + "logps/rejected": -1.4272209405899048, + "loss": 0.9018, + "nll_loss": 0.8472299575805664, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08835545927286148, + "rewards/margins": 0.054366640746593475, + "rewards/rejected": -0.14272208511829376, + "step": 5260 + }, + { + "epoch": 0.95, + "grad_norm": 0.8510302305221558, + "learning_rate": 4.989372543310526e-06, + "log_odds_chosen": 0.9004091024398804, + "log_odds_ratio": -0.5193175673484802, + "logits/chosen": -0.40570640563964844, + "logits/rejected": -0.44902676343917847, + "logps/chosen": -0.9213584661483765, + "logps/rejected": -1.5180364847183228, + "loss": 0.9516, + "nll_loss": 0.8996561169624329, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09213585406541824, + "rewards/margins": 0.05966780334711075, + "rewards/rejected": -0.151803657412529, + "step": 5270 + }, + { + "epoch": 0.95, + "grad_norm": 4.7708940505981445, + "learning_rate": 4.983549279371087e-06, + "log_odds_chosen": 0.9598283767700195, + "log_odds_ratio": -0.5097156763076782, + "logits/chosen": -0.41570359468460083, + "logits/rejected": -0.435255765914917, + "logps/chosen": -0.8283422589302063, + "logps/rejected": -1.51011061668396, + "loss": 0.9208, + "nll_loss": 0.869789719581604, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08283422142267227, + "rewards/margins": 0.06817685067653656, + "rewards/rejected": -0.15101107954978943, + "step": 5280 + }, + { + "epoch": 0.96, + "grad_norm": 1.195023775100708, + "learning_rate": 4.9777260154316495e-06, + "log_odds_chosen": 0.7134519815444946, + "log_odds_ratio": -0.5415999293327332, + "logits/chosen": -0.4060707986354828, + "logits/rejected": -0.4542246460914612, + "logps/chosen": -0.9608157277107239, + "logps/rejected": -1.4222064018249512, + "loss": 0.9772, + "nll_loss": 0.9230211973190308, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09608156979084015, + "rewards/margins": 0.04613906145095825, + "rewards/rejected": -0.1422206461429596, + "step": 5290 + }, + { + "epoch": 0.96, + "grad_norm": 1.9315356016159058, + "learning_rate": 4.971902751492211e-06, + "log_odds_chosen": 0.6030072569847107, + "log_odds_ratio": -0.6301401257514954, + "logits/chosen": -0.42665156722068787, + "logits/rejected": -0.45132675766944885, + "logps/chosen": -0.9962190389633179, + "logps/rejected": -1.4173381328582764, + "loss": 1.0517, + "nll_loss": 0.9886919260025024, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09962191432714462, + "rewards/margins": 0.042111899703741074, + "rewards/rejected": -0.1417338252067566, + "step": 5300 + }, + { + "epoch": 0.96, + "grad_norm": 1.0606050491333008, + "learning_rate": 4.9660794875527724e-06, + "log_odds_chosen": 1.0494234561920166, + "log_odds_ratio": -0.47545939683914185, + "logits/chosen": -0.37000641226768494, + "logits/rejected": -0.4474189877510071, + "logps/chosen": -0.9080765843391418, + "logps/rejected": -1.5959746837615967, + "loss": 0.9406, + "nll_loss": 0.8930392265319824, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09080766141414642, + "rewards/margins": 0.06878980249166489, + "rewards/rejected": -0.1595974713563919, + "step": 5310 + }, + { + "epoch": 0.96, + "grad_norm": 1.306066870689392, + "learning_rate": 4.960256223613335e-06, + "log_odds_chosen": 0.724065899848938, + "log_odds_ratio": -0.5389540195465088, + "logits/chosen": -0.4582054018974304, + "logits/rejected": -0.4962041974067688, + "logps/chosen": -1.0584847927093506, + "logps/rejected": -1.5543081760406494, + "loss": 1.0256, + "nll_loss": 0.9717254638671875, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10584849119186401, + "rewards/margins": 0.04958232864737511, + "rewards/rejected": -0.15543082356452942, + "step": 5320 + }, + { + "epoch": 0.96, + "grad_norm": 1.0546212196350098, + "learning_rate": 4.954432959673897e-06, + "log_odds_chosen": 0.7997231483459473, + "log_odds_ratio": -0.5457186698913574, + "logits/chosen": -0.37917906045913696, + "logits/rejected": -0.4547523856163025, + "logps/chosen": -0.8591880798339844, + "logps/rejected": -1.397567629814148, + "loss": 0.9259, + "nll_loss": 0.8712942004203796, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08591881394386292, + "rewards/margins": 0.05383795499801636, + "rewards/rejected": -0.13975676894187927, + "step": 5330 + }, + { + "epoch": 0.96, + "grad_norm": 1.134579062461853, + "learning_rate": 4.9486096957344585e-06, + "log_odds_chosen": 0.8113735914230347, + "log_odds_ratio": -0.49761638045310974, + "logits/chosen": -0.4220595359802246, + "logits/rejected": -0.4521370530128479, + "logps/chosen": -0.9218913912773132, + "logps/rejected": -1.467212200164795, + "loss": 0.9593, + "nll_loss": 0.9095503091812134, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09218914806842804, + "rewards/margins": 0.05453207343816757, + "rewards/rejected": -0.14672121405601501, + "step": 5340 + }, + { + "epoch": 0.97, + "grad_norm": 1.6318745613098145, + "learning_rate": 4.942786431795021e-06, + "log_odds_chosen": 0.9257783889770508, + "log_odds_ratio": -0.48035699129104614, + "logits/chosen": -0.32946550846099854, + "logits/rejected": -0.39153873920440674, + "logps/chosen": -0.9110609889030457, + "logps/rejected": -1.5010361671447754, + "loss": 0.9243, + "nll_loss": 0.8762644529342651, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.0911061018705368, + "rewards/margins": 0.05899751931428909, + "rewards/rejected": -0.1501035988330841, + "step": 5350 + }, + { + "epoch": 0.97, + "grad_norm": 1.2999166250228882, + "learning_rate": 4.936963167855583e-06, + "log_odds_chosen": 0.6719887852668762, + "log_odds_ratio": -0.5760074853897095, + "logits/chosen": -0.5032616257667542, + "logits/rejected": -0.5166983604431152, + "logps/chosen": -0.9730969667434692, + "logps/rejected": -1.427927851676941, + "loss": 1.0772, + "nll_loss": 1.0196009874343872, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09730970114469528, + "rewards/margins": 0.045483093708753586, + "rewards/rejected": -0.14279279112815857, + "step": 5360 + }, + { + "epoch": 0.97, + "grad_norm": 1.9392198324203491, + "learning_rate": 4.9311399039161454e-06, + "log_odds_chosen": 0.6282123327255249, + "log_odds_ratio": -0.5469298362731934, + "logits/chosen": -0.45358020067214966, + "logits/rejected": -0.5022454857826233, + "logps/chosen": -1.0295902490615845, + "logps/rejected": -1.4616683721542358, + "loss": 1.0047, + "nll_loss": 0.9499963521957397, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10295902192592621, + "rewards/margins": 0.0432078093290329, + "rewards/rejected": -0.1461668312549591, + "step": 5370 + }, + { + "epoch": 0.97, + "grad_norm": 1.0072379112243652, + "learning_rate": 4.925316639976707e-06, + "log_odds_chosen": 0.7839166522026062, + "log_odds_ratio": -0.5868215560913086, + "logits/chosen": -0.5020217895507812, + "logits/rejected": -0.5194701552391052, + "logps/chosen": -0.9136531949043274, + "logps/rejected": -1.440227746963501, + "loss": 1.0671, + "nll_loss": 1.0084375143051147, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09136532247066498, + "rewards/margins": 0.05265744403004646, + "rewards/rejected": -0.14402277767658234, + "step": 5380 + }, + { + "epoch": 0.97, + "grad_norm": 0.7636964321136475, + "learning_rate": 4.919493376037268e-06, + "log_odds_chosen": 0.9430241584777832, + "log_odds_ratio": -0.48400864005088806, + "logits/chosen": -0.4314158856868744, + "logits/rejected": -0.47129616141319275, + "logps/chosen": -0.925650954246521, + "logps/rejected": -1.5474523305892944, + "loss": 0.9774, + "nll_loss": 0.9290070533752441, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09256509691476822, + "rewards/margins": 0.06218013912439346, + "rewards/rejected": -0.1547452211380005, + "step": 5390 + }, + { + "epoch": 0.98, + "grad_norm": 2.348417282104492, + "learning_rate": 4.913670112097831e-06, + "log_odds_chosen": 0.972173810005188, + "log_odds_ratio": -0.5155828595161438, + "logits/chosen": -0.34915369749069214, + "logits/rejected": -0.4212590157985687, + "logps/chosen": -0.8123193979263306, + "logps/rejected": -1.457058072090149, + "loss": 0.8675, + "nll_loss": 0.8159490823745728, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08123193681240082, + "rewards/margins": 0.06447387486696243, + "rewards/rejected": -0.14570581912994385, + "step": 5400 + }, + { + "epoch": 0.98, + "grad_norm": 0.9304720759391785, + "learning_rate": 4.907846848158392e-06, + "log_odds_chosen": 0.7508946657180786, + "log_odds_ratio": -0.5314705967903137, + "logits/chosen": -0.41652408242225647, + "logits/rejected": -0.47718554735183716, + "logps/chosen": -0.9125279188156128, + "logps/rejected": -1.4500106573104858, + "loss": 0.9048, + "nll_loss": 0.8516514897346497, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09125280380249023, + "rewards/margins": 0.05374826118350029, + "rewards/rejected": -0.14500106871128082, + "step": 5410 + }, + { + "epoch": 0.98, + "grad_norm": 1.3840512037277222, + "learning_rate": 4.9020235842189545e-06, + "log_odds_chosen": 0.8840494155883789, + "log_odds_ratio": -0.5257449150085449, + "logits/chosen": -0.3711920380592346, + "logits/rejected": -0.43855515122413635, + "logps/chosen": -0.9542511105537415, + "logps/rejected": -1.5532355308532715, + "loss": 0.9891, + "nll_loss": 0.9365428686141968, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0954250916838646, + "rewards/margins": 0.05989844724535942, + "rewards/rejected": -0.1553235501050949, + "step": 5420 + }, + { + "epoch": 0.98, + "grad_norm": 1.0180045366287231, + "learning_rate": 4.896200320279517e-06, + "log_odds_chosen": 0.6119788885116577, + "log_odds_ratio": -0.6346170902252197, + "logits/chosen": -0.42415839433670044, + "logits/rejected": -0.42158955335617065, + "logps/chosen": -0.9403412938117981, + "logps/rejected": -1.39726722240448, + "loss": 0.9807, + "nll_loss": 0.9172808527946472, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09403412789106369, + "rewards/margins": 0.04569259658455849, + "rewards/rejected": -0.13972671329975128, + "step": 5430 + }, + { + "epoch": 0.98, + "grad_norm": 2.6387805938720703, + "learning_rate": 4.890377056340078e-06, + "log_odds_chosen": 0.8593441843986511, + "log_odds_ratio": -0.5371569991111755, + "logits/chosen": -0.4286310076713562, + "logits/rejected": -0.44843417406082153, + "logps/chosen": -0.9194551706314087, + "logps/rejected": -1.4854052066802979, + "loss": 1.0245, + "nll_loss": 0.9708328247070312, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09194551408290863, + "rewards/margins": 0.056595008820295334, + "rewards/rejected": -0.14854054152965546, + "step": 5440 + }, + { + "epoch": 0.98, + "grad_norm": 1.9560596942901611, + "learning_rate": 4.8845537924006405e-06, + "log_odds_chosen": 0.9763432741165161, + "log_odds_ratio": -0.4967397153377533, + "logits/chosen": -0.3855515420436859, + "logits/rejected": -0.43095794320106506, + "logps/chosen": -0.9564552307128906, + "logps/rejected": -1.5772628784179688, + "loss": 0.8998, + "nll_loss": 0.8501434326171875, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09564553201198578, + "rewards/margins": 0.06208076328039169, + "rewards/rejected": -0.15772630274295807, + "step": 5450 + }, + { + "epoch": 0.99, + "grad_norm": 1.3743293285369873, + "learning_rate": 4.878730528461203e-06, + "log_odds_chosen": 0.9867672920227051, + "log_odds_ratio": -0.4925385117530823, + "logits/chosen": -0.3673693537712097, + "logits/rejected": -0.43347668647766113, + "logps/chosen": -0.9285440444946289, + "logps/rejected": -1.5850690603256226, + "loss": 0.8967, + "nll_loss": 0.8474740982055664, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09285441040992737, + "rewards/margins": 0.0656525045633316, + "rewards/rejected": -0.15850690007209778, + "step": 5460 + }, + { + "epoch": 0.99, + "grad_norm": 1.4439855813980103, + "learning_rate": 4.872907264521764e-06, + "log_odds_chosen": 0.9440599679946899, + "log_odds_ratio": -0.525600254535675, + "logits/chosen": -0.4502839148044586, + "logits/rejected": -0.48919907212257385, + "logps/chosen": -0.8880621194839478, + "logps/rejected": -1.5326378345489502, + "loss": 1.003, + "nll_loss": 0.9504783749580383, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08880620449781418, + "rewards/margins": 0.06445760279893875, + "rewards/rejected": -0.15326380729675293, + "step": 5470 + }, + { + "epoch": 0.99, + "grad_norm": 1.1630498170852661, + "learning_rate": 4.867084000582326e-06, + "log_odds_chosen": 1.0208189487457275, + "log_odds_ratio": -0.4648086130619049, + "logits/chosen": -0.37898311018943787, + "logits/rejected": -0.3820800185203552, + "logps/chosen": -0.8005183339118958, + "logps/rejected": -1.456621766090393, + "loss": 0.8536, + "nll_loss": 0.8070961236953735, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08005184680223465, + "rewards/margins": 0.06561031937599182, + "rewards/rejected": -0.14566215872764587, + "step": 5480 + }, + { + "epoch": 0.99, + "grad_norm": 0.786601185798645, + "learning_rate": 4.861260736642888e-06, + "log_odds_chosen": 0.9441927075386047, + "log_odds_ratio": -0.507569432258606, + "logits/chosen": -0.4196494519710541, + "logits/rejected": -0.47500672936439514, + "logps/chosen": -0.8276809453964233, + "logps/rejected": -1.4831466674804688, + "loss": 0.9866, + "nll_loss": 0.9358325004577637, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08276810497045517, + "rewards/margins": 0.06554658710956573, + "rewards/rejected": -0.1483146846294403, + "step": 5490 + }, + { + "epoch": 0.99, + "grad_norm": 1.4178991317749023, + "learning_rate": 4.8554374727034495e-06, + "log_odds_chosen": 0.9024141430854797, + "log_odds_ratio": -0.5218056440353394, + "logits/chosen": -0.44788289070129395, + "logits/rejected": -0.47160372138023376, + "logps/chosen": -0.9077650904655457, + "logps/rejected": -1.4974048137664795, + "loss": 0.9775, + "nll_loss": 0.9252709150314331, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09077651798725128, + "rewards/margins": 0.05896396562457085, + "rewards/rejected": -0.14974047243595123, + "step": 5500 + }, + { + "epoch": 1.0, + "grad_norm": 1.9363542795181274, + "learning_rate": 4.849614208764012e-06, + "log_odds_chosen": 0.7485558390617371, + "log_odds_ratio": -0.6058672666549683, + "logits/chosen": -0.44489818811416626, + "logits/rejected": -0.43072813749313354, + "logps/chosen": -1.074777364730835, + "logps/rejected": -1.5849609375, + "loss": 0.9916, + "nll_loss": 0.9309671521186829, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10747772455215454, + "rewards/margins": 0.051018375903367996, + "rewards/rejected": -0.15849611163139343, + "step": 5510 + }, + { + "epoch": 1.0, + "grad_norm": 1.9006177186965942, + "learning_rate": 4.843790944824574e-06, + "log_odds_chosen": 0.9450963139533997, + "log_odds_ratio": -0.5010809302330017, + "logits/chosen": -0.458138644695282, + "logits/rejected": -0.48872989416122437, + "logps/chosen": -0.9480603337287903, + "logps/rejected": -1.5880528688430786, + "loss": 1.0535, + "nll_loss": 1.0034300088882446, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09480603039264679, + "rewards/margins": 0.0639992505311966, + "rewards/rejected": -0.15880528092384338, + "step": 5520 + }, + { + "epoch": 1.0, + "grad_norm": 1.5550851821899414, + "learning_rate": 4.837967680885136e-06, + "log_odds_chosen": 0.8837807774543762, + "log_odds_ratio": -0.5516039729118347, + "logits/chosen": -0.4584503173828125, + "logits/rejected": -0.5218029022216797, + "logps/chosen": -1.0116498470306396, + "logps/rejected": -1.6194604635238647, + "loss": 1.017, + "nll_loss": 0.9618609547615051, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10116498172283173, + "rewards/margins": 0.06078105419874191, + "rewards/rejected": -0.16194602847099304, + "step": 5530 + }, + { + "epoch": 1.0, + "eval_log_odds_chosen": 0.7954168319702148, + "eval_log_odds_ratio": -0.5471761226654053, + "eval_logits/chosen": -0.4407408535480499, + "eval_logits/rejected": -0.4670475721359253, + "eval_logps/chosen": -0.9404959082603455, + "eval_logps/rejected": -1.4815776348114014, + "eval_loss": 0.9784727096557617, + "eval_nll_loss": 0.9237551093101501, + "eval_rewards/accuracies": 0.6675246953964233, + "eval_rewards/chosen": -0.09404958784580231, + "eval_rewards/margins": 0.05410816892981529, + "eval_rewards/rejected": -0.1481577455997467, + "eval_runtime": 2286.2087, + "eval_samples_per_second": 1.02, + "eval_steps_per_second": 1.02, + "step": 5536 + }, + { + "epoch": 1.0, + "grad_norm": 1.2207567691802979, + "learning_rate": 4.832144416945698e-06, + "log_odds_chosen": 0.892682671546936, + "log_odds_ratio": -0.5117454528808594, + "logits/chosen": -0.4071148931980133, + "logits/rejected": -0.47196096181869507, + "logps/chosen": -0.9103930592536926, + "logps/rejected": -1.4970780611038208, + "loss": 0.9617, + "nll_loss": 0.9104766845703125, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09103929996490479, + "rewards/margins": 0.05866849422454834, + "rewards/rejected": -0.14970777928829193, + "step": 5540 + }, + { + "epoch": 1.0, + "grad_norm": 1.3107197284698486, + "learning_rate": 4.82632115300626e-06, + "log_odds_chosen": 0.9778968095779419, + "log_odds_ratio": -0.4795476794242859, + "logits/chosen": -0.45730486512184143, + "logits/rejected": -0.5228903889656067, + "logps/chosen": -0.8578106164932251, + "logps/rejected": -1.526828408241272, + "loss": 0.938, + "nll_loss": 0.8900574445724487, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08578106015920639, + "rewards/margins": 0.0669017806649208, + "rewards/rejected": -0.1526828408241272, + "step": 5550 + }, + { + "epoch": 1.0, + "grad_norm": 1.3291869163513184, + "learning_rate": 4.820497889066822e-06, + "log_odds_chosen": 0.7961874604225159, + "log_odds_ratio": -0.5262941718101501, + "logits/chosen": -0.4247972071170807, + "logits/rejected": -0.4710194170475006, + "logps/chosen": -0.9164491891860962, + "logps/rejected": -1.4848397970199585, + "loss": 1.0881, + "nll_loss": 1.0354729890823364, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09164492785930634, + "rewards/margins": 0.05683906003832817, + "rewards/rejected": -0.1484839916229248, + "step": 5560 + }, + { + "epoch": 1.01, + "grad_norm": 1.3004399538040161, + "learning_rate": 4.814674625127383e-06, + "log_odds_chosen": 0.888166069984436, + "log_odds_ratio": -0.4967440962791443, + "logits/chosen": -0.452568918466568, + "logits/rejected": -0.4748914837837219, + "logps/chosen": -0.8483587503433228, + "logps/rejected": -1.446743369102478, + "loss": 0.8882, + "nll_loss": 0.8385192155838013, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08483588695526123, + "rewards/margins": 0.05983845517039299, + "rewards/rejected": -0.14467434585094452, + "step": 5570 + }, + { + "epoch": 1.01, + "grad_norm": 1.4460393190383911, + "learning_rate": 4.8088513611879455e-06, + "log_odds_chosen": 0.7630864977836609, + "log_odds_ratio": -0.5620290040969849, + "logits/chosen": -0.41607731580734253, + "logits/rejected": -0.44624605774879456, + "logps/chosen": -0.9713469743728638, + "logps/rejected": -1.4890177249908447, + "loss": 0.9427, + "nll_loss": 0.8865121603012085, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09713469445705414, + "rewards/margins": 0.05176708847284317, + "rewards/rejected": -0.1489017903804779, + "step": 5580 + }, + { + "epoch": 1.01, + "grad_norm": 1.551102638244629, + "learning_rate": 4.803028097248508e-06, + "log_odds_chosen": 0.5414020419120789, + "log_odds_ratio": -0.5626022219657898, + "logits/chosen": -0.4925295412540436, + "logits/rejected": -0.48884886503219604, + "logps/chosen": -0.9413201212882996, + "logps/rejected": -1.270108938217163, + "loss": 0.9689, + "nll_loss": 0.9126895666122437, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09413202106952667, + "rewards/margins": 0.03287887945771217, + "rewards/rejected": -0.12701091170310974, + "step": 5590 + }, + { + "epoch": 1.01, + "grad_norm": 1.6918314695358276, + "learning_rate": 4.797204833309069e-06, + "log_odds_chosen": 0.6707077026367188, + "log_odds_ratio": -0.5309171080589294, + "logits/chosen": -0.47139209508895874, + "logits/rejected": -0.4601783752441406, + "logps/chosen": -0.9363657832145691, + "logps/rejected": -1.3649402856826782, + "loss": 0.9239, + "nll_loss": 0.8708307147026062, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09363657981157303, + "rewards/margins": 0.042857442051172256, + "rewards/rejected": -0.1364940106868744, + "step": 5600 + }, + { + "epoch": 1.01, + "grad_norm": 1.3999618291854858, + "learning_rate": 4.7913815693696316e-06, + "log_odds_chosen": 0.8025909662246704, + "log_odds_ratio": -0.533409595489502, + "logits/chosen": -0.4704816937446594, + "logits/rejected": -0.5034157633781433, + "logps/chosen": -0.9593275785446167, + "logps/rejected": -1.4829776287078857, + "loss": 1.0076, + "nll_loss": 0.9542564153671265, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09593275934457779, + "rewards/margins": 0.052365005016326904, + "rewards/rejected": -0.1482977569103241, + "step": 5610 + }, + { + "epoch": 1.02, + "grad_norm": 1.3082317113876343, + "learning_rate": 4.785558305430194e-06, + "log_odds_chosen": 0.8759142160415649, + "log_odds_ratio": -0.4906393587589264, + "logits/chosen": -0.4527658522129059, + "logits/rejected": -0.48744139075279236, + "logps/chosen": -0.9517688751220703, + "logps/rejected": -1.5468437671661377, + "loss": 0.9451, + "nll_loss": 0.8960543870925903, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.09517689049243927, + "rewards/margins": 0.05950748175382614, + "rewards/rejected": -0.154684379696846, + "step": 5620 + }, + { + "epoch": 1.02, + "grad_norm": 0.9139883518218994, + "learning_rate": 4.779735041490755e-06, + "log_odds_chosen": 0.8130331039428711, + "log_odds_ratio": -0.531697690486908, + "logits/chosen": -0.4422592222690582, + "logits/rejected": -0.48472967743873596, + "logps/chosen": -0.9625687599182129, + "logps/rejected": -1.5124341249465942, + "loss": 0.9998, + "nll_loss": 0.9466264843940735, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09625687450170517, + "rewards/margins": 0.054986536502838135, + "rewards/rejected": -0.1512434184551239, + "step": 5630 + }, + { + "epoch": 1.02, + "grad_norm": 1.2138420343399048, + "learning_rate": 4.773911777551318e-06, + "log_odds_chosen": 0.6880245208740234, + "log_odds_ratio": -0.5870713591575623, + "logits/chosen": -0.45311981439590454, + "logits/rejected": -0.4890304505825043, + "logps/chosen": -0.9035439491271973, + "logps/rejected": -1.3936666250228882, + "loss": 0.977, + "nll_loss": 0.9182752370834351, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09035440534353256, + "rewards/margins": 0.04901226609945297, + "rewards/rejected": -0.13936665654182434, + "step": 5640 + }, + { + "epoch": 1.02, + "grad_norm": 3.006988286972046, + "learning_rate": 4.76808851361188e-06, + "log_odds_chosen": 0.6752545237541199, + "log_odds_ratio": -0.6011164784431458, + "logits/chosen": -0.5098429918289185, + "logits/rejected": -0.5509897470474243, + "logps/chosen": -1.0110231637954712, + "logps/rejected": -1.5172195434570312, + "loss": 1.0542, + "nll_loss": 0.9940736889839172, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1011023297905922, + "rewards/margins": 0.05061963200569153, + "rewards/rejected": -0.15172193944454193, + "step": 5650 + }, + { + "epoch": 1.02, + "grad_norm": 1.0073820352554321, + "learning_rate": 4.7622652496724406e-06, + "log_odds_chosen": 0.6800388693809509, + "log_odds_ratio": -0.5709148645401001, + "logits/chosen": -0.4339476227760315, + "logits/rejected": -0.5066033601760864, + "logps/chosen": -0.8620915412902832, + "logps/rejected": -1.3292564153671265, + "loss": 0.9278, + "nll_loss": 0.870742917060852, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08620915561914444, + "rewards/margins": 0.04671648517251015, + "rewards/rejected": -0.13292565941810608, + "step": 5660 + }, + { + "epoch": 1.02, + "grad_norm": 1.3023157119750977, + "learning_rate": 4.756441985733003e-06, + "log_odds_chosen": 0.7827984690666199, + "log_odds_ratio": -0.49791890382766724, + "logits/chosen": -0.46909022331237793, + "logits/rejected": -0.499662309885025, + "logps/chosen": -0.8504747152328491, + "logps/rejected": -1.3593947887420654, + "loss": 0.9253, + "nll_loss": 0.8755376935005188, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08504746854305267, + "rewards/margins": 0.05089200660586357, + "rewards/rejected": -0.13593947887420654, + "step": 5670 + }, + { + "epoch": 1.03, + "grad_norm": 1.9402965307235718, + "learning_rate": 4.750618721793565e-06, + "log_odds_chosen": 0.764835774898529, + "log_odds_ratio": -0.5476073026657104, + "logits/chosen": -0.43662959337234497, + "logits/rejected": -0.48592323064804077, + "logps/chosen": -0.8871974945068359, + "logps/rejected": -1.366875171661377, + "loss": 0.9221, + "nll_loss": 0.8673022985458374, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08871974796056747, + "rewards/margins": 0.04796776920557022, + "rewards/rejected": -0.1366875320672989, + "step": 5680 + }, + { + "epoch": 1.03, + "grad_norm": 0.885296106338501, + "learning_rate": 4.744795457854127e-06, + "log_odds_chosen": 0.7651258707046509, + "log_odds_ratio": -0.5727325677871704, + "logits/chosen": -0.49575671553611755, + "logits/rejected": -0.534355103969574, + "logps/chosen": -0.8569524884223938, + "logps/rejected": -1.3553133010864258, + "loss": 1.0281, + "nll_loss": 0.9708574414253235, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08569525182247162, + "rewards/margins": 0.04983608052134514, + "rewards/rejected": -0.13553133606910706, + "step": 5690 + }, + { + "epoch": 1.03, + "grad_norm": 3.4250690937042236, + "learning_rate": 4.738972193914689e-06, + "log_odds_chosen": 0.9850413203239441, + "log_odds_ratio": -0.5156182050704956, + "logits/chosen": -0.44237810373306274, + "logits/rejected": -0.48232460021972656, + "logps/chosen": -0.9154784083366394, + "logps/rejected": -1.5720593929290771, + "loss": 0.9803, + "nll_loss": 0.9286953210830688, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09154783189296722, + "rewards/margins": 0.06565810739994049, + "rewards/rejected": -0.15720593929290771, + "step": 5700 + }, + { + "epoch": 1.03, + "grad_norm": 1.2578308582305908, + "learning_rate": 4.733148929975251e-06, + "log_odds_chosen": 0.5149042010307312, + "log_odds_ratio": -0.6784185171127319, + "logits/chosen": -0.5314583778381348, + "logits/rejected": -0.5327475666999817, + "logps/chosen": -1.0456842184066772, + "logps/rejected": -1.4066373109817505, + "loss": 1.0308, + "nll_loss": 0.9629226922988892, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.10456842184066772, + "rewards/margins": 0.03609530255198479, + "rewards/rejected": -0.14066371321678162, + "step": 5710 + }, + { + "epoch": 1.03, + "grad_norm": 1.155043363571167, + "learning_rate": 4.727325666035813e-06, + "log_odds_chosen": 0.8577598333358765, + "log_odds_ratio": -0.5551376342773438, + "logits/chosen": -0.4258693754673004, + "logits/rejected": -0.4448915421962738, + "logps/chosen": -0.9433683156967163, + "logps/rejected": -1.5140371322631836, + "loss": 0.9678, + "nll_loss": 0.9122626185417175, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0943368449807167, + "rewards/margins": 0.05706688016653061, + "rewards/rejected": -0.15140371024608612, + "step": 5720 + }, + { + "epoch": 1.04, + "grad_norm": 0.8873026967048645, + "learning_rate": 4.721502402096375e-06, + "log_odds_chosen": 1.106774091720581, + "log_odds_ratio": -0.495880663394928, + "logits/chosen": -0.4094298779964447, + "logits/rejected": -0.3826850354671478, + "logps/chosen": -0.7966852784156799, + "logps/rejected": -1.5608104467391968, + "loss": 0.9192, + "nll_loss": 0.8695797920227051, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.07966851443052292, + "rewards/margins": 0.07641252875328064, + "rewards/rejected": -0.15608103573322296, + "step": 5730 + }, + { + "epoch": 1.04, + "grad_norm": 0.6950234770774841, + "learning_rate": 4.715679138156937e-06, + "log_odds_chosen": 1.0668140649795532, + "log_odds_ratio": -0.48193269968032837, + "logits/chosen": -0.41230225563049316, + "logits/rejected": -0.4267541766166687, + "logps/chosen": -0.8979755640029907, + "logps/rejected": -1.6309888362884521, + "loss": 1.0101, + "nll_loss": 0.9619138836860657, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08979754149913788, + "rewards/margins": 0.07330133765935898, + "rewards/rejected": -0.16309887170791626, + "step": 5740 + }, + { + "epoch": 1.04, + "grad_norm": 1.4105861186981201, + "learning_rate": 4.709855874217498e-06, + "log_odds_chosen": 1.5627528429031372, + "log_odds_ratio": -0.4116443693637848, + "logits/chosen": -0.3588656783103943, + "logits/rejected": -0.4155047535896301, + "logps/chosen": -0.7683436870574951, + "logps/rejected": -1.859259843826294, + "loss": 0.944, + "nll_loss": 0.9028097987174988, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07683436572551727, + "rewards/margins": 0.1090916246175766, + "rewards/rejected": -0.18592599034309387, + "step": 5750 + }, + { + "epoch": 1.04, + "grad_norm": 1.2038817405700684, + "learning_rate": 4.70403261027806e-06, + "log_odds_chosen": 0.8248249888420105, + "log_odds_ratio": -0.5202821493148804, + "logits/chosen": -0.4802790582180023, + "logits/rejected": -0.4898431897163391, + "logps/chosen": -0.9521000981330872, + "logps/rejected": -1.5202988386154175, + "loss": 0.9786, + "nll_loss": 0.9266166687011719, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.095210000872612, + "rewards/margins": 0.05681987479329109, + "rewards/rejected": -0.152029886841774, + "step": 5760 + }, + { + "epoch": 1.04, + "grad_norm": 0.9356115460395813, + "learning_rate": 4.698209346338623e-06, + "log_odds_chosen": 0.9297721982002258, + "log_odds_ratio": -0.5297530889511108, + "logits/chosen": -0.38405826687812805, + "logits/rejected": -0.4060952067375183, + "logps/chosen": -0.8437892198562622, + "logps/rejected": -1.4541056156158447, + "loss": 0.897, + "nll_loss": 0.8439979553222656, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0843789204955101, + "rewards/margins": 0.06103163957595825, + "rewards/rejected": -0.14541055262088776, + "step": 5770 + }, + { + "epoch": 1.04, + "grad_norm": 1.438058614730835, + "learning_rate": 4.692386082399184e-06, + "log_odds_chosen": 0.8766366243362427, + "log_odds_ratio": -0.5100477933883667, + "logits/chosen": -0.4361240863800049, + "logits/rejected": -0.463198184967041, + "logps/chosen": -0.9211476445198059, + "logps/rejected": -1.4904837608337402, + "loss": 1.0009, + "nll_loss": 0.9499245882034302, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09211476147174835, + "rewards/margins": 0.056933604180812836, + "rewards/rejected": -0.1490483582019806, + "step": 5780 + }, + { + "epoch": 1.05, + "grad_norm": 1.1899020671844482, + "learning_rate": 4.686562818459746e-06, + "log_odds_chosen": 0.781607449054718, + "log_odds_ratio": -0.5476793050765991, + "logits/chosen": -0.46544790267944336, + "logits/rejected": -0.4789610505104065, + "logps/chosen": -0.9231128692626953, + "logps/rejected": -1.429552674293518, + "loss": 1.0049, + "nll_loss": 0.9501617550849915, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09231128543615341, + "rewards/margins": 0.05064399167895317, + "rewards/rejected": -0.14295528829097748, + "step": 5790 + }, + { + "epoch": 1.05, + "grad_norm": 2.000607967376709, + "learning_rate": 4.680739554520309e-06, + "log_odds_chosen": 0.8846811056137085, + "log_odds_ratio": -0.48624786734580994, + "logits/chosen": -0.4445672631263733, + "logits/rejected": -0.48973551392555237, + "logps/chosen": -0.8961418271064758, + "logps/rejected": -1.4623029232025146, + "loss": 0.9977, + "nll_loss": 0.9490774869918823, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08961419761180878, + "rewards/margins": 0.05661610886454582, + "rewards/rejected": -0.1462303102016449, + "step": 5800 + }, + { + "epoch": 1.05, + "grad_norm": 1.3409258127212524, + "learning_rate": 4.674916290580871e-06, + "log_odds_chosen": 0.7539998292922974, + "log_odds_ratio": -0.5352480411529541, + "logits/chosen": -0.4524649977684021, + "logits/rejected": -0.47686344385147095, + "logps/chosen": -0.9493099451065063, + "logps/rejected": -1.4658688306808472, + "loss": 0.9753, + "nll_loss": 0.9217742681503296, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.0949309915304184, + "rewards/margins": 0.05165589973330498, + "rewards/rejected": -0.14658688008785248, + "step": 5810 + }, + { + "epoch": 1.05, + "grad_norm": 0.7173593640327454, + "learning_rate": 4.6690930266414324e-06, + "log_odds_chosen": 0.5790658593177795, + "log_odds_ratio": -0.6249306797981262, + "logits/chosen": -0.49599066376686096, + "logits/rejected": -0.4908533990383148, + "logps/chosen": -1.0195724964141846, + "logps/rejected": -1.3964897394180298, + "loss": 1.051, + "nll_loss": 0.9885488748550415, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10195724666118622, + "rewards/margins": 0.03769173100590706, + "rewards/rejected": -0.13964898884296417, + "step": 5820 + }, + { + "epoch": 1.05, + "grad_norm": 1.9051140546798706, + "learning_rate": 4.663269762701995e-06, + "log_odds_chosen": 0.8640015721321106, + "log_odds_ratio": -0.514534592628479, + "logits/chosen": -0.4196191430091858, + "logits/rejected": -0.44794726371765137, + "logps/chosen": -0.9575347900390625, + "logps/rejected": -1.5376710891723633, + "loss": 0.9067, + "nll_loss": 0.8552610278129578, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09575347602367401, + "rewards/margins": 0.058013636618852615, + "rewards/rejected": -0.15376712381839752, + "step": 5830 + }, + { + "epoch": 1.05, + "grad_norm": 1.4965343475341797, + "learning_rate": 4.657446498762556e-06, + "log_odds_chosen": 0.6723813414573669, + "log_odds_ratio": -0.5684707760810852, + "logits/chosen": -0.42264777421951294, + "logits/rejected": -0.4406364858150482, + "logps/chosen": -0.8763896822929382, + "logps/rejected": -1.284791350364685, + "loss": 0.9805, + "nll_loss": 0.9236391186714172, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0876389741897583, + "rewards/margins": 0.04084015637636185, + "rewards/rejected": -0.12847913801670074, + "step": 5840 + }, + { + "epoch": 1.06, + "grad_norm": 0.7926245927810669, + "learning_rate": 4.651623234823118e-06, + "log_odds_chosen": 1.1886515617370605, + "log_odds_ratio": -0.4757661819458008, + "logits/chosen": -0.4212180972099304, + "logits/rejected": -0.43057411909103394, + "logps/chosen": -0.8711498975753784, + "logps/rejected": -1.6483008861541748, + "loss": 0.9531, + "nll_loss": 0.9055501222610474, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08711498230695724, + "rewards/margins": 0.07771511375904083, + "rewards/rejected": -0.16483010351657867, + "step": 5850 + }, + { + "epoch": 1.06, + "grad_norm": 1.2240430116653442, + "learning_rate": 4.64579997088368e-06, + "log_odds_chosen": 0.7755094766616821, + "log_odds_ratio": -0.5845614671707153, + "logits/chosen": -0.43493086099624634, + "logits/rejected": -0.45848387479782104, + "logps/chosen": -0.9059002995491028, + "logps/rejected": -1.4610520601272583, + "loss": 0.9695, + "nll_loss": 0.9110584259033203, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09059003740549088, + "rewards/margins": 0.055515170097351074, + "rewards/rejected": -0.14610520005226135, + "step": 5860 + }, + { + "epoch": 1.06, + "grad_norm": 1.060349941253662, + "learning_rate": 4.639976706944242e-06, + "log_odds_chosen": 0.7335856556892395, + "log_odds_ratio": -0.5503862500190735, + "logits/chosen": -0.3889656364917755, + "logits/rejected": -0.419551283121109, + "logps/chosen": -1.0413269996643066, + "logps/rejected": -1.5086840391159058, + "loss": 1.1028, + "nll_loss": 1.0477640628814697, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10413269698619843, + "rewards/margins": 0.04673569276928902, + "rewards/rejected": -0.15086840093135834, + "step": 5870 + }, + { + "epoch": 1.06, + "grad_norm": 1.2340961694717407, + "learning_rate": 4.634153443004804e-06, + "log_odds_chosen": 0.7230426073074341, + "log_odds_ratio": -0.5490708351135254, + "logits/chosen": -0.4647773206233978, + "logits/rejected": -0.5086523294448853, + "logps/chosen": -0.9753144383430481, + "logps/rejected": -1.4537895917892456, + "loss": 1.0191, + "nll_loss": 0.9642260670661926, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09753144532442093, + "rewards/margins": 0.04784751683473587, + "rewards/rejected": -0.1453789621591568, + "step": 5880 + }, + { + "epoch": 1.06, + "grad_norm": 0.7249181270599365, + "learning_rate": 4.628330179065366e-06, + "log_odds_chosen": 0.7544859647750854, + "log_odds_ratio": -0.5356615781784058, + "logits/chosen": -0.43114471435546875, + "logits/rejected": -0.4747946858406067, + "logps/chosen": -0.8531882166862488, + "logps/rejected": -1.3764461278915405, + "loss": 0.9115, + "nll_loss": 0.8579704165458679, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08531881868839264, + "rewards/margins": 0.05232580378651619, + "rewards/rejected": -0.13764461874961853, + "step": 5890 + }, + { + "epoch": 1.07, + "grad_norm": 0.8657764196395874, + "learning_rate": 4.622506915125928e-06, + "log_odds_chosen": 0.9151697158813477, + "log_odds_ratio": -0.5579023361206055, + "logits/chosen": -0.446617990732193, + "logits/rejected": -0.4951063096523285, + "logps/chosen": -0.9576125144958496, + "logps/rejected": -1.5999959707260132, + "loss": 0.9359, + "nll_loss": 0.8801458477973938, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0957612469792366, + "rewards/margins": 0.06423834711313248, + "rewards/rejected": -0.15999959409236908, + "step": 5900 + }, + { + "epoch": 1.07, + "grad_norm": 1.1720466613769531, + "learning_rate": 4.61668365118649e-06, + "log_odds_chosen": 0.6918897032737732, + "log_odds_ratio": -0.5745338201522827, + "logits/chosen": -0.425567090511322, + "logits/rejected": -0.445716917514801, + "logps/chosen": -0.9016758799552917, + "logps/rejected": -1.3733348846435547, + "loss": 0.9548, + "nll_loss": 0.8973618745803833, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09016759693622589, + "rewards/margins": 0.04716590791940689, + "rewards/rejected": -0.13733351230621338, + "step": 5910 + }, + { + "epoch": 1.07, + "grad_norm": 2.0577094554901123, + "learning_rate": 4.610860387247052e-06, + "log_odds_chosen": 0.8531296849250793, + "log_odds_ratio": -0.5350161194801331, + "logits/chosen": -0.4382871985435486, + "logits/rejected": -0.4813820421695709, + "logps/chosen": -0.8610553741455078, + "logps/rejected": -1.437717080116272, + "loss": 0.9136, + "nll_loss": 0.8601323962211609, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08610554039478302, + "rewards/margins": 0.05766616389155388, + "rewards/rejected": -0.1437717080116272, + "step": 5920 + }, + { + "epoch": 1.07, + "grad_norm": 1.0709292888641357, + "learning_rate": 4.605037123307614e-06, + "log_odds_chosen": 0.6848903894424438, + "log_odds_ratio": -0.5732904672622681, + "logits/chosen": -0.4110352098941803, + "logits/rejected": -0.47563010454177856, + "logps/chosen": -0.9090906977653503, + "logps/rejected": -1.3542063236236572, + "loss": 0.9591, + "nll_loss": 0.9017614126205444, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09090907871723175, + "rewards/margins": 0.044511578977108, + "rewards/rejected": -0.13542065024375916, + "step": 5930 + }, + { + "epoch": 1.07, + "grad_norm": 1.3522981405258179, + "learning_rate": 4.599213859368175e-06, + "log_odds_chosen": 0.7799988389015198, + "log_odds_ratio": -0.5210399627685547, + "logits/chosen": -0.4593687951564789, + "logits/rejected": -0.4631672501564026, + "logps/chosen": -0.8581777811050415, + "logps/rejected": -1.3306677341461182, + "loss": 0.9031, + "nll_loss": 0.8510422706604004, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08581778407096863, + "rewards/margins": 0.047249000519514084, + "rewards/rejected": -0.133066788315773, + "step": 5940 + }, + { + "epoch": 1.07, + "grad_norm": 0.6679449677467346, + "learning_rate": 4.593390595428737e-06, + "log_odds_chosen": 0.8621038198471069, + "log_odds_ratio": -0.518430233001709, + "logits/chosen": -0.44318872690200806, + "logits/rejected": -0.45970430970191956, + "logps/chosen": -0.8508337140083313, + "logps/rejected": -1.4162659645080566, + "loss": 0.886, + "nll_loss": 0.8341652154922485, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08508336544036865, + "rewards/margins": 0.056543223559856415, + "rewards/rejected": -0.14162659645080566, + "step": 5950 + }, + { + "epoch": 1.08, + "grad_norm": 0.9131520390510559, + "learning_rate": 4.5875673314893e-06, + "log_odds_chosen": 0.5632303953170776, + "log_odds_ratio": -0.5651002526283264, + "logits/chosen": -0.5047626495361328, + "logits/rejected": -0.5000123381614685, + "logps/chosen": -0.9939088821411133, + "logps/rejected": -1.3372070789337158, + "loss": 0.9807, + "nll_loss": 0.9242087602615356, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09939088672399521, + "rewards/margins": 0.034329816699028015, + "rewards/rejected": -0.13372069597244263, + "step": 5960 + }, + { + "epoch": 1.08, + "grad_norm": 0.9747700691223145, + "learning_rate": 4.581744067549861e-06, + "log_odds_chosen": 0.7390889525413513, + "log_odds_ratio": -0.5641605257987976, + "logits/chosen": -0.43641337752342224, + "logits/rejected": -0.4768204689025879, + "logps/chosen": -0.9668332934379578, + "logps/rejected": -1.476610541343689, + "loss": 0.9901, + "nll_loss": 0.9336675405502319, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0966833308339119, + "rewards/margins": 0.05097772926092148, + "rewards/rejected": -0.14766106009483337, + "step": 5970 + }, + { + "epoch": 1.08, + "grad_norm": 1.3658349514007568, + "learning_rate": 4.5759208036104235e-06, + "log_odds_chosen": 0.803102970123291, + "log_odds_ratio": -0.5294958353042603, + "logits/chosen": -0.44828686118125916, + "logits/rejected": -0.4648512899875641, + "logps/chosen": -0.9527362585067749, + "logps/rejected": -1.4989818334579468, + "loss": 0.9945, + "nll_loss": 0.9415693283081055, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09527363628149033, + "rewards/margins": 0.05462455749511719, + "rewards/rejected": -0.14989818632602692, + "step": 5980 + }, + { + "epoch": 1.08, + "grad_norm": 1.7984719276428223, + "learning_rate": 4.570097539670986e-06, + "log_odds_chosen": 0.5657428503036499, + "log_odds_ratio": -0.6062055826187134, + "logits/chosen": -0.44131985306739807, + "logits/rejected": -0.45149001479148865, + "logps/chosen": -1.01093327999115, + "logps/rejected": -1.351680040359497, + "loss": 0.9973, + "nll_loss": 0.9366718530654907, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10109331458806992, + "rewards/margins": 0.03407468646764755, + "rewards/rejected": -0.13516801595687866, + "step": 5990 + }, + { + "epoch": 1.08, + "grad_norm": 2.813422441482544, + "learning_rate": 4.564274275731547e-06, + "log_odds_chosen": 0.7859033346176147, + "log_odds_ratio": -0.5689486265182495, + "logits/chosen": -0.37758010625839233, + "logits/rejected": -0.4466930031776428, + "logps/chosen": -0.8639854192733765, + "logps/rejected": -1.4098567962646484, + "loss": 0.8868, + "nll_loss": 0.8299161791801453, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08639854192733765, + "rewards/margins": 0.05458713322877884, + "rewards/rejected": -0.14098568260669708, + "step": 6000 + }, + { + "epoch": 1.09, + "grad_norm": 0.9352004528045654, + "learning_rate": 4.558451011792109e-06, + "log_odds_chosen": 0.7700805068016052, + "log_odds_ratio": -0.5558144450187683, + "logits/chosen": -0.3771985173225403, + "logits/rejected": -0.44899502396583557, + "logps/chosen": -0.9024406671524048, + "logps/rejected": -1.4145829677581787, + "loss": 0.9885, + "nll_loss": 0.9329215884208679, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09024406969547272, + "rewards/margins": 0.05121422931551933, + "rewards/rejected": -0.14145830273628235, + "step": 6010 + }, + { + "epoch": 1.09, + "grad_norm": 1.1320077180862427, + "learning_rate": 4.552627747852671e-06, + "log_odds_chosen": 0.6384034752845764, + "log_odds_ratio": -0.5681486129760742, + "logits/chosen": -0.4968019127845764, + "logits/rejected": -0.5048823356628418, + "logps/chosen": -0.8819563984870911, + "logps/rejected": -1.2801580429077148, + "loss": 0.9733, + "nll_loss": 0.9164499044418335, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08819563686847687, + "rewards/margins": 0.039820168167352676, + "rewards/rejected": -0.12801580131053925, + "step": 6020 + }, + { + "epoch": 1.09, + "grad_norm": 1.4417674541473389, + "learning_rate": 4.546804483913233e-06, + "log_odds_chosen": 0.845604419708252, + "log_odds_ratio": -0.5084959268569946, + "logits/chosen": -0.4493161141872406, + "logits/rejected": -0.4819253981113434, + "logps/chosen": -0.8610888719558716, + "logps/rejected": -1.4244296550750732, + "loss": 0.9825, + "nll_loss": 0.9316762685775757, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08610888570547104, + "rewards/margins": 0.05633409693837166, + "rewards/rejected": -0.142442986369133, + "step": 6030 + }, + { + "epoch": 1.09, + "grad_norm": 0.7863137722015381, + "learning_rate": 4.540981219973795e-06, + "log_odds_chosen": 0.6155441403388977, + "log_odds_ratio": -0.6100500226020813, + "logits/chosen": -0.4968458116054535, + "logits/rejected": -0.4958006739616394, + "logps/chosen": -0.9442992210388184, + "logps/rejected": -1.3343608379364014, + "loss": 0.9896, + "nll_loss": 0.9286164045333862, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09442992508411407, + "rewards/margins": 0.039006151258945465, + "rewards/rejected": -0.13343606889247894, + "step": 6040 + }, + { + "epoch": 1.09, + "grad_norm": 1.2931441068649292, + "learning_rate": 4.535157956034357e-06, + "log_odds_chosen": 0.8838874697685242, + "log_odds_ratio": -0.5274507999420166, + "logits/chosen": -0.4653933644294739, + "logits/rejected": -0.5074115991592407, + "logps/chosen": -0.9719902276992798, + "logps/rejected": -1.5905590057373047, + "loss": 0.9916, + "nll_loss": 0.9388928413391113, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09719902276992798, + "rewards/margins": 0.06185689568519592, + "rewards/rejected": -0.1590559035539627, + "step": 6050 + }, + { + "epoch": 1.09, + "grad_norm": 1.2712411880493164, + "learning_rate": 4.529334692094919e-06, + "log_odds_chosen": 0.7494404315948486, + "log_odds_ratio": -0.5353826284408569, + "logits/chosen": -0.3938870429992676, + "logits/rejected": -0.45134368538856506, + "logps/chosen": -0.9210939407348633, + "logps/rejected": -1.441649317741394, + "loss": 0.9476, + "nll_loss": 0.8940416574478149, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09210939705371857, + "rewards/margins": 0.05205554515123367, + "rewards/rejected": -0.14416493475437164, + "step": 6060 + }, + { + "epoch": 1.1, + "grad_norm": 1.6904181241989136, + "learning_rate": 4.523511428155481e-06, + "log_odds_chosen": 1.1151350736618042, + "log_odds_ratio": -0.4561994671821594, + "logits/chosen": -0.43357786536216736, + "logits/rejected": -0.4941074848175049, + "logps/chosen": -0.9114344716072083, + "logps/rejected": -1.6274280548095703, + "loss": 0.9271, + "nll_loss": 0.881447434425354, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09114344418048859, + "rewards/margins": 0.07159935683012009, + "rewards/rejected": -0.16274279356002808, + "step": 6070 + }, + { + "epoch": 1.1, + "grad_norm": 1.0081928968429565, + "learning_rate": 4.517688164216043e-06, + "log_odds_chosen": 0.88865727186203, + "log_odds_ratio": -0.5304638147354126, + "logits/chosen": -0.4526711404323578, + "logits/rejected": -0.46278172731399536, + "logps/chosen": -0.9050025939941406, + "logps/rejected": -1.5034633874893188, + "loss": 0.9395, + "nll_loss": 0.8864428400993347, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09050027281045914, + "rewards/margins": 0.05984606221318245, + "rewards/rejected": -0.15034635365009308, + "step": 6080 + }, + { + "epoch": 1.1, + "grad_norm": 1.6177700757980347, + "learning_rate": 4.5118649002766055e-06, + "log_odds_chosen": 1.024202585220337, + "log_odds_ratio": -0.4576869606971741, + "logits/chosen": -0.4027012288570404, + "logits/rejected": -0.4568304121494293, + "logps/chosen": -0.8995013236999512, + "logps/rejected": -1.5450494289398193, + "loss": 0.9174, + "nll_loss": 0.8716768026351929, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08995013684034348, + "rewards/margins": 0.06455481052398682, + "rewards/rejected": -0.1545049250125885, + "step": 6090 + }, + { + "epoch": 1.1, + "grad_norm": 0.9513935446739197, + "learning_rate": 4.506041636337166e-06, + "log_odds_chosen": 1.1876469850540161, + "log_odds_ratio": -0.48842042684555054, + "logits/chosen": -0.46368637681007385, + "logits/rejected": -0.4937184453010559, + "logps/chosen": -0.9262706637382507, + "logps/rejected": -1.740277886390686, + "loss": 0.9675, + "nll_loss": 0.918613612651825, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09262706339359283, + "rewards/margins": 0.08140072971582413, + "rewards/rejected": -0.17402780055999756, + "step": 6100 + }, + { + "epoch": 1.1, + "grad_norm": 1.707017183303833, + "learning_rate": 4.500218372397728e-06, + "log_odds_chosen": 0.9699726104736328, + "log_odds_ratio": -0.5156748294830322, + "logits/chosen": -0.4302978515625, + "logits/rejected": -0.44206157326698303, + "logps/chosen": -0.9146237373352051, + "logps/rejected": -1.5761725902557373, + "loss": 0.9557, + "nll_loss": 0.9041454195976257, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09146237373352051, + "rewards/margins": 0.06615491211414337, + "rewards/rejected": -0.15761728584766388, + "step": 6110 + }, + { + "epoch": 1.11, + "grad_norm": 1.1624763011932373, + "learning_rate": 4.494395108458291e-06, + "log_odds_chosen": 1.2337392568588257, + "log_odds_ratio": -0.4633565843105316, + "logits/chosen": -0.45803871750831604, + "logits/rejected": -0.5049036741256714, + "logps/chosen": -0.8313882946968079, + "logps/rejected": -1.6546128988265991, + "loss": 0.974, + "nll_loss": 0.9276957511901855, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08313882350921631, + "rewards/margins": 0.08232248574495316, + "rewards/rejected": -0.16546130180358887, + "step": 6120 + }, + { + "epoch": 1.11, + "grad_norm": 1.7360762357711792, + "learning_rate": 4.488571844518852e-06, + "log_odds_chosen": 0.7800405621528625, + "log_odds_ratio": -0.5640308856964111, + "logits/chosen": -0.48888611793518066, + "logits/rejected": -0.526140034198761, + "logps/chosen": -0.8599061965942383, + "logps/rejected": -1.3839830160140991, + "loss": 0.9853, + "nll_loss": 0.9288476705551147, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08599063009023666, + "rewards/margins": 0.05240767449140549, + "rewards/rejected": -0.13839831948280334, + "step": 6130 + }, + { + "epoch": 1.11, + "grad_norm": 0.8478294014930725, + "learning_rate": 4.4827485805794145e-06, + "log_odds_chosen": 0.8905304074287415, + "log_odds_ratio": -0.5386573672294617, + "logits/chosen": -0.4045413136482239, + "logits/rejected": -0.4013892710208893, + "logps/chosen": -0.9712546467781067, + "logps/rejected": -1.5939693450927734, + "loss": 0.9416, + "nll_loss": 0.887769877910614, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09712545573711395, + "rewards/margins": 0.062271494418382645, + "rewards/rejected": -0.1593969464302063, + "step": 6140 + }, + { + "epoch": 1.11, + "grad_norm": 1.7147661447525024, + "learning_rate": 4.476925316639977e-06, + "log_odds_chosen": 0.8515718579292297, + "log_odds_ratio": -0.5489001274108887, + "logits/chosen": -0.4691835343837738, + "logits/rejected": -0.5160794258117676, + "logps/chosen": -1.0208523273468018, + "logps/rejected": -1.6025129556655884, + "loss": 1.0615, + "nll_loss": 1.0066022872924805, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10208523273468018, + "rewards/margins": 0.05816606804728508, + "rewards/rejected": -0.16025128960609436, + "step": 6150 + }, + { + "epoch": 1.11, + "grad_norm": 1.0859298706054688, + "learning_rate": 4.471102052700538e-06, + "log_odds_chosen": 0.8427707552909851, + "log_odds_ratio": -0.5371182560920715, + "logits/chosen": -0.4537859559059143, + "logits/rejected": -0.4494473338127136, + "logps/chosen": -0.9272929430007935, + "logps/rejected": -1.486228108406067, + "loss": 1.0053, + "nll_loss": 0.9515641927719116, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09272929280996323, + "rewards/margins": 0.05589351803064346, + "rewards/rejected": -0.1486227959394455, + "step": 6160 + }, + { + "epoch": 1.11, + "grad_norm": 1.7710280418395996, + "learning_rate": 4.465278788761101e-06, + "log_odds_chosen": 0.7671918869018555, + "log_odds_ratio": -0.5367628335952759, + "logits/chosen": -0.4476463794708252, + "logits/rejected": -0.45224714279174805, + "logps/chosen": -1.0947606563568115, + "logps/rejected": -1.627105951309204, + "loss": 1.0101, + "nll_loss": 0.956439197063446, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10947606712579727, + "rewards/margins": 0.053234536200761795, + "rewards/rejected": -0.16271059215068817, + "step": 6170 + }, + { + "epoch": 1.12, + "grad_norm": 1.216435432434082, + "learning_rate": 4.459455524821663e-06, + "log_odds_chosen": 0.6147326827049255, + "log_odds_ratio": -0.5704823732376099, + "logits/chosen": -0.4661136269569397, + "logits/rejected": -0.46458595991134644, + "logps/chosen": -0.9244877099990845, + "logps/rejected": -1.3198649883270264, + "loss": 0.9443, + "nll_loss": 0.8872181177139282, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09244877099990845, + "rewards/margins": 0.03953772783279419, + "rewards/rejected": -0.13198649883270264, + "step": 6180 + }, + { + "epoch": 1.12, + "grad_norm": 2.7578301429748535, + "learning_rate": 4.4536322608822235e-06, + "log_odds_chosen": 0.7873707413673401, + "log_odds_ratio": -0.5360560417175293, + "logits/chosen": -0.4558858871459961, + "logits/rejected": -0.49239593744277954, + "logps/chosen": -0.967001736164093, + "logps/rejected": -1.5444661378860474, + "loss": 0.9927, + "nll_loss": 0.9390754699707031, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09670017659664154, + "rewards/margins": 0.05774643272161484, + "rewards/rejected": -0.15444661676883698, + "step": 6190 + }, + { + "epoch": 1.12, + "grad_norm": 1.234567403793335, + "learning_rate": 4.447808996942786e-06, + "log_odds_chosen": 0.7407630681991577, + "log_odds_ratio": -0.5109227895736694, + "logits/chosen": -0.4580449163913727, + "logits/rejected": -0.46283191442489624, + "logps/chosen": -0.891004741191864, + "logps/rejected": -1.3632593154907227, + "loss": 0.9524, + "nll_loss": 0.9013134837150574, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08910048007965088, + "rewards/margins": 0.0472254678606987, + "rewards/rejected": -0.13632594048976898, + "step": 6200 + }, + { + "epoch": 1.12, + "grad_norm": 0.999319314956665, + "learning_rate": 4.441985733003348e-06, + "log_odds_chosen": 0.8994671106338501, + "log_odds_ratio": -0.5333329439163208, + "logits/chosen": -0.48216262459754944, + "logits/rejected": -0.44915810227394104, + "logps/chosen": -0.9370241165161133, + "logps/rejected": -1.5314620733261108, + "loss": 0.87, + "nll_loss": 0.8166621327400208, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09370241314172745, + "rewards/margins": 0.05944380164146423, + "rewards/rejected": -0.15314622223377228, + "step": 6210 + }, + { + "epoch": 1.12, + "grad_norm": 1.658113956451416, + "learning_rate": 4.4361624690639104e-06, + "log_odds_chosen": 0.9446815252304077, + "log_odds_ratio": -0.4770669937133789, + "logits/chosen": -0.44138193130493164, + "logits/rejected": -0.4626283049583435, + "logps/chosen": -0.8511655926704407, + "logps/rejected": -1.460635781288147, + "loss": 0.8938, + "nll_loss": 0.8460577726364136, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08511656522750854, + "rewards/margins": 0.06094701960682869, + "rewards/rejected": -0.14606359601020813, + "step": 6220 + }, + { + "epoch": 1.13, + "grad_norm": 1.3590821027755737, + "learning_rate": 4.430339205124472e-06, + "log_odds_chosen": 0.7557743787765503, + "log_odds_ratio": -0.5099378824234009, + "logits/chosen": -0.41205430030822754, + "logits/rejected": -0.48370322585105896, + "logps/chosen": -0.9024174809455872, + "logps/rejected": -1.3916748762130737, + "loss": 0.9609, + "nll_loss": 0.9099496603012085, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09024176001548767, + "rewards/margins": 0.0489257350564003, + "rewards/rejected": -0.13916750252246857, + "step": 6230 + }, + { + "epoch": 1.13, + "grad_norm": 1.4263062477111816, + "learning_rate": 4.424515941185034e-06, + "log_odds_chosen": 0.9851778745651245, + "log_odds_ratio": -0.5378494262695312, + "logits/chosen": -0.4365876615047455, + "logits/rejected": -0.4904526174068451, + "logps/chosen": -0.9848604202270508, + "logps/rejected": -1.7028894424438477, + "loss": 1.0141, + "nll_loss": 0.9603080749511719, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0984860435128212, + "rewards/margins": 0.07180289924144745, + "rewards/rejected": -0.17028896510601044, + "step": 6240 + }, + { + "epoch": 1.13, + "grad_norm": 1.0578852891921997, + "learning_rate": 4.4186926772455965e-06, + "log_odds_chosen": 0.8757988214492798, + "log_odds_ratio": -0.5127619504928589, + "logits/chosen": -0.41092929244041443, + "logits/rejected": -0.4253155589103699, + "logps/chosen": -0.8159357309341431, + "logps/rejected": -1.3599027395248413, + "loss": 0.915, + "nll_loss": 0.8637593984603882, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0815935730934143, + "rewards/margins": 0.054396700114011765, + "rewards/rejected": -0.13599026203155518, + "step": 6250 + }, + { + "epoch": 1.13, + "grad_norm": 1.2675119638442993, + "learning_rate": 4.412869413306158e-06, + "log_odds_chosen": 1.1324522495269775, + "log_odds_ratio": -0.48209959268569946, + "logits/chosen": -0.38643237948417664, + "logits/rejected": -0.4416092336177826, + "logps/chosen": -0.8294364213943481, + "logps/rejected": -1.6051595211029053, + "loss": 0.8791, + "nll_loss": 0.8309270739555359, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08294364809989929, + "rewards/margins": 0.07757231593132019, + "rewards/rejected": -0.16051596403121948, + "step": 6260 + }, + { + "epoch": 1.13, + "grad_norm": 1.1435970067977905, + "learning_rate": 4.40704614936672e-06, + "log_odds_chosen": 0.7152145504951477, + "log_odds_ratio": -0.5820814371109009, + "logits/chosen": -0.4874646067619324, + "logits/rejected": -0.5094522833824158, + "logps/chosen": -0.9671697616577148, + "logps/rejected": -1.4383320808410645, + "loss": 1.0701, + "nll_loss": 1.011844277381897, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0967169851064682, + "rewards/margins": 0.0471162274479866, + "rewards/rejected": -0.1438332051038742, + "step": 6270 + }, + { + "epoch": 1.13, + "grad_norm": 1.9420355558395386, + "learning_rate": 4.401222885427282e-06, + "log_odds_chosen": 0.9026612043380737, + "log_odds_ratio": -0.5407354831695557, + "logits/chosen": -0.42870211601257324, + "logits/rejected": -0.45270299911499023, + "logps/chosen": -0.9005454778671265, + "logps/rejected": -1.4829877614974976, + "loss": 0.9653, + "nll_loss": 0.9112182855606079, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09005454927682877, + "rewards/margins": 0.058244235813617706, + "rewards/rejected": -0.14829877018928528, + "step": 6280 + }, + { + "epoch": 1.14, + "grad_norm": 1.3078311681747437, + "learning_rate": 4.395399621487843e-06, + "log_odds_chosen": 0.9692419767379761, + "log_odds_ratio": -0.5271007418632507, + "logits/chosen": -0.49861112236976624, + "logits/rejected": -0.4814334809780121, + "logps/chosen": -0.8934978246688843, + "logps/rejected": -1.5919220447540283, + "loss": 0.9152, + "nll_loss": 0.8624576330184937, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08934978395700455, + "rewards/margins": 0.06984242051839828, + "rewards/rejected": -0.15919220447540283, + "step": 6290 + }, + { + "epoch": 1.14, + "grad_norm": 1.0360865592956543, + "learning_rate": 4.3895763575484055e-06, + "log_odds_chosen": 1.2350679636001587, + "log_odds_ratio": -0.4851533770561218, + "logits/chosen": -0.38407033681869507, + "logits/rejected": -0.44652214646339417, + "logps/chosen": -0.7998726963996887, + "logps/rejected": -1.5704978704452515, + "loss": 0.8281, + "nll_loss": 0.7795952558517456, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.07998727262020111, + "rewards/margins": 0.07706251740455627, + "rewards/rejected": -0.15704980492591858, + "step": 6300 + }, + { + "epoch": 1.14, + "grad_norm": 1.2178974151611328, + "learning_rate": 4.383753093608968e-06, + "log_odds_chosen": 0.7196434736251831, + "log_odds_ratio": -0.6056947708129883, + "logits/chosen": -0.5137656331062317, + "logits/rejected": -0.5180788636207581, + "logps/chosen": -1.0398476123809814, + "logps/rejected": -1.576139211654663, + "loss": 1.023, + "nll_loss": 0.9624187350273132, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10398473590612411, + "rewards/margins": 0.05362917110323906, + "rewards/rejected": -0.15761391818523407, + "step": 6310 + }, + { + "epoch": 1.14, + "grad_norm": 1.1940643787384033, + "learning_rate": 4.377929829669529e-06, + "log_odds_chosen": 0.6526185274124146, + "log_odds_ratio": -0.6317920088768005, + "logits/chosen": -0.4650532603263855, + "logits/rejected": -0.48263511061668396, + "logps/chosen": -0.9457101821899414, + "logps/rejected": -1.3629834651947021, + "loss": 0.9974, + "nll_loss": 0.9341708421707153, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09457103163003922, + "rewards/margins": 0.04172731563448906, + "rewards/rejected": -0.13629834353923798, + "step": 6320 + }, + { + "epoch": 1.14, + "grad_norm": 2.10884165763855, + "learning_rate": 4.372106565730092e-06, + "log_odds_chosen": 0.7424247860908508, + "log_odds_ratio": -0.5436392426490784, + "logits/chosen": -0.4434364438056946, + "logits/rejected": -0.47471094131469727, + "logps/chosen": -0.9834851026535034, + "logps/rejected": -1.4713070392608643, + "loss": 0.9389, + "nll_loss": 0.8845357894897461, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09834851324558258, + "rewards/margins": 0.048782214522361755, + "rewards/rejected": -0.14713071286678314, + "step": 6330 + }, + { + "epoch": 1.15, + "grad_norm": 1.1214773654937744, + "learning_rate": 4.366283301790654e-06, + "log_odds_chosen": 0.8600967526435852, + "log_odds_ratio": -0.5864149332046509, + "logits/chosen": -0.488150030374527, + "logits/rejected": -0.4834933280944824, + "logps/chosen": -0.8966549038887024, + "logps/rejected": -1.4617177248001099, + "loss": 0.9967, + "nll_loss": 0.938027024269104, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.089665487408638, + "rewards/margins": 0.056506287306547165, + "rewards/rejected": -0.14617177844047546, + "step": 6340 + }, + { + "epoch": 1.15, + "grad_norm": 1.8667078018188477, + "learning_rate": 4.360460037851215e-06, + "log_odds_chosen": 0.8151102066040039, + "log_odds_ratio": -0.5495513081550598, + "logits/chosen": -0.4086511731147766, + "logits/rejected": -0.4186836779117584, + "logps/chosen": -0.9379664659500122, + "logps/rejected": -1.51735520362854, + "loss": 0.9226, + "nll_loss": 0.8676198124885559, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09379664808511734, + "rewards/margins": 0.05793887376785278, + "rewards/rejected": -0.15173551440238953, + "step": 6350 + }, + { + "epoch": 1.15, + "grad_norm": 1.1724246740341187, + "learning_rate": 4.354636773911778e-06, + "log_odds_chosen": 0.8040224313735962, + "log_odds_ratio": -0.5135282278060913, + "logits/chosen": -0.4123757779598236, + "logits/rejected": -0.42883196473121643, + "logps/chosen": -0.8877067565917969, + "logps/rejected": -1.4244158267974854, + "loss": 0.9228, + "nll_loss": 0.871476948261261, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08877068012952805, + "rewards/margins": 0.05367090553045273, + "rewards/rejected": -0.14244157075881958, + "step": 6360 + }, + { + "epoch": 1.15, + "grad_norm": 0.7838314771652222, + "learning_rate": 4.348813509972339e-06, + "log_odds_chosen": 1.0577952861785889, + "log_odds_ratio": -0.5264900326728821, + "logits/chosen": -0.41539543867111206, + "logits/rejected": -0.44596344232559204, + "logps/chosen": -0.8569475412368774, + "logps/rejected": -1.5782248973846436, + "loss": 0.9204, + "nll_loss": 0.8677042126655579, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08569475263357162, + "rewards/margins": 0.07212773710489273, + "rewards/rejected": -0.15782250463962555, + "step": 6370 + }, + { + "epoch": 1.15, + "grad_norm": 1.7537171840667725, + "learning_rate": 4.342990246032901e-06, + "log_odds_chosen": 0.8003638982772827, + "log_odds_ratio": -0.5795815587043762, + "logits/chosen": -0.4575144648551941, + "logits/rejected": -0.4661421775817871, + "logps/chosen": -0.9662486910820007, + "logps/rejected": -1.540056824684143, + "loss": 0.9709, + "nll_loss": 0.9129317998886108, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09662487357854843, + "rewards/margins": 0.05738081410527229, + "rewards/rejected": -0.15400567650794983, + "step": 6380 + }, + { + "epoch": 1.15, + "grad_norm": 1.3851758241653442, + "learning_rate": 4.337166982093463e-06, + "log_odds_chosen": 1.0264556407928467, + "log_odds_ratio": -0.4926171898841858, + "logits/chosen": -0.3985103964805603, + "logits/rejected": -0.43339523673057556, + "logps/chosen": -0.907995343208313, + "logps/rejected": -1.6010644435882568, + "loss": 0.9261, + "nll_loss": 0.8768340945243835, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09079953283071518, + "rewards/margins": 0.06930691003799438, + "rewards/rejected": -0.16010645031929016, + "step": 6390 + }, + { + "epoch": 1.16, + "grad_norm": 1.298351764678955, + "learning_rate": 4.331343718154025e-06, + "log_odds_chosen": 0.7900893092155457, + "log_odds_ratio": -0.5369637608528137, + "logits/chosen": -0.4567854404449463, + "logits/rejected": -0.43872490525245667, + "logps/chosen": -0.942980170249939, + "logps/rejected": -1.4829621315002441, + "loss": 0.9717, + "nll_loss": 0.9179746508598328, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09429802000522614, + "rewards/margins": 0.0539981946349144, + "rewards/rejected": -0.14829620718955994, + "step": 6400 + }, + { + "epoch": 1.16, + "grad_norm": 1.8827316761016846, + "learning_rate": 4.325520454214587e-06, + "log_odds_chosen": 1.0105141401290894, + "log_odds_ratio": -0.44694510102272034, + "logits/chosen": -0.4560214877128601, + "logits/rejected": -0.48660048842430115, + "logps/chosen": -0.872963547706604, + "logps/rejected": -1.478611946105957, + "loss": 0.8971, + "nll_loss": 0.8524263501167297, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08729635179042816, + "rewards/margins": 0.06056482344865799, + "rewards/rejected": -0.14786118268966675, + "step": 6410 + }, + { + "epoch": 1.16, + "grad_norm": 1.138771891593933, + "learning_rate": 4.319697190275149e-06, + "log_odds_chosen": 0.9309137463569641, + "log_odds_ratio": -0.5124486684799194, + "logits/chosen": -0.4205206334590912, + "logits/rejected": -0.46138858795166016, + "logps/chosen": -0.9748755693435669, + "logps/rejected": -1.6254541873931885, + "loss": 0.9367, + "nll_loss": 0.8854933977127075, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09748755395412445, + "rewards/margins": 0.06505786627531052, + "rewards/rejected": -0.16254541277885437, + "step": 6420 + }, + { + "epoch": 1.16, + "grad_norm": 1.4133132696151733, + "learning_rate": 4.313873926335711e-06, + "log_odds_chosen": 1.0693262815475464, + "log_odds_ratio": -0.5076473951339722, + "logits/chosen": -0.3518396317958832, + "logits/rejected": -0.38446587324142456, + "logps/chosen": -0.81288081407547, + "logps/rejected": -1.5248903036117554, + "loss": 0.8726, + "nll_loss": 0.82184237241745, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08128808438777924, + "rewards/margins": 0.07120096683502197, + "rewards/rejected": -0.15248903632164001, + "step": 6430 + }, + { + "epoch": 1.16, + "grad_norm": 2.3196606636047363, + "learning_rate": 4.308050662396274e-06, + "log_odds_chosen": 0.6822506189346313, + "log_odds_ratio": -0.6364583969116211, + "logits/chosen": -0.47918400168418884, + "logits/rejected": -0.49134689569473267, + "logps/chosen": -1.0488958358764648, + "logps/rejected": -1.5445834398269653, + "loss": 1.072, + "nll_loss": 1.0083352327346802, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.10488957166671753, + "rewards/margins": 0.049568768590688705, + "rewards/rejected": -0.15445835888385773, + "step": 6440 + }, + { + "epoch": 1.17, + "grad_norm": 2.130206346511841, + "learning_rate": 4.302227398456835e-06, + "log_odds_chosen": 0.9953344464302063, + "log_odds_ratio": -0.5044914484024048, + "logits/chosen": -0.43052592873573303, + "logits/rejected": -0.47344738245010376, + "logps/chosen": -0.885372519493103, + "logps/rejected": -1.5817621946334839, + "loss": 0.9541, + "nll_loss": 0.9036803245544434, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08853726089000702, + "rewards/margins": 0.06963896006345749, + "rewards/rejected": -0.1581762135028839, + "step": 6450 + }, + { + "epoch": 1.17, + "grad_norm": 1.7047734260559082, + "learning_rate": 4.2964041345173965e-06, + "log_odds_chosen": 0.40977612137794495, + "log_odds_ratio": -0.6772114038467407, + "logits/chosen": -0.46431097388267517, + "logits/rejected": -0.4783390462398529, + "logps/chosen": -0.9744992256164551, + "logps/rejected": -1.2869319915771484, + "loss": 1.0318, + "nll_loss": 0.964094340801239, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09744994342327118, + "rewards/margins": 0.031243273988366127, + "rewards/rejected": -0.12869320809841156, + "step": 6460 + }, + { + "epoch": 1.17, + "grad_norm": 1.5343431234359741, + "learning_rate": 4.290580870577959e-06, + "log_odds_chosen": 1.0222392082214355, + "log_odds_ratio": -0.5148922204971313, + "logits/chosen": -0.44463086128234863, + "logits/rejected": -0.47433581948280334, + "logps/chosen": -0.8703458905220032, + "logps/rejected": -1.5388939380645752, + "loss": 0.9594, + "nll_loss": 0.9079081416130066, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08703459799289703, + "rewards/margins": 0.0668548122048378, + "rewards/rejected": -0.15388940274715424, + "step": 6470 + }, + { + "epoch": 1.17, + "grad_norm": 0.9783145189285278, + "learning_rate": 4.28475760663852e-06, + "log_odds_chosen": 1.05240797996521, + "log_odds_ratio": -0.47812169790267944, + "logits/chosen": -0.4410451054573059, + "logits/rejected": -0.46557217836380005, + "logps/chosen": -0.836539089679718, + "logps/rejected": -1.5610973834991455, + "loss": 0.8759, + "nll_loss": 0.8281365633010864, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08365390449762344, + "rewards/margins": 0.07245583832263947, + "rewards/rejected": -0.1561097502708435, + "step": 6480 + }, + { + "epoch": 1.17, + "grad_norm": 3.253295660018921, + "learning_rate": 4.278934342699083e-06, + "log_odds_chosen": 0.8776898384094238, + "log_odds_ratio": -0.4985920488834381, + "logits/chosen": -0.4755215048789978, + "logits/rejected": -0.48895248770713806, + "logps/chosen": -0.9190085530281067, + "logps/rejected": -1.5400559902191162, + "loss": 0.9548, + "nll_loss": 0.9049298167228699, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09190084040164948, + "rewards/margins": 0.062104739248752594, + "rewards/rejected": -0.15400558710098267, + "step": 6490 + }, + { + "epoch": 1.17, + "grad_norm": 0.9343768954277039, + "learning_rate": 4.273111078759645e-06, + "log_odds_chosen": 0.6879677176475525, + "log_odds_ratio": -0.5824822187423706, + "logits/chosen": -0.47967013716697693, + "logits/rejected": -0.4678632616996765, + "logps/chosen": -0.9688738584518433, + "logps/rejected": -1.4536980390548706, + "loss": 1.0141, + "nll_loss": 0.9558396339416504, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09688737988471985, + "rewards/margins": 0.04848243668675423, + "rewards/rejected": -0.14536981284618378, + "step": 6500 + }, + { + "epoch": 1.18, + "grad_norm": 1.6471149921417236, + "learning_rate": 4.267287814820206e-06, + "log_odds_chosen": 1.1193538904190063, + "log_odds_ratio": -0.47517600655555725, + "logits/chosen": -0.45033377408981323, + "logits/rejected": -0.47791576385498047, + "logps/chosen": -0.9069080352783203, + "logps/rejected": -1.702310562133789, + "loss": 0.9214, + "nll_loss": 0.8739102482795715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09069080650806427, + "rewards/margins": 0.07954025268554688, + "rewards/rejected": -0.17023104429244995, + "step": 6510 + }, + { + "epoch": 1.18, + "grad_norm": 2.0032148361206055, + "learning_rate": 4.261464550880769e-06, + "log_odds_chosen": 0.950029194355011, + "log_odds_ratio": -0.5132050514221191, + "logits/chosen": -0.4736092984676361, + "logits/rejected": -0.4746457040309906, + "logps/chosen": -0.9412399530410767, + "logps/rejected": -1.6105903387069702, + "loss": 0.9592, + "nll_loss": 0.9078750610351562, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09412400424480438, + "rewards/margins": 0.06693503260612488, + "rewards/rejected": -0.16105900704860687, + "step": 6520 + }, + { + "epoch": 1.18, + "grad_norm": 2.7363672256469727, + "learning_rate": 4.255641286941331e-06, + "log_odds_chosen": 0.6849262118339539, + "log_odds_ratio": -0.5214493870735168, + "logits/chosen": -0.4941480755805969, + "logits/rejected": -0.49812883138656616, + "logps/chosen": -0.9730516672134399, + "logps/rejected": -1.425528883934021, + "loss": 1.0632, + "nll_loss": 1.0110845565795898, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09730516374111176, + "rewards/margins": 0.045247726142406464, + "rewards/rejected": -0.14255289733409882, + "step": 6530 + }, + { + "epoch": 1.18, + "grad_norm": 0.7267311215400696, + "learning_rate": 4.2498180230018925e-06, + "log_odds_chosen": 0.8294305801391602, + "log_odds_ratio": -0.5074676275253296, + "logits/chosen": -0.4479546546936035, + "logits/rejected": -0.469372034072876, + "logps/chosen": -0.9526723623275757, + "logps/rejected": -1.5356619358062744, + "loss": 0.9706, + "nll_loss": 0.9198722839355469, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09526724368333817, + "rewards/margins": 0.05829895660281181, + "rewards/rejected": -0.15356619656085968, + "step": 6540 + }, + { + "epoch": 1.18, + "grad_norm": 0.9771788120269775, + "learning_rate": 4.243994759062454e-06, + "log_odds_chosen": 0.8805567026138306, + "log_odds_ratio": -0.5365056395530701, + "logits/chosen": -0.3927402198314667, + "logits/rejected": -0.45152372121810913, + "logps/chosen": -0.8411895036697388, + "logps/rejected": -1.3840970993041992, + "loss": 0.9869, + "nll_loss": 0.9332119226455688, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08411894738674164, + "rewards/margins": 0.054290771484375, + "rewards/rejected": -0.13840971887111664, + "step": 6550 + }, + { + "epoch": 1.18, + "grad_norm": 2.6478631496429443, + "learning_rate": 4.238171495123016e-06, + "log_odds_chosen": 0.5081368684768677, + "log_odds_ratio": -0.5900481343269348, + "logits/chosen": -0.4167002737522125, + "logits/rejected": -0.4424230456352234, + "logps/chosen": -0.9873155355453491, + "logps/rejected": -1.3592678308486938, + "loss": 0.9531, + "nll_loss": 0.8941276669502258, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09873154759407043, + "rewards/margins": 0.03719523549079895, + "rewards/rejected": -0.13592679798603058, + "step": 6560 + }, + { + "epoch": 1.19, + "grad_norm": 1.5292366743087769, + "learning_rate": 4.232348231183578e-06, + "log_odds_chosen": 1.2604739665985107, + "log_odds_ratio": -0.4504130482673645, + "logits/chosen": -0.41908130049705505, + "logits/rejected": -0.4867452085018158, + "logps/chosen": -0.9487060308456421, + "logps/rejected": -1.8027664422988892, + "loss": 0.9882, + "nll_loss": 0.9431636929512024, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09487061202526093, + "rewards/margins": 0.08540603518486023, + "rewards/rejected": -0.18027664721012115, + "step": 6570 + }, + { + "epoch": 1.19, + "grad_norm": 1.120796799659729, + "learning_rate": 4.22652496724414e-06, + "log_odds_chosen": 0.9827529788017273, + "log_odds_ratio": -0.5386685132980347, + "logits/chosen": -0.4483468532562256, + "logits/rejected": -0.46654707193374634, + "logps/chosen": -0.8629282712936401, + "logps/rejected": -1.5766785144805908, + "loss": 0.9345, + "nll_loss": 0.8805925250053406, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08629283308982849, + "rewards/margins": 0.07137502729892731, + "rewards/rejected": -0.1576678454875946, + "step": 6580 + }, + { + "epoch": 1.19, + "grad_norm": 1.3530491590499878, + "learning_rate": 4.220701703304702e-06, + "log_odds_chosen": 0.8309918642044067, + "log_odds_ratio": -0.539421558380127, + "logits/chosen": -0.47228020429611206, + "logits/rejected": -0.4613127112388611, + "logps/chosen": -0.9116449356079102, + "logps/rejected": -1.4743530750274658, + "loss": 0.8868, + "nll_loss": 0.832877516746521, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.0911644995212555, + "rewards/margins": 0.05627080798149109, + "rewards/rejected": -0.14743532240390778, + "step": 6590 + }, + { + "epoch": 1.19, + "grad_norm": 1.0767103433609009, + "learning_rate": 4.214878439365264e-06, + "log_odds_chosen": 0.8759848475456238, + "log_odds_ratio": -0.49737709760665894, + "logits/chosen": -0.43695569038391113, + "logits/rejected": -0.47424954175949097, + "logps/chosen": -0.9387688636779785, + "logps/rejected": -1.5360345840454102, + "loss": 0.9592, + "nll_loss": 0.9094923734664917, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09387689083814621, + "rewards/margins": 0.059726566076278687, + "rewards/rejected": -0.1536034494638443, + "step": 6600 + }, + { + "epoch": 1.19, + "grad_norm": 1.734474778175354, + "learning_rate": 4.209055175425826e-06, + "log_odds_chosen": 1.0728756189346313, + "log_odds_ratio": -0.45263487100601196, + "logits/chosen": -0.43640464544296265, + "logits/rejected": -0.46173277497291565, + "logps/chosen": -0.8198699951171875, + "logps/rejected": -1.4813389778137207, + "loss": 0.9697, + "nll_loss": 0.9244702458381653, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08198700100183487, + "rewards/margins": 0.06614689528942108, + "rewards/rejected": -0.14813390374183655, + "step": 6610 + }, + { + "epoch": 1.2, + "grad_norm": 1.4955604076385498, + "learning_rate": 4.203231911486388e-06, + "log_odds_chosen": 0.6764736771583557, + "log_odds_ratio": -0.5236061811447144, + "logits/chosen": -0.5202940702438354, + "logits/rejected": -0.4843006134033203, + "logps/chosen": -0.9377977252006531, + "logps/rejected": -1.3767682313919067, + "loss": 0.983, + "nll_loss": 0.9306391477584839, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09377976506948471, + "rewards/margins": 0.043897055089473724, + "rewards/rejected": -0.13767683506011963, + "step": 6620 + }, + { + "epoch": 1.2, + "grad_norm": 1.9507471323013306, + "learning_rate": 4.197408647546949e-06, + "log_odds_chosen": 0.7487798929214478, + "log_odds_ratio": -0.5690258741378784, + "logits/chosen": -0.4605821669101715, + "logits/rejected": -0.45187997817993164, + "logps/chosen": -0.9927385449409485, + "logps/rejected": -1.4855639934539795, + "loss": 0.9683, + "nll_loss": 0.9114102125167847, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09927386045455933, + "rewards/margins": 0.04928254336118698, + "rewards/rejected": -0.1485564112663269, + "step": 6630 + }, + { + "epoch": 1.2, + "grad_norm": 1.5568227767944336, + "learning_rate": 4.191585383607511e-06, + "log_odds_chosen": 0.9163190722465515, + "log_odds_ratio": -0.49040189385414124, + "logits/chosen": -0.42363911867141724, + "logits/rejected": -0.4508935511112213, + "logps/chosen": -0.9086519479751587, + "logps/rejected": -1.5187523365020752, + "loss": 0.968, + "nll_loss": 0.9189130067825317, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09086520224809647, + "rewards/margins": 0.06101004406809807, + "rewards/rejected": -0.15187524259090424, + "step": 6640 + }, + { + "epoch": 1.2, + "grad_norm": 0.8964611291885376, + "learning_rate": 4.185762119668074e-06, + "log_odds_chosen": 0.7944979071617126, + "log_odds_ratio": -0.5299047231674194, + "logits/chosen": -0.3896244168281555, + "logits/rejected": -0.4423930048942566, + "logps/chosen": -0.9034556150436401, + "logps/rejected": -1.4044759273529053, + "loss": 0.8695, + "nll_loss": 0.8164774775505066, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09034556150436401, + "rewards/margins": 0.05010201781988144, + "rewards/rejected": -0.14044758677482605, + "step": 6650 + }, + { + "epoch": 1.2, + "grad_norm": 1.2127312421798706, + "learning_rate": 4.179938855728636e-06, + "log_odds_chosen": 0.7413554191589355, + "log_odds_ratio": -0.5506377220153809, + "logits/chosen": -0.4648379385471344, + "logits/rejected": -0.4867902398109436, + "logps/chosen": -0.9216586947441101, + "logps/rejected": -1.426811933517456, + "loss": 0.9425, + "nll_loss": 0.8874050378799438, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09216587245464325, + "rewards/margins": 0.0505153127014637, + "rewards/rejected": -0.14268119633197784, + "step": 6660 + }, + { + "epoch": 1.2, + "grad_norm": 1.0402570962905884, + "learning_rate": 4.1741155917891974e-06, + "log_odds_chosen": 0.9177875518798828, + "log_odds_ratio": -0.5133272409439087, + "logits/chosen": -0.3900728225708008, + "logits/rejected": -0.4346277117729187, + "logps/chosen": -0.9603956937789917, + "logps/rejected": -1.5792925357818604, + "loss": 0.9434, + "nll_loss": 0.8920313715934753, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09603957086801529, + "rewards/margins": 0.06188970059156418, + "rewards/rejected": -0.15792927145957947, + "step": 6670 + }, + { + "epoch": 1.21, + "grad_norm": 1.1733165979385376, + "learning_rate": 4.16829232784976e-06, + "log_odds_chosen": 0.5979320406913757, + "log_odds_ratio": -0.6295372843742371, + "logits/chosen": -0.46904540061950684, + "logits/rejected": -0.47716307640075684, + "logps/chosen": -0.9750441312789917, + "logps/rejected": -1.390572428703308, + "loss": 1.0124, + "nll_loss": 0.9493964314460754, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09750442206859589, + "rewards/margins": 0.0415528267621994, + "rewards/rejected": -0.1390572488307953, + "step": 6680 + }, + { + "epoch": 1.21, + "grad_norm": 1.071596622467041, + "learning_rate": 4.162469063910322e-06, + "log_odds_chosen": 0.8037103414535522, + "log_odds_ratio": -0.5370471477508545, + "logits/chosen": -0.4986805021762848, + "logits/rejected": -0.4829865097999573, + "logps/chosen": -0.9812761545181274, + "logps/rejected": -1.547245979309082, + "loss": 1.0159, + "nll_loss": 0.9622408747673035, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09812761843204498, + "rewards/margins": 0.05659698694944382, + "rewards/rejected": -0.1547246128320694, + "step": 6690 + }, + { + "epoch": 1.21, + "grad_norm": 0.7664772868156433, + "learning_rate": 4.1566457999708835e-06, + "log_odds_chosen": 0.9926007986068726, + "log_odds_ratio": -0.5328630208969116, + "logits/chosen": -0.4604420065879822, + "logits/rejected": -0.4595687985420227, + "logps/chosen": -1.0073707103729248, + "logps/rejected": -1.7252197265625, + "loss": 1.0122, + "nll_loss": 0.9589058756828308, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.1007370725274086, + "rewards/margins": 0.07178490608930588, + "rewards/rejected": -0.17252197861671448, + "step": 6700 + }, + { + "epoch": 1.21, + "grad_norm": 1.2537119388580322, + "learning_rate": 4.150822536031446e-06, + "log_odds_chosen": 0.9483833312988281, + "log_odds_ratio": -0.5262691378593445, + "logits/chosen": -0.4464438557624817, + "logits/rejected": -0.49215570092201233, + "logps/chosen": -0.8856107592582703, + "logps/rejected": -1.479950189590454, + "loss": 0.9288, + "nll_loss": 0.8761947751045227, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08856107294559479, + "rewards/margins": 0.05943392589688301, + "rewards/rejected": -0.1479950249195099, + "step": 6710 + }, + { + "epoch": 1.21, + "grad_norm": 1.239740014076233, + "learning_rate": 4.144999272092008e-06, + "log_odds_chosen": 1.1764709949493408, + "log_odds_ratio": -0.4934256970882416, + "logits/chosen": -0.38612398505210876, + "logits/rejected": -0.44745928049087524, + "logps/chosen": -0.7867048382759094, + "logps/rejected": -1.5437514781951904, + "loss": 0.8604, + "nll_loss": 0.811070442199707, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07867047935724258, + "rewards/margins": 0.0757046714425087, + "rewards/rejected": -0.15437515079975128, + "step": 6720 + }, + { + "epoch": 1.22, + "grad_norm": 1.7960892915725708, + "learning_rate": 4.139176008152569e-06, + "log_odds_chosen": 0.7462882995605469, + "log_odds_ratio": -0.6226873397827148, + "logits/chosen": -0.49681800603866577, + "logits/rejected": -0.510138988494873, + "logps/chosen": -0.8982499837875366, + "logps/rejected": -1.4710769653320312, + "loss": 1.01, + "nll_loss": 0.9477685689926147, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.08982499688863754, + "rewards/margins": 0.0572826974093914, + "rewards/rejected": -0.14710770547389984, + "step": 6730 + }, + { + "epoch": 1.22, + "grad_norm": 1.6915690898895264, + "learning_rate": 4.133352744213131e-06, + "log_odds_chosen": 1.1176029443740845, + "log_odds_ratio": -0.4465733468532562, + "logits/chosen": -0.45315223932266235, + "logits/rejected": -0.5397419333457947, + "logps/chosen": -0.9248179197311401, + "logps/rejected": -1.6554958820343018, + "loss": 0.951, + "nll_loss": 0.9063474535942078, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09248179197311401, + "rewards/margins": 0.0730677992105484, + "rewards/rejected": -0.1655496060848236, + "step": 6740 + }, + { + "epoch": 1.22, + "grad_norm": 1.6438920497894287, + "learning_rate": 4.127529480273693e-06, + "log_odds_chosen": 0.9469677209854126, + "log_odds_ratio": -0.5286572575569153, + "logits/chosen": -0.4755041003227234, + "logits/rejected": -0.4868949353694916, + "logps/chosen": -0.8673686981201172, + "logps/rejected": -1.5094770193099976, + "loss": 0.9609, + "nll_loss": 0.908068835735321, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08673687279224396, + "rewards/margins": 0.06421081721782684, + "rewards/rejected": -0.1509476900100708, + "step": 6750 + }, + { + "epoch": 1.22, + "grad_norm": 1.2185676097869873, + "learning_rate": 4.121706216334255e-06, + "log_odds_chosen": 0.9891728162765503, + "log_odds_ratio": -0.5200589299201965, + "logits/chosen": -0.44195041060447693, + "logits/rejected": -0.48202115297317505, + "logps/chosen": -0.9076203107833862, + "logps/rejected": -1.5685710906982422, + "loss": 1.0175, + "nll_loss": 0.965488076210022, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09076203405857086, + "rewards/margins": 0.06609507650136948, + "rewards/rejected": -0.15685710310935974, + "step": 6760 + }, + { + "epoch": 1.22, + "grad_norm": 1.3335249423980713, + "learning_rate": 4.115882952394817e-06, + "log_odds_chosen": 0.9775427579879761, + "log_odds_ratio": -0.48796018958091736, + "logits/chosen": -0.484336793422699, + "logits/rejected": -0.5003049373626709, + "logps/chosen": -0.9130972027778625, + "logps/rejected": -1.6077959537506104, + "loss": 0.9418, + "nll_loss": 0.8930259943008423, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09130971133708954, + "rewards/margins": 0.0694698840379715, + "rewards/rejected": -0.16077958047389984, + "step": 6770 + }, + { + "epoch": 1.22, + "grad_norm": 1.6787357330322266, + "learning_rate": 4.1100596884553794e-06, + "log_odds_chosen": 0.9635372161865234, + "log_odds_ratio": -0.5388367176055908, + "logits/chosen": -0.4794275164604187, + "logits/rejected": -0.5022672414779663, + "logps/chosen": -0.9136490821838379, + "logps/rejected": -1.597872018814087, + "loss": 0.912, + "nll_loss": 0.8581094741821289, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09136491268873215, + "rewards/margins": 0.06842230260372162, + "rewards/rejected": -0.15978720784187317, + "step": 6780 + }, + { + "epoch": 1.23, + "grad_norm": 1.1251144409179688, + "learning_rate": 4.104236424515941e-06, + "log_odds_chosen": 0.8386086225509644, + "log_odds_ratio": -0.5400186777114868, + "logits/chosen": -0.4414609372615814, + "logits/rejected": -0.46368885040283203, + "logps/chosen": -0.9598379135131836, + "logps/rejected": -1.497143268585205, + "loss": 0.9998, + "nll_loss": 0.9457993507385254, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09598378837108612, + "rewards/margins": 0.05373052880167961, + "rewards/rejected": -0.14971432089805603, + "step": 6790 + }, + { + "epoch": 1.23, + "grad_norm": 1.0504904985427856, + "learning_rate": 4.098413160576503e-06, + "log_odds_chosen": 0.8111333847045898, + "log_odds_ratio": -0.5414608120918274, + "logits/chosen": -0.43067970871925354, + "logits/rejected": -0.44196492433547974, + "logps/chosen": -0.9440352320671082, + "logps/rejected": -1.4980310201644897, + "loss": 0.9046, + "nll_loss": 0.8504649996757507, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09440352767705917, + "rewards/margins": 0.0553995780646801, + "rewards/rejected": -0.14980310201644897, + "step": 6800 + }, + { + "epoch": 1.23, + "grad_norm": 1.0189682245254517, + "learning_rate": 4.0925898966370655e-06, + "log_odds_chosen": 1.1220974922180176, + "log_odds_ratio": -0.48033151030540466, + "logits/chosen": -0.44528502225875854, + "logits/rejected": -0.512277364730835, + "logps/chosen": -0.8466246724128723, + "logps/rejected": -1.5640580654144287, + "loss": 0.9424, + "nll_loss": 0.8943877220153809, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08466245979070663, + "rewards/margins": 0.07174333184957504, + "rewards/rejected": -0.15640580654144287, + "step": 6810 + }, + { + "epoch": 1.23, + "grad_norm": 1.2099649906158447, + "learning_rate": 4.086766632697626e-06, + "log_odds_chosen": 0.9407919049263, + "log_odds_ratio": -0.49303555488586426, + "logits/chosen": -0.44150418043136597, + "logits/rejected": -0.4331357479095459, + "logps/chosen": -0.8563871383666992, + "logps/rejected": -1.5000990629196167, + "loss": 0.9265, + "nll_loss": 0.8772442936897278, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08563872426748276, + "rewards/margins": 0.06437118351459503, + "rewards/rejected": -0.1500099152326584, + "step": 6820 + }, + { + "epoch": 1.23, + "grad_norm": 2.6405341625213623, + "learning_rate": 4.0809433687581885e-06, + "log_odds_chosen": 0.6924955248832703, + "log_odds_ratio": -0.5537286996841431, + "logits/chosen": -0.5182186365127563, + "logits/rejected": -0.532181441783905, + "logps/chosen": -1.0400513410568237, + "logps/rejected": -1.528926968574524, + "loss": 1.0234, + "nll_loss": 0.967991054058075, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10400513559579849, + "rewards/margins": 0.04888755828142166, + "rewards/rejected": -0.15289269387722015, + "step": 6830 + }, + { + "epoch": 1.24, + "grad_norm": 1.5627719163894653, + "learning_rate": 4.075120104818751e-06, + "log_odds_chosen": 1.0674669742584229, + "log_odds_ratio": -0.4889269471168518, + "logits/chosen": -0.4403366148471832, + "logits/rejected": -0.4267563223838806, + "logps/chosen": -0.7794691324234009, + "logps/rejected": -1.4749410152435303, + "loss": 0.9037, + "nll_loss": 0.8547900319099426, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.07794691622257233, + "rewards/margins": 0.06954719126224518, + "rewards/rejected": -0.1474941074848175, + "step": 6840 + }, + { + "epoch": 1.24, + "grad_norm": 0.8014816045761108, + "learning_rate": 4.069296840879312e-06, + "log_odds_chosen": 0.739176869392395, + "log_odds_ratio": -0.5694806575775146, + "logits/chosen": -0.44575244188308716, + "logits/rejected": -0.4639630913734436, + "logps/chosen": -0.9097639322280884, + "logps/rejected": -1.4184764623641968, + "loss": 0.9335, + "nll_loss": 0.8765338659286499, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09097640216350555, + "rewards/margins": 0.05087127164006233, + "rewards/rejected": -0.1418476402759552, + "step": 6850 + }, + { + "epoch": 1.24, + "grad_norm": 1.7838681936264038, + "learning_rate": 4.0634735769398745e-06, + "log_odds_chosen": 0.8185930252075195, + "log_odds_ratio": -0.5115641355514526, + "logits/chosen": -0.441885769367218, + "logits/rejected": -0.4383307993412018, + "logps/chosen": -0.9322429895401001, + "logps/rejected": -1.4734580516815186, + "loss": 0.9531, + "nll_loss": 0.901964008808136, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09322428703308105, + "rewards/margins": 0.05412151664495468, + "rewards/rejected": -0.14734581112861633, + "step": 6860 + }, + { + "epoch": 1.24, + "grad_norm": 1.6935601234436035, + "learning_rate": 4.057650313000437e-06, + "log_odds_chosen": 0.47807177901268005, + "log_odds_ratio": -0.6113700866699219, + "logits/chosen": -0.47867828607559204, + "logits/rejected": -0.49842625856399536, + "logps/chosen": -1.120103120803833, + "logps/rejected": -1.4488173723220825, + "loss": 1.0677, + "nll_loss": 1.0065581798553467, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.11201032251119614, + "rewards/margins": 0.03287142515182495, + "rewards/rejected": -0.1448817402124405, + "step": 6870 + }, + { + "epoch": 1.24, + "grad_norm": 0.7489156723022461, + "learning_rate": 4.051827049060999e-06, + "log_odds_chosen": 0.8212149739265442, + "log_odds_ratio": -0.5638260841369629, + "logits/chosen": -0.5162757635116577, + "logits/rejected": -0.5030328035354614, + "logps/chosen": -0.901445746421814, + "logps/rejected": -1.3886324167251587, + "loss": 0.9795, + "nll_loss": 0.9231454133987427, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0901445671916008, + "rewards/margins": 0.048718664795160294, + "rewards/rejected": -0.1388632357120514, + "step": 6880 + }, + { + "epoch": 1.24, + "grad_norm": 0.9027697443962097, + "learning_rate": 4.046003785121561e-06, + "log_odds_chosen": 0.8782781362533569, + "log_odds_ratio": -0.5325084924697876, + "logits/chosen": -0.4884958863258362, + "logits/rejected": -0.5067233443260193, + "logps/chosen": -0.9407421350479126, + "logps/rejected": -1.5487650632858276, + "loss": 1.0885, + "nll_loss": 1.035233974456787, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09407420456409454, + "rewards/margins": 0.06080232933163643, + "rewards/rejected": -0.15487651526927948, + "step": 6890 + }, + { + "epoch": 1.25, + "grad_norm": 1.6402422189712524, + "learning_rate": 4.040180521182122e-06, + "log_odds_chosen": 0.7644470930099487, + "log_odds_ratio": -0.6131644248962402, + "logits/chosen": -0.5099159479141235, + "logits/rejected": -0.49645549058914185, + "logps/chosen": -0.9875893592834473, + "logps/rejected": -1.5628368854522705, + "loss": 1.0243, + "nll_loss": 0.9630203247070312, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09875893592834473, + "rewards/margins": 0.05752474069595337, + "rewards/rejected": -0.1562836766242981, + "step": 6900 + }, + { + "epoch": 1.25, + "grad_norm": 1.100881814956665, + "learning_rate": 4.034357257242684e-06, + "log_odds_chosen": 0.5976766347885132, + "log_odds_ratio": -0.6160573959350586, + "logits/chosen": -0.46787476539611816, + "logits/rejected": -0.48675742745399475, + "logps/chosen": -0.9335827827453613, + "logps/rejected": -1.3486616611480713, + "loss": 1.0044, + "nll_loss": 0.9428272247314453, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09335827827453613, + "rewards/margins": 0.041507888585329056, + "rewards/rejected": -0.1348661631345749, + "step": 6910 + }, + { + "epoch": 1.25, + "grad_norm": 1.0779989957809448, + "learning_rate": 4.028533993303246e-06, + "log_odds_chosen": 1.032034158706665, + "log_odds_ratio": -0.45201388001441956, + "logits/chosen": -0.5129601359367371, + "logits/rejected": -0.5174925923347473, + "logps/chosen": -0.8675662279129028, + "logps/rejected": -1.561211347579956, + "loss": 0.8998, + "nll_loss": 0.8546127080917358, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.0867566242814064, + "rewards/margins": 0.0693645179271698, + "rewards/rejected": -0.1561211496591568, + "step": 6920 + }, + { + "epoch": 1.25, + "grad_norm": 1.5758169889450073, + "learning_rate": 4.022710729363808e-06, + "log_odds_chosen": 1.4302197694778442, + "log_odds_ratio": -0.4029451310634613, + "logits/chosen": -0.41934436559677124, + "logits/rejected": -0.4693407416343689, + "logps/chosen": -0.855545163154602, + "logps/rejected": -1.8322776556015015, + "loss": 0.9478, + "nll_loss": 0.9074686169624329, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.08555451035499573, + "rewards/margins": 0.09767324477434158, + "rewards/rejected": -0.1832277774810791, + "step": 6930 + }, + { + "epoch": 1.25, + "grad_norm": 1.2255765199661255, + "learning_rate": 4.0168874654243705e-06, + "log_odds_chosen": 1.0592358112335205, + "log_odds_ratio": -0.48211097717285156, + "logits/chosen": -0.46458035707473755, + "logits/rejected": -0.4718129634857178, + "logps/chosen": -0.8804963827133179, + "logps/rejected": -1.60556161403656, + "loss": 0.9907, + "nll_loss": 0.9424525499343872, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08804963529109955, + "rewards/margins": 0.07250651717185974, + "rewards/rejected": -0.16055616736412048, + "step": 6940 + }, + { + "epoch": 1.26, + "grad_norm": 1.0770710706710815, + "learning_rate": 4.011064201484932e-06, + "log_odds_chosen": 0.8747438192367554, + "log_odds_ratio": -0.5370987057685852, + "logits/chosen": -0.4886086583137512, + "logits/rejected": -0.46743687987327576, + "logps/chosen": -0.9328964352607727, + "logps/rejected": -1.534489631652832, + "loss": 0.958, + "nll_loss": 0.904322624206543, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09328965097665787, + "rewards/margins": 0.06015932559967041, + "rewards/rejected": -0.15344896912574768, + "step": 6950 + }, + { + "epoch": 1.26, + "grad_norm": 1.0228257179260254, + "learning_rate": 4.005240937545494e-06, + "log_odds_chosen": 0.9547672271728516, + "log_odds_ratio": -0.5136852264404297, + "logits/chosen": -0.4253026843070984, + "logits/rejected": -0.44504547119140625, + "logps/chosen": -0.8749715089797974, + "logps/rejected": -1.517364263534546, + "loss": 0.914, + "nll_loss": 0.8625979423522949, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08749713748693466, + "rewards/margins": 0.06423927843570709, + "rewards/rejected": -0.15173643827438354, + "step": 6960 + }, + { + "epoch": 1.26, + "grad_norm": 1.1351262331008911, + "learning_rate": 3.999417673606056e-06, + "log_odds_chosen": 0.8336387872695923, + "log_odds_ratio": -0.5469530820846558, + "logits/chosen": -0.4293319582939148, + "logits/rejected": -0.47841334342956543, + "logps/chosen": -0.9202069044113159, + "logps/rejected": -1.4685187339782715, + "loss": 0.9341, + "nll_loss": 0.879433274269104, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.092020682990551, + "rewards/margins": 0.05483119562268257, + "rewards/rejected": -0.14685186743736267, + "step": 6970 + }, + { + "epoch": 1.26, + "grad_norm": 1.3760859966278076, + "learning_rate": 3.993594409666618e-06, + "log_odds_chosen": 0.973896324634552, + "log_odds_ratio": -0.5526650547981262, + "logits/chosen": -0.45984959602355957, + "logits/rejected": -0.48349887132644653, + "logps/chosen": -0.9893393516540527, + "logps/rejected": -1.6574366092681885, + "loss": 1.0807, + "nll_loss": 1.0254210233688354, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09893393516540527, + "rewards/margins": 0.06680972129106522, + "rewards/rejected": -0.1657436639070511, + "step": 6980 + }, + { + "epoch": 1.26, + "grad_norm": 0.7266592979431152, + "learning_rate": 3.9877711457271795e-06, + "log_odds_chosen": 0.8963996171951294, + "log_odds_ratio": -0.48495644330978394, + "logits/chosen": -0.3935561776161194, + "logits/rejected": -0.43216556310653687, + "logps/chosen": -0.8080177307128906, + "logps/rejected": -1.3844796419143677, + "loss": 0.8988, + "nll_loss": 0.8502942323684692, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08080177009105682, + "rewards/margins": 0.057646192610263824, + "rewards/rejected": -0.13844797015190125, + "step": 6990 + }, + { + "epoch": 1.26, + "grad_norm": 1.136324405670166, + "learning_rate": 3.981947881787742e-06, + "log_odds_chosen": 1.0977365970611572, + "log_odds_ratio": -0.5075836777687073, + "logits/chosen": -0.35787349939346313, + "logits/rejected": -0.43332797288894653, + "logps/chosen": -0.8275062441825867, + "logps/rejected": -1.5597859621047974, + "loss": 0.8396, + "nll_loss": 0.7887982130050659, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08275063335895538, + "rewards/margins": 0.07322796434164047, + "rewards/rejected": -0.15597859025001526, + "step": 7000 + }, + { + "epoch": 1.27, + "grad_norm": 1.6420483589172363, + "learning_rate": 3.976124617848304e-06, + "log_odds_chosen": 0.7088609933853149, + "log_odds_ratio": -0.5549465417861938, + "logits/chosen": -0.4889621138572693, + "logits/rejected": -0.5120013356208801, + "logps/chosen": -0.9765472412109375, + "logps/rejected": -1.474671483039856, + "loss": 0.9782, + "nll_loss": 0.9226747751235962, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09765471518039703, + "rewards/margins": 0.04981241747736931, + "rewards/rejected": -0.14746715128421783, + "step": 7010 + }, + { + "epoch": 1.27, + "grad_norm": 1.3024640083312988, + "learning_rate": 3.9703013539088656e-06, + "log_odds_chosen": 0.8062094449996948, + "log_odds_ratio": -0.5428799986839294, + "logits/chosen": -0.45093774795532227, + "logits/rejected": -0.47385063767433167, + "logps/chosen": -0.9071990847587585, + "logps/rejected": -1.4861849546432495, + "loss": 0.9166, + "nll_loss": 0.8622728586196899, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09071992337703705, + "rewards/margins": 0.05789857357740402, + "rewards/rejected": -0.14861848950386047, + "step": 7020 + }, + { + "epoch": 1.27, + "grad_norm": 1.2627145051956177, + "learning_rate": 3.964478089969428e-06, + "log_odds_chosen": 0.9427957534790039, + "log_odds_ratio": -0.4958980977535248, + "logits/chosen": -0.4174041152000427, + "logits/rejected": -0.47076454758644104, + "logps/chosen": -0.8594261407852173, + "logps/rejected": -1.5148569345474243, + "loss": 0.9342, + "nll_loss": 0.8845945596694946, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08594261854887009, + "rewards/margins": 0.06554307788610458, + "rewards/rejected": -0.15148569643497467, + "step": 7030 + }, + { + "epoch": 1.27, + "grad_norm": 1.116181492805481, + "learning_rate": 3.958654826029989e-06, + "log_odds_chosen": 1.0329385995864868, + "log_odds_ratio": -0.5539559721946716, + "logits/chosen": -0.4265444874763489, + "logits/rejected": -0.45913130044937134, + "logps/chosen": -0.8591364622116089, + "logps/rejected": -1.6048072576522827, + "loss": 0.9528, + "nll_loss": 0.8973916172981262, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08591364324092865, + "rewards/margins": 0.07456707954406738, + "rewards/rejected": -0.16048072278499603, + "step": 7040 + }, + { + "epoch": 1.27, + "grad_norm": 0.9981061816215515, + "learning_rate": 3.952831562090552e-06, + "log_odds_chosen": 1.2240138053894043, + "log_odds_ratio": -0.4347744584083557, + "logits/chosen": -0.41130954027175903, + "logits/rejected": -0.48269692063331604, + "logps/chosen": -0.7971197366714478, + "logps/rejected": -1.584804892539978, + "loss": 0.8706, + "nll_loss": 0.827093243598938, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07971197366714478, + "rewards/margins": 0.0787685215473175, + "rewards/rejected": -0.15848049521446228, + "step": 7050 + }, + { + "epoch": 1.28, + "grad_norm": 1.3474347591400146, + "learning_rate": 3.947008298151114e-06, + "log_odds_chosen": 0.9372537732124329, + "log_odds_ratio": -0.4520055651664734, + "logits/chosen": -0.43001455068588257, + "logits/rejected": -0.4487704336643219, + "logps/chosen": -0.853164792060852, + "logps/rejected": -1.4767754077911377, + "loss": 0.9032, + "nll_loss": 0.8579923510551453, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.0853164792060852, + "rewards/margins": 0.062361061573028564, + "rewards/rejected": -0.14767754077911377, + "step": 7060 + }, + { + "epoch": 1.28, + "grad_norm": 1.6226599216461182, + "learning_rate": 3.941185034211675e-06, + "log_odds_chosen": 0.7453482151031494, + "log_odds_ratio": -0.5842022895812988, + "logits/chosen": -0.49188828468322754, + "logits/rejected": -0.4837633967399597, + "logps/chosen": -0.9727983474731445, + "logps/rejected": -1.4930089712142944, + "loss": 1.0248, + "nll_loss": 0.9663785696029663, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09727983176708221, + "rewards/margins": 0.05202106386423111, + "rewards/rejected": -0.14930090308189392, + "step": 7070 + }, + { + "epoch": 1.28, + "grad_norm": 1.3624190092086792, + "learning_rate": 3.935361770272237e-06, + "log_odds_chosen": 0.7438120245933533, + "log_odds_ratio": -0.610444188117981, + "logits/chosen": -0.43259668350219727, + "logits/rejected": -0.48162850737571716, + "logps/chosen": -0.9150593876838684, + "logps/rejected": -1.4560168981552124, + "loss": 0.9653, + "nll_loss": 0.9042099118232727, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09150593727827072, + "rewards/margins": 0.05409575253725052, + "rewards/rejected": -0.14560168981552124, + "step": 7080 + }, + { + "epoch": 1.28, + "grad_norm": 2.6767237186431885, + "learning_rate": 3.929538506332799e-06, + "log_odds_chosen": 0.8162752389907837, + "log_odds_ratio": -0.5413010716438293, + "logits/chosen": -0.4502388834953308, + "logits/rejected": -0.49949997663497925, + "logps/chosen": -0.9828441739082336, + "logps/rejected": -1.5386993885040283, + "loss": 0.9733, + "nll_loss": 0.9192169904708862, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09828442335128784, + "rewards/margins": 0.05558552220463753, + "rewards/rejected": -0.15386994183063507, + "step": 7090 + }, + { + "epoch": 1.28, + "grad_norm": 2.3011538982391357, + "learning_rate": 3.9237152423933615e-06, + "log_odds_chosen": 0.6587690114974976, + "log_odds_ratio": -0.5885307788848877, + "logits/chosen": -0.5728673338890076, + "logits/rejected": -0.5504066348075867, + "logps/chosen": -1.0062379837036133, + "logps/rejected": -1.490363597869873, + "loss": 1.0529, + "nll_loss": 0.9940397143363953, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10062380135059357, + "rewards/margins": 0.04841255396604538, + "rewards/rejected": -0.14903636276721954, + "step": 7100 + }, + { + "epoch": 1.28, + "grad_norm": 1.0612140893936157, + "learning_rate": 3.917891978453923e-06, + "log_odds_chosen": 0.6910644769668579, + "log_odds_ratio": -0.5820239782333374, + "logits/chosen": -0.43986526131629944, + "logits/rejected": -0.47137826681137085, + "logps/chosen": -0.9189141988754272, + "logps/rejected": -1.391150951385498, + "loss": 0.9652, + "nll_loss": 0.9069743156433105, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09189142286777496, + "rewards/margins": 0.04722367227077484, + "rewards/rejected": -0.1391150951385498, + "step": 7110 + }, + { + "epoch": 1.29, + "grad_norm": 1.8805290460586548, + "learning_rate": 3.912068714514485e-06, + "log_odds_chosen": 0.646868109703064, + "log_odds_ratio": -0.6137688159942627, + "logits/chosen": -0.45590490102767944, + "logits/rejected": -0.469409316778183, + "logps/chosen": -0.9701493978500366, + "logps/rejected": -1.424383521080017, + "loss": 1.0222, + "nll_loss": 0.9608350992202759, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09701494127511978, + "rewards/margins": 0.04542340710759163, + "rewards/rejected": -0.1424383670091629, + "step": 7120 + }, + { + "epoch": 1.29, + "grad_norm": 1.5245620012283325, + "learning_rate": 3.906245450575047e-06, + "log_odds_chosen": 1.120469331741333, + "log_odds_ratio": -0.47017669677734375, + "logits/chosen": -0.44969311356544495, + "logits/rejected": -0.45316869020462036, + "logps/chosen": -0.8377715349197388, + "logps/rejected": -1.6501190662384033, + "loss": 0.9008, + "nll_loss": 0.8537575006484985, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08377715200185776, + "rewards/margins": 0.08123474568128586, + "rewards/rejected": -0.16501189768314362, + "step": 7130 + }, + { + "epoch": 1.29, + "grad_norm": 1.402364730834961, + "learning_rate": 3.900422186635609e-06, + "log_odds_chosen": 0.48328810930252075, + "log_odds_ratio": -0.6531627774238586, + "logits/chosen": -0.4681634306907654, + "logits/rejected": -0.5032951235771179, + "logps/chosen": -0.9557656049728394, + "logps/rejected": -1.3002371788024902, + "loss": 1.0143, + "nll_loss": 0.9489529728889465, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09557655453681946, + "rewards/margins": 0.034447163343429565, + "rewards/rejected": -0.13002371788024902, + "step": 7140 + }, + { + "epoch": 1.29, + "grad_norm": 0.9971950650215149, + "learning_rate": 3.894598922696171e-06, + "log_odds_chosen": 0.8737391233444214, + "log_odds_ratio": -0.5511624217033386, + "logits/chosen": -0.418578565120697, + "logits/rejected": -0.45996397733688354, + "logps/chosen": -0.8235437273979187, + "logps/rejected": -1.4508552551269531, + "loss": 0.9171, + "nll_loss": 0.8619489669799805, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08235438168048859, + "rewards/margins": 0.06273116171360016, + "rewards/rejected": -0.14508552849292755, + "step": 7150 + }, + { + "epoch": 1.29, + "grad_norm": 1.293363332748413, + "learning_rate": 3.888775658756733e-06, + "log_odds_chosen": 0.6994448900222778, + "log_odds_ratio": -0.5278793573379517, + "logits/chosen": -0.46094202995300293, + "logits/rejected": -0.5015737414360046, + "logps/chosen": -0.8974512815475464, + "logps/rejected": -1.3594824075698853, + "loss": 0.9091, + "nll_loss": 0.8563462495803833, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08974512666463852, + "rewards/margins": 0.046203114092350006, + "rewards/rejected": -0.13594824075698853, + "step": 7160 + }, + { + "epoch": 1.3, + "grad_norm": 1.8436368703842163, + "learning_rate": 3.882952394817295e-06, + "log_odds_chosen": 0.8397830724716187, + "log_odds_ratio": -0.5210739374160767, + "logits/chosen": -0.475078284740448, + "logits/rejected": -0.4907829761505127, + "logps/chosen": -0.9214905500411987, + "logps/rejected": -1.4428390264511108, + "loss": 0.9722, + "nll_loss": 0.9201291799545288, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09214906394481659, + "rewards/margins": 0.05213485285639763, + "rewards/rejected": -0.14428392052650452, + "step": 7170 + }, + { + "epoch": 1.3, + "grad_norm": 1.1829735040664673, + "learning_rate": 3.877129130877857e-06, + "log_odds_chosen": 0.6583563089370728, + "log_odds_ratio": -0.5702995657920837, + "logits/chosen": -0.5196170210838318, + "logits/rejected": -0.5385341644287109, + "logps/chosen": -0.9533084034919739, + "logps/rejected": -1.4245009422302246, + "loss": 0.9988, + "nll_loss": 0.9417839050292969, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09533083438873291, + "rewards/margins": 0.04711926728487015, + "rewards/rejected": -0.14245007932186127, + "step": 7180 + }, + { + "epoch": 1.3, + "grad_norm": 4.115557670593262, + "learning_rate": 3.871305866938419e-06, + "log_odds_chosen": 0.8950411677360535, + "log_odds_ratio": -0.5178020000457764, + "logits/chosen": -0.5039768815040588, + "logits/rejected": -0.5022481083869934, + "logps/chosen": -0.9849491119384766, + "logps/rejected": -1.6610597372055054, + "loss": 1.0442, + "nll_loss": 0.9924230575561523, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09849490970373154, + "rewards/margins": 0.06761106103658676, + "rewards/rejected": -0.1661059856414795, + "step": 7190 + }, + { + "epoch": 1.3, + "grad_norm": 1.6129990816116333, + "learning_rate": 3.865482602998981e-06, + "log_odds_chosen": 0.977696418762207, + "log_odds_ratio": -0.5619850158691406, + "logits/chosen": -0.48574066162109375, + "logits/rejected": -0.5287076234817505, + "logps/chosen": -0.8981271982192993, + "logps/rejected": -1.5921859741210938, + "loss": 0.9912, + "nll_loss": 0.9349862933158875, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08981271088123322, + "rewards/margins": 0.06940589845180511, + "rewards/rejected": -0.15921860933303833, + "step": 7200 + }, + { + "epoch": 1.3, + "grad_norm": 0.7514549493789673, + "learning_rate": 3.859659339059543e-06, + "log_odds_chosen": 1.0510715246200562, + "log_odds_ratio": -0.4970950186252594, + "logits/chosen": -0.45134028792381287, + "logits/rejected": -0.48873621225357056, + "logps/chosen": -0.8820649981498718, + "logps/rejected": -1.5363123416900635, + "loss": 0.9205, + "nll_loss": 0.8707484006881714, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08820649981498718, + "rewards/margins": 0.06542472541332245, + "rewards/rejected": -0.15363122522830963, + "step": 7210 + }, + { + "epoch": 1.3, + "grad_norm": 1.2355408668518066, + "learning_rate": 3.853836075120104e-06, + "log_odds_chosen": 0.9166771769523621, + "log_odds_ratio": -0.5141069889068604, + "logits/chosen": -0.49904727935791016, + "logits/rejected": -0.5441471934318542, + "logps/chosen": -0.9830909967422485, + "logps/rejected": -1.62188720703125, + "loss": 1.0317, + "nll_loss": 0.9803188443183899, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09830910712480545, + "rewards/margins": 0.06387962400913239, + "rewards/rejected": -0.16218872368335724, + "step": 7220 + }, + { + "epoch": 1.31, + "grad_norm": 2.117945909500122, + "learning_rate": 3.8480128111806664e-06, + "log_odds_chosen": 0.984078049659729, + "log_odds_ratio": -0.44726839661598206, + "logits/chosen": -0.4131518006324768, + "logits/rejected": -0.4934825003147125, + "logps/chosen": -0.9141210317611694, + "logps/rejected": -1.603629469871521, + "loss": 0.9391, + "nll_loss": 0.8943251371383667, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.09141210466623306, + "rewards/margins": 0.06895085424184799, + "rewards/rejected": -0.16036295890808105, + "step": 7230 + }, + { + "epoch": 1.31, + "grad_norm": 1.4795246124267578, + "learning_rate": 3.842189547241229e-06, + "log_odds_chosen": 1.179312825202942, + "log_odds_ratio": -0.4878261983394623, + "logits/chosen": -0.415322482585907, + "logits/rejected": -0.4562760889530182, + "logps/chosen": -0.8761876821517944, + "logps/rejected": -1.7031415700912476, + "loss": 0.8902, + "nll_loss": 0.8413738012313843, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08761876821517944, + "rewards/margins": 0.08269539475440979, + "rewards/rejected": -0.17031416296958923, + "step": 7240 + }, + { + "epoch": 1.31, + "grad_norm": 1.564605474472046, + "learning_rate": 3.836366283301791e-06, + "log_odds_chosen": 0.7795203924179077, + "log_odds_ratio": -0.5619980096817017, + "logits/chosen": -0.4421234726905823, + "logits/rejected": -0.4827519357204437, + "logps/chosen": -0.926150918006897, + "logps/rejected": -1.481336236000061, + "loss": 0.9577, + "nll_loss": 0.901451587677002, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09261508285999298, + "rewards/margins": 0.05551854521036148, + "rewards/rejected": -0.14813363552093506, + "step": 7250 + }, + { + "epoch": 1.31, + "grad_norm": 1.023547887802124, + "learning_rate": 3.8305430193623525e-06, + "log_odds_chosen": 0.9588610529899597, + "log_odds_ratio": -0.4990147650241852, + "logits/chosen": -0.48034173250198364, + "logits/rejected": -0.5247659683227539, + "logps/chosen": -0.901258647441864, + "logps/rejected": -1.540440320968628, + "loss": 0.9929, + "nll_loss": 0.9430160522460938, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09012585878372192, + "rewards/margins": 0.06391817331314087, + "rewards/rejected": -0.1540440171957016, + "step": 7260 + }, + { + "epoch": 1.31, + "grad_norm": 0.862139105796814, + "learning_rate": 3.824719755422914e-06, + "log_odds_chosen": 0.7220529317855835, + "log_odds_ratio": -0.5099185109138489, + "logits/chosen": -0.4641505181789398, + "logits/rejected": -0.44089174270629883, + "logps/chosen": -0.9471620321273804, + "logps/rejected": -1.4334056377410889, + "loss": 0.8787, + "nll_loss": 0.8277288675308228, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09471620619297028, + "rewards/margins": 0.04862435534596443, + "rewards/rejected": -0.1433405578136444, + "step": 7270 + }, + { + "epoch": 1.32, + "grad_norm": 1.5787487030029297, + "learning_rate": 3.818896491483476e-06, + "log_odds_chosen": 1.0601729154586792, + "log_odds_ratio": -0.4927380681037903, + "logits/chosen": -0.444486141204834, + "logits/rejected": -0.43789857625961304, + "logps/chosen": -0.9262102842330933, + "logps/rejected": -1.684975028038025, + "loss": 0.9764, + "nll_loss": 0.9271078109741211, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09262104332447052, + "rewards/margins": 0.07587646692991257, + "rewards/rejected": -0.1684975028038025, + "step": 7280 + }, + { + "epoch": 1.32, + "grad_norm": 1.860785722732544, + "learning_rate": 3.813073227544038e-06, + "log_odds_chosen": 0.9285827875137329, + "log_odds_ratio": -0.5077587962150574, + "logits/chosen": -0.40383124351501465, + "logits/rejected": -0.4806482195854187, + "logps/chosen": -0.946780800819397, + "logps/rejected": -1.6201133728027344, + "loss": 0.949, + "nll_loss": 0.8982681035995483, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09467808157205582, + "rewards/margins": 0.06733326613903046, + "rewards/rejected": -0.16201135516166687, + "step": 7290 + }, + { + "epoch": 1.32, + "grad_norm": 4.042755603790283, + "learning_rate": 3.8072499636046e-06, + "log_odds_chosen": 0.7702414393424988, + "log_odds_ratio": -0.5221496820449829, + "logits/chosen": -0.4408508241176605, + "logits/rejected": -0.4835754930973053, + "logps/chosen": -0.860028088092804, + "logps/rejected": -1.3842127323150635, + "loss": 0.9616, + "nll_loss": 0.9093489646911621, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08600281178951263, + "rewards/margins": 0.05241847038269043, + "rewards/rejected": -0.13842126727104187, + "step": 7300 + }, + { + "epoch": 1.32, + "grad_norm": 1.9792332649230957, + "learning_rate": 3.8014266996651624e-06, + "log_odds_chosen": 1.1855796575546265, + "log_odds_ratio": -0.4438776969909668, + "logits/chosen": -0.3634795546531677, + "logits/rejected": -0.44674357771873474, + "logps/chosen": -0.7828920483589172, + "logps/rejected": -1.5813146829605103, + "loss": 0.8686, + "nll_loss": 0.8241745829582214, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.0782892033457756, + "rewards/margins": 0.07984226942062378, + "rewards/rejected": -0.1581314504146576, + "step": 7310 + }, + { + "epoch": 1.32, + "grad_norm": 1.8787983655929565, + "learning_rate": 3.795603435725724e-06, + "log_odds_chosen": 0.9159332513809204, + "log_odds_ratio": -0.5088370442390442, + "logits/chosen": -0.4633324146270752, + "logits/rejected": -0.49445921182632446, + "logps/chosen": -0.9663828015327454, + "logps/rejected": -1.586145043373108, + "loss": 0.9774, + "nll_loss": 0.9265311360359192, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0966382771730423, + "rewards/margins": 0.06197623163461685, + "rewards/rejected": -0.15861448645591736, + "step": 7320 + }, + { + "epoch": 1.32, + "grad_norm": 1.74463951587677, + "learning_rate": 3.7897801717862857e-06, + "log_odds_chosen": 0.9995774030685425, + "log_odds_ratio": -0.5128060579299927, + "logits/chosen": -0.43940553069114685, + "logits/rejected": -0.4527904987335205, + "logps/chosen": -0.899426281452179, + "logps/rejected": -1.5987919569015503, + "loss": 0.9602, + "nll_loss": 0.9089180827140808, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08994264900684357, + "rewards/margins": 0.06993655860424042, + "rewards/rejected": -0.15987920761108398, + "step": 7330 + }, + { + "epoch": 1.33, + "grad_norm": 1.7805492877960205, + "learning_rate": 3.783956907846848e-06, + "log_odds_chosen": 1.0778847932815552, + "log_odds_ratio": -0.4922080636024475, + "logits/chosen": -0.4460074305534363, + "logits/rejected": -0.48577815294265747, + "logps/chosen": -0.8337495923042297, + "logps/rejected": -1.5919334888458252, + "loss": 1.0109, + "nll_loss": 0.9617268443107605, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08337496221065521, + "rewards/margins": 0.07581837475299835, + "rewards/rejected": -0.15919332206249237, + "step": 7340 + }, + { + "epoch": 1.33, + "grad_norm": 1.7505688667297363, + "learning_rate": 3.77813364390741e-06, + "log_odds_chosen": 0.6708268523216248, + "log_odds_ratio": -0.6100478768348694, + "logits/chosen": -0.4991052746772766, + "logits/rejected": -0.5121651887893677, + "logps/chosen": -1.0308465957641602, + "logps/rejected": -1.5460246801376343, + "loss": 1.0535, + "nll_loss": 0.9924944043159485, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10308466851711273, + "rewards/margins": 0.05151781439781189, + "rewards/rejected": -0.15460246801376343, + "step": 7350 + }, + { + "epoch": 1.33, + "grad_norm": 1.5388883352279663, + "learning_rate": 3.7723103799679722e-06, + "log_odds_chosen": 0.7845932841300964, + "log_odds_ratio": -0.5693210363388062, + "logits/chosen": -0.4648071825504303, + "logits/rejected": -0.47867363691329956, + "logps/chosen": -0.9348169565200806, + "logps/rejected": -1.4865964651107788, + "loss": 0.9791, + "nll_loss": 0.9221285581588745, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09348170459270477, + "rewards/margins": 0.055177945643663406, + "rewards/rejected": -0.14865966141223907, + "step": 7360 + }, + { + "epoch": 1.33, + "grad_norm": 1.6391617059707642, + "learning_rate": 3.7664871160285337e-06, + "log_odds_chosen": 0.9377862215042114, + "log_odds_ratio": -0.5311064124107361, + "logits/chosen": -0.4713362753391266, + "logits/rejected": -0.48883286118507385, + "logps/chosen": -0.890163242816925, + "logps/rejected": -1.537257194519043, + "loss": 0.9828, + "nll_loss": 0.9296792149543762, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08901633322238922, + "rewards/margins": 0.0647093877196312, + "rewards/rejected": -0.15372571349143982, + "step": 7370 + }, + { + "epoch": 1.33, + "grad_norm": 1.7904300689697266, + "learning_rate": 3.7606638520890956e-06, + "log_odds_chosen": 0.9551296234130859, + "log_odds_ratio": -0.5431476831436157, + "logits/chosen": -0.47295600175857544, + "logits/rejected": -0.475273996591568, + "logps/chosen": -0.9500619173049927, + "logps/rejected": -1.6278291940689087, + "loss": 0.9819, + "nll_loss": 0.9276124238967896, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09500618278980255, + "rewards/margins": 0.06777672469615936, + "rewards/rejected": -0.16278290748596191, + "step": 7380 + }, + { + "epoch": 1.33, + "grad_norm": 1.3705064058303833, + "learning_rate": 3.754840588149658e-06, + "log_odds_chosen": 0.8254842758178711, + "log_odds_ratio": -0.5407508015632629, + "logits/chosen": -0.4489668011665344, + "logits/rejected": -0.4651058614253998, + "logps/chosen": -1.0021662712097168, + "logps/rejected": -1.5822681188583374, + "loss": 0.9814, + "nll_loss": 0.9273598790168762, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10021662712097168, + "rewards/margins": 0.05801018327474594, + "rewards/rejected": -0.15822681784629822, + "step": 7390 + }, + { + "epoch": 1.34, + "grad_norm": 1.5565133094787598, + "learning_rate": 3.7490173242102198e-06, + "log_odds_chosen": 0.780811071395874, + "log_odds_ratio": -0.5810168385505676, + "logits/chosen": -0.4458600580692291, + "logits/rejected": -0.47487956285476685, + "logps/chosen": -0.9416858553886414, + "logps/rejected": -1.492948293685913, + "loss": 0.9911, + "nll_loss": 0.9330151677131653, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09416858851909637, + "rewards/margins": 0.055126238614320755, + "rewards/rejected": -0.14929482340812683, + "step": 7400 + }, + { + "epoch": 1.34, + "grad_norm": 1.3818910121917725, + "learning_rate": 3.7431940602707812e-06, + "log_odds_chosen": 1.0065568685531616, + "log_odds_ratio": -0.5279130935668945, + "logits/chosen": -0.44704675674438477, + "logits/rejected": -0.45673441886901855, + "logps/chosen": -0.8805797696113586, + "logps/rejected": -1.5971896648406982, + "loss": 0.9482, + "nll_loss": 0.8954331278800964, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0880579799413681, + "rewards/margins": 0.07166098058223724, + "rewards/rejected": -0.15971896052360535, + "step": 7410 + }, + { + "epoch": 1.34, + "grad_norm": 1.7497045993804932, + "learning_rate": 3.7373707963313436e-06, + "log_odds_chosen": 0.8069828152656555, + "log_odds_ratio": -0.5470659136772156, + "logits/chosen": -0.41347962617874146, + "logits/rejected": -0.42065295577049255, + "logps/chosen": -0.8937679529190063, + "logps/rejected": -1.451737403869629, + "loss": 0.9324, + "nll_loss": 0.8776780962944031, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0893767923116684, + "rewards/margins": 0.05579695850610733, + "rewards/rejected": -0.14517375826835632, + "step": 7420 + }, + { + "epoch": 1.34, + "grad_norm": 2.3993964195251465, + "learning_rate": 3.7315475323919054e-06, + "log_odds_chosen": 0.6908426880836487, + "log_odds_ratio": -0.6134909391403198, + "logits/chosen": -0.4729135036468506, + "logits/rejected": -0.4644432067871094, + "logps/chosen": -1.006283164024353, + "logps/rejected": -1.5135084390640259, + "loss": 1.0261, + "nll_loss": 0.9647462964057922, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.1006283164024353, + "rewards/margins": 0.050722528249025345, + "rewards/rejected": -0.15135084092617035, + "step": 7430 + }, + { + "epoch": 1.34, + "grad_norm": 2.989941358566284, + "learning_rate": 3.7257242684524673e-06, + "log_odds_chosen": 1.004838228225708, + "log_odds_ratio": -0.4893857538700104, + "logits/chosen": -0.47101831436157227, + "logits/rejected": -0.47765296697616577, + "logps/chosen": -0.8820838928222656, + "logps/rejected": -1.536239504814148, + "loss": 0.8653, + "nll_loss": 0.8163647651672363, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0882083922624588, + "rewards/margins": 0.06541556119918823, + "rewards/rejected": -0.15362393856048584, + "step": 7440 + }, + { + "epoch": 1.35, + "grad_norm": 1.9193861484527588, + "learning_rate": 3.7199010045130296e-06, + "log_odds_chosen": 0.8586718440055847, + "log_odds_ratio": -0.5084580779075623, + "logits/chosen": -0.4453280568122864, + "logits/rejected": -0.45878106355667114, + "logps/chosen": -0.8870725631713867, + "logps/rejected": -1.4322541952133179, + "loss": 0.9746, + "nll_loss": 0.9237484931945801, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08870726078748703, + "rewards/margins": 0.05451815202832222, + "rewards/rejected": -0.14322540163993835, + "step": 7450 + }, + { + "epoch": 1.35, + "grad_norm": 1.2012253999710083, + "learning_rate": 3.714077740573591e-06, + "log_odds_chosen": 0.8122612237930298, + "log_odds_ratio": -0.5620238780975342, + "logits/chosen": -0.43501463532447815, + "logits/rejected": -0.45105236768722534, + "logps/chosen": -0.9191938638687134, + "logps/rejected": -1.440507173538208, + "loss": 0.9885, + "nll_loss": 0.9323150515556335, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09191938489675522, + "rewards/margins": 0.05213134363293648, + "rewards/rejected": -0.144050732254982, + "step": 7460 + }, + { + "epoch": 1.35, + "grad_norm": 1.6287994384765625, + "learning_rate": 3.7082544766341534e-06, + "log_odds_chosen": 0.8829139471054077, + "log_odds_ratio": -0.5368236899375916, + "logits/chosen": -0.46378859877586365, + "logits/rejected": -0.47032395005226135, + "logps/chosen": -1.0091739892959595, + "logps/rejected": -1.637733817100525, + "loss": 0.9935, + "nll_loss": 0.9397771954536438, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10091741383075714, + "rewards/margins": 0.06285597383975983, + "rewards/rejected": -0.16377338767051697, + "step": 7470 + }, + { + "epoch": 1.35, + "grad_norm": 1.6688724756240845, + "learning_rate": 3.7024312126947153e-06, + "log_odds_chosen": 0.9815562963485718, + "log_odds_ratio": -0.4945163130760193, + "logits/chosen": -0.4513029158115387, + "logits/rejected": -0.45173224806785583, + "logps/chosen": -0.9311221241950989, + "logps/rejected": -1.545288324356079, + "loss": 0.9696, + "nll_loss": 0.9201422929763794, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09311220794916153, + "rewards/margins": 0.0614166185259819, + "rewards/rejected": -0.15452882647514343, + "step": 7480 + }, + { + "epoch": 1.35, + "grad_norm": 1.660881519317627, + "learning_rate": 3.696607948755277e-06, + "log_odds_chosen": 1.0118606090545654, + "log_odds_ratio": -0.4933605194091797, + "logits/chosen": -0.4922306537628174, + "logits/rejected": -0.46402543783187866, + "logps/chosen": -0.8814403414726257, + "logps/rejected": -1.5738918781280518, + "loss": 0.8907, + "nll_loss": 0.841367244720459, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08814402669668198, + "rewards/margins": 0.06924516707658768, + "rewards/rejected": -0.15738919377326965, + "step": 7490 + }, + { + "epoch": 1.35, + "grad_norm": 0.7219038605690002, + "learning_rate": 3.690784684815839e-06, + "log_odds_chosen": 0.5756661891937256, + "log_odds_ratio": -0.6396831274032593, + "logits/chosen": -0.4645994305610657, + "logits/rejected": -0.48954564332962036, + "logps/chosen": -0.9166939854621887, + "logps/rejected": -1.3150080442428589, + "loss": 0.951, + "nll_loss": 0.887010931968689, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09166939556598663, + "rewards/margins": 0.03983139619231224, + "rewards/rejected": -0.13150081038475037, + "step": 7500 + }, + { + "epoch": 1.36, + "grad_norm": 1.2778137922286987, + "learning_rate": 3.684961420876401e-06, + "log_odds_chosen": 0.9894605875015259, + "log_odds_ratio": -0.5202837586402893, + "logits/chosen": -0.4389236569404602, + "logits/rejected": -0.4709576964378357, + "logps/chosen": -0.8941949009895325, + "logps/rejected": -1.6198762655258179, + "loss": 0.9531, + "nll_loss": 0.9010677337646484, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08941948413848877, + "rewards/margins": 0.0725681334733963, + "rewards/rejected": -0.16198763251304626, + "step": 7510 + }, + { + "epoch": 1.36, + "grad_norm": 2.6866061687469482, + "learning_rate": 3.679138156936963e-06, + "log_odds_chosen": 0.891405463218689, + "log_odds_ratio": -0.5774060487747192, + "logits/chosen": -0.4057556986808777, + "logits/rejected": -0.4247608184814453, + "logps/chosen": -0.9688380360603333, + "logps/rejected": -1.5794672966003418, + "loss": 0.9128, + "nll_loss": 0.8550981283187866, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09688380360603333, + "rewards/margins": 0.06106293946504593, + "rewards/rejected": -0.15794673562049866, + "step": 7520 + }, + { + "epoch": 1.36, + "grad_norm": 0.8768310546875, + "learning_rate": 3.673314892997525e-06, + "log_odds_chosen": 0.8042165637016296, + "log_odds_ratio": -0.5780975818634033, + "logits/chosen": -0.4777071475982666, + "logits/rejected": -0.4684552550315857, + "logps/chosen": -0.9875958561897278, + "logps/rejected": -1.5576165914535522, + "loss": 0.9796, + "nll_loss": 0.9217915534973145, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09875957667827606, + "rewards/margins": 0.057002078741788864, + "rewards/rejected": -0.15576167404651642, + "step": 7530 + }, + { + "epoch": 1.36, + "grad_norm": 0.6389343738555908, + "learning_rate": 3.667491629058087e-06, + "log_odds_chosen": 1.0354812145233154, + "log_odds_ratio": -0.518434464931488, + "logits/chosen": -0.45959019660949707, + "logits/rejected": -0.4531663954257965, + "logps/chosen": -0.8949222564697266, + "logps/rejected": -1.6369860172271729, + "loss": 0.917, + "nll_loss": 0.8651984930038452, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08949221670627594, + "rewards/margins": 0.07420636713504791, + "rewards/rejected": -0.16369858384132385, + "step": 7540 + }, + { + "epoch": 1.36, + "grad_norm": 0.9316779971122742, + "learning_rate": 3.6616683651186485e-06, + "log_odds_chosen": 0.6489259600639343, + "log_odds_ratio": -0.6223701238632202, + "logits/chosen": -0.4836137890815735, + "logits/rejected": -0.4736199975013733, + "logps/chosen": -0.9614918828010559, + "logps/rejected": -1.3946425914764404, + "loss": 1.0313, + "nll_loss": 0.9690502285957336, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09614919126033783, + "rewards/margins": 0.04331507161259651, + "rewards/rejected": -0.13946424424648285, + "step": 7550 + }, + { + "epoch": 1.37, + "grad_norm": 1.4206702709197998, + "learning_rate": 3.655845101179211e-06, + "log_odds_chosen": 1.0469117164611816, + "log_odds_ratio": -0.5183277726173401, + "logits/chosen": -0.4339308738708496, + "logits/rejected": -0.433371365070343, + "logps/chosen": -0.9971551895141602, + "logps/rejected": -1.678015112876892, + "loss": 1.036, + "nll_loss": 0.9841548204421997, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09971550107002258, + "rewards/margins": 0.06808601319789886, + "rewards/rejected": -0.16780151426792145, + "step": 7560 + }, + { + "epoch": 1.37, + "grad_norm": 1.2541790008544922, + "learning_rate": 3.6500218372397727e-06, + "log_odds_chosen": 0.9297458529472351, + "log_odds_ratio": -0.5026808977127075, + "logits/chosen": -0.407135009765625, + "logits/rejected": -0.4344724118709564, + "logps/chosen": -0.8167446851730347, + "logps/rejected": -1.452601671218872, + "loss": 0.8584, + "nll_loss": 0.8081638216972351, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08167446404695511, + "rewards/margins": 0.06358569860458374, + "rewards/rejected": -0.14526017010211945, + "step": 7570 + }, + { + "epoch": 1.37, + "grad_norm": 0.9388701915740967, + "learning_rate": 3.644198573300335e-06, + "log_odds_chosen": 0.7810580730438232, + "log_odds_ratio": -0.5430953502655029, + "logits/chosen": -0.5032345652580261, + "logits/rejected": -0.4799131453037262, + "logps/chosen": -0.8443433046340942, + "logps/rejected": -1.3685932159423828, + "loss": 0.9451, + "nll_loss": 0.8908378481864929, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08443433791399002, + "rewards/margins": 0.05242498964071274, + "rewards/rejected": -0.13685932755470276, + "step": 7580 + }, + { + "epoch": 1.37, + "grad_norm": 2.0603744983673096, + "learning_rate": 3.6383753093608965e-06, + "log_odds_chosen": 0.9048040509223938, + "log_odds_ratio": -0.5384871959686279, + "logits/chosen": -0.46712374687194824, + "logits/rejected": -0.46890658140182495, + "logps/chosen": -0.8679774403572083, + "logps/rejected": -1.3940092325210571, + "loss": 0.9477, + "nll_loss": 0.8938709497451782, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08679774403572083, + "rewards/margins": 0.05260317772626877, + "rewards/rejected": -0.1394009292125702, + "step": 7590 + }, + { + "epoch": 1.37, + "grad_norm": 1.5286774635314941, + "learning_rate": 3.6325520454214584e-06, + "log_odds_chosen": 1.362618088722229, + "log_odds_ratio": -0.4447413384914398, + "logits/chosen": -0.45346957445144653, + "logits/rejected": -0.4838164448738098, + "logps/chosen": -0.9041000604629517, + "logps/rejected": -1.8852291107177734, + "loss": 0.8869, + "nll_loss": 0.842419445514679, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09040998667478561, + "rewards/margins": 0.09811293333768845, + "rewards/rejected": -0.18852293491363525, + "step": 7600 + }, + { + "epoch": 1.37, + "grad_norm": 0.9117116332054138, + "learning_rate": 3.6267287814820207e-06, + "log_odds_chosen": 0.7849031686782837, + "log_odds_ratio": -0.5902391076087952, + "logits/chosen": -0.4828110635280609, + "logits/rejected": -0.45946455001831055, + "logps/chosen": -0.8875317573547363, + "logps/rejected": -1.431365728378296, + "loss": 0.9272, + "nll_loss": 0.8681669235229492, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.08875317126512527, + "rewards/margins": 0.05438339710235596, + "rewards/rejected": -0.14313657581806183, + "step": 7610 + }, + { + "epoch": 1.38, + "grad_norm": 1.7428468465805054, + "learning_rate": 3.6209055175425826e-06, + "log_odds_chosen": 0.7643271684646606, + "log_odds_ratio": -0.6414823532104492, + "logits/chosen": -0.48790162801742554, + "logits/rejected": -0.4671143591403961, + "logps/chosen": -0.942896842956543, + "logps/rejected": -1.4927890300750732, + "loss": 1.0018, + "nll_loss": 0.9376304745674133, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09428969025611877, + "rewards/margins": 0.05498921126127243, + "rewards/rejected": -0.1492789089679718, + "step": 7620 + }, + { + "epoch": 1.38, + "grad_norm": 1.4223955869674683, + "learning_rate": 3.6150822536031444e-06, + "log_odds_chosen": 0.9342612028121948, + "log_odds_ratio": -0.5149013996124268, + "logits/chosen": -0.4245794713497162, + "logits/rejected": -0.42594870924949646, + "logps/chosen": -0.8617804646492004, + "logps/rejected": -1.5337378978729248, + "loss": 0.9272, + "nll_loss": 0.875745415687561, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08617803454399109, + "rewards/margins": 0.06719574332237244, + "rewards/rejected": -0.1533738076686859, + "step": 7630 + }, + { + "epoch": 1.38, + "grad_norm": 2.223407745361328, + "learning_rate": 3.6092589896637063e-06, + "log_odds_chosen": 1.340857744216919, + "log_odds_ratio": -0.4616336226463318, + "logits/chosen": -0.4703386425971985, + "logits/rejected": -0.5048006176948547, + "logps/chosen": -0.846632182598114, + "logps/rejected": -1.7307153940200806, + "loss": 0.9162, + "nll_loss": 0.8700374364852905, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08466322720050812, + "rewards/margins": 0.08840831369161606, + "rewards/rejected": -0.17307154834270477, + "step": 7640 + }, + { + "epoch": 1.38, + "grad_norm": 0.9719432592391968, + "learning_rate": 3.603435725724268e-06, + "log_odds_chosen": 0.9791809916496277, + "log_odds_ratio": -0.5275226831436157, + "logits/chosen": -0.4798418879508972, + "logits/rejected": -0.4937385022640228, + "logps/chosen": -0.9020121693611145, + "logps/rejected": -1.598841905593872, + "loss": 0.9018, + "nll_loss": 0.8490481376647949, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09020121395587921, + "rewards/margins": 0.06968297064304352, + "rewards/rejected": -0.15988418459892273, + "step": 7650 + }, + { + "epoch": 1.38, + "grad_norm": 1.381014108657837, + "learning_rate": 3.59761246178483e-06, + "log_odds_chosen": 0.7422740459442139, + "log_odds_ratio": -0.529722273349762, + "logits/chosen": -0.4818175435066223, + "logits/rejected": -0.4888008236885071, + "logps/chosen": -1.0153032541275024, + "logps/rejected": -1.5096291303634644, + "loss": 0.9453, + "nll_loss": 0.8923282623291016, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10153033584356308, + "rewards/margins": 0.04943258687853813, + "rewards/rejected": -0.15096290409564972, + "step": 7660 + }, + { + "epoch": 1.39, + "grad_norm": 2.111315965652466, + "learning_rate": 3.5917891978453924e-06, + "log_odds_chosen": 0.7107936143875122, + "log_odds_ratio": -0.5873027443885803, + "logits/chosen": -0.4801076352596283, + "logits/rejected": -0.4742940366268158, + "logps/chosen": -0.9502488374710083, + "logps/rejected": -1.4334582090377808, + "loss": 0.9856, + "nll_loss": 0.9268776774406433, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09502488374710083, + "rewards/margins": 0.048320937901735306, + "rewards/rejected": -0.14334581792354584, + "step": 7670 + }, + { + "epoch": 1.39, + "grad_norm": 1.6548705101013184, + "learning_rate": 3.585965933905954e-06, + "log_odds_chosen": 1.0609567165374756, + "log_odds_ratio": -0.47992807626724243, + "logits/chosen": -0.47569042444229126, + "logits/rejected": -0.4778749346733093, + "logps/chosen": -0.8274517059326172, + "logps/rejected": -1.529815912246704, + "loss": 0.8857, + "nll_loss": 0.8376930356025696, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08274517953395844, + "rewards/margins": 0.07023642957210541, + "rewards/rejected": -0.15298160910606384, + "step": 7680 + }, + { + "epoch": 1.39, + "grad_norm": 0.842616856098175, + "learning_rate": 3.580142669966516e-06, + "log_odds_chosen": 0.9939400553703308, + "log_odds_ratio": -0.5352992415428162, + "logits/chosen": -0.4965585768222809, + "logits/rejected": -0.4912574291229248, + "logps/chosen": -0.9213212132453918, + "logps/rejected": -1.6018670797348022, + "loss": 0.9687, + "nll_loss": 0.9151374697685242, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09213212877511978, + "rewards/margins": 0.06805459409952164, + "rewards/rejected": -0.16018672287464142, + "step": 7690 + }, + { + "epoch": 1.39, + "grad_norm": 1.7768384218215942, + "learning_rate": 3.574319406027078e-06, + "log_odds_chosen": 0.7124654054641724, + "log_odds_ratio": -0.5752750039100647, + "logits/chosen": -0.46104294061660767, + "logits/rejected": -0.4451174736022949, + "logps/chosen": -0.9411187171936035, + "logps/rejected": -1.430299997329712, + "loss": 0.9115, + "nll_loss": 0.8539952039718628, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09411187469959259, + "rewards/margins": 0.048918113112449646, + "rewards/rejected": -0.14302998781204224, + "step": 7700 + }, + { + "epoch": 1.39, + "grad_norm": 1.1206591129302979, + "learning_rate": 3.56849614208764e-06, + "log_odds_chosen": 1.004553198814392, + "log_odds_ratio": -0.5533329248428345, + "logits/chosen": -0.41821640729904175, + "logits/rejected": -0.4664444029331207, + "logps/chosen": -0.9269332885742188, + "logps/rejected": -1.6279127597808838, + "loss": 0.9, + "nll_loss": 0.844649612903595, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09269334375858307, + "rewards/margins": 0.07009793072938919, + "rewards/rejected": -0.16279128193855286, + "step": 7710 + }, + { + "epoch": 1.39, + "grad_norm": 1.933883547782898, + "learning_rate": 3.5626728781482023e-06, + "log_odds_chosen": 0.9759089350700378, + "log_odds_ratio": -0.4841943383216858, + "logits/chosen": -0.457724392414093, + "logits/rejected": -0.4634011387825012, + "logps/chosen": -0.8931747674942017, + "logps/rejected": -1.5288536548614502, + "loss": 0.9143, + "nll_loss": 0.8658315539360046, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08931747823953629, + "rewards/margins": 0.0635678842663765, + "rewards/rejected": -0.1528853476047516, + "step": 7720 + }, + { + "epoch": 1.4, + "grad_norm": 1.167686939239502, + "learning_rate": 3.5568496142087637e-06, + "log_odds_chosen": 1.0014417171478271, + "log_odds_ratio": -0.48971033096313477, + "logits/chosen": -0.449578195810318, + "logits/rejected": -0.4728933870792389, + "logps/chosen": -0.8632867932319641, + "logps/rejected": -1.5455224514007568, + "loss": 0.9463, + "nll_loss": 0.8973382115364075, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08632868528366089, + "rewards/margins": 0.06822358071804047, + "rewards/rejected": -0.15455226600170135, + "step": 7730 + }, + { + "epoch": 1.4, + "grad_norm": 0.8326305150985718, + "learning_rate": 3.5510263502693256e-06, + "log_odds_chosen": 0.9664969444274902, + "log_odds_ratio": -0.48367372155189514, + "logits/chosen": -0.47377943992614746, + "logits/rejected": -0.511856734752655, + "logps/chosen": -0.9200240969657898, + "logps/rejected": -1.578240156173706, + "loss": 0.9693, + "nll_loss": 0.9209035038948059, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09200242906808853, + "rewards/margins": 0.06582160294055939, + "rewards/rejected": -0.15782400965690613, + "step": 7740 + }, + { + "epoch": 1.4, + "grad_norm": 1.0749437808990479, + "learning_rate": 3.545203086329888e-06, + "log_odds_chosen": 0.525017499923706, + "log_odds_ratio": -0.5907121896743774, + "logits/chosen": -0.5242208242416382, + "logits/rejected": -0.5184580087661743, + "logps/chosen": -0.9486880302429199, + "logps/rejected": -1.2981536388397217, + "loss": 0.969, + "nll_loss": 0.9098953008651733, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09486880898475647, + "rewards/margins": 0.03494657203555107, + "rewards/rejected": -0.12981536984443665, + "step": 7750 + }, + { + "epoch": 1.4, + "grad_norm": 1.736507773399353, + "learning_rate": 3.53937982239045e-06, + "log_odds_chosen": 0.7098814249038696, + "log_odds_ratio": -0.6239954829216003, + "logits/chosen": -0.48016709089279175, + "logits/rejected": -0.48714813590049744, + "logps/chosen": -1.0151746273040771, + "logps/rejected": -1.536481499671936, + "loss": 1.0214, + "nll_loss": 0.9589971303939819, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1015174612402916, + "rewards/margins": 0.05213068798184395, + "rewards/rejected": -0.15364815294742584, + "step": 7760 + }, + { + "epoch": 1.4, + "grad_norm": 1.0307977199554443, + "learning_rate": 3.5335565584510113e-06, + "log_odds_chosen": 1.121014952659607, + "log_odds_ratio": -0.5014979243278503, + "logits/chosen": -0.39459139108657837, + "logits/rejected": -0.4565156400203705, + "logps/chosen": -0.8604658246040344, + "logps/rejected": -1.6737045049667358, + "loss": 0.8733, + "nll_loss": 0.823115348815918, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08604658395051956, + "rewards/margins": 0.08132388442754745, + "rewards/rejected": -0.16737046837806702, + "step": 7770 + }, + { + "epoch": 1.41, + "grad_norm": 1.0171877145767212, + "learning_rate": 3.5277332945115736e-06, + "log_odds_chosen": 0.9733405113220215, + "log_odds_ratio": -0.4627884328365326, + "logits/chosen": -0.38336968421936035, + "logits/rejected": -0.4400951862335205, + "logps/chosen": -0.9231967926025391, + "logps/rejected": -1.585914969444275, + "loss": 0.9007, + "nll_loss": 0.8543741106987, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.09231968224048615, + "rewards/margins": 0.0662718415260315, + "rewards/rejected": -0.15859152376651764, + "step": 7780 + }, + { + "epoch": 1.41, + "grad_norm": 0.9106153845787048, + "learning_rate": 3.5219100305721355e-06, + "log_odds_chosen": 1.0205250978469849, + "log_odds_ratio": -0.5149970054626465, + "logits/chosen": -0.4142359793186188, + "logits/rejected": -0.411867618560791, + "logps/chosen": -0.8714788556098938, + "logps/rejected": -1.5835577249526978, + "loss": 0.9246, + "nll_loss": 0.8730686902999878, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08714788407087326, + "rewards/margins": 0.07120787352323532, + "rewards/rejected": -0.15835575759410858, + "step": 7790 + }, + { + "epoch": 1.41, + "grad_norm": 1.4677820205688477, + "learning_rate": 3.5160867666326978e-06, + "log_odds_chosen": 0.8735268712043762, + "log_odds_ratio": -0.5531715154647827, + "logits/chosen": -0.48417219519615173, + "logits/rejected": -0.4957372546195984, + "logps/chosen": -0.9382551312446594, + "logps/rejected": -1.514022946357727, + "loss": 1.0231, + "nll_loss": 0.9677915573120117, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09382550418376923, + "rewards/margins": 0.057576775550842285, + "rewards/rejected": -0.1514022946357727, + "step": 7800 + }, + { + "epoch": 1.41, + "grad_norm": 3.8200325965881348, + "learning_rate": 3.5102635026932592e-06, + "log_odds_chosen": 1.2385016679763794, + "log_odds_ratio": -0.4712151885032654, + "logits/chosen": -0.42320528626441956, + "logits/rejected": -0.48704639077186584, + "logps/chosen": -0.851686954498291, + "logps/rejected": -1.7134358882904053, + "loss": 0.889, + "nll_loss": 0.8419039845466614, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08516870439052582, + "rewards/margins": 0.08617488294839859, + "rewards/rejected": -0.171343594789505, + "step": 7810 + }, + { + "epoch": 1.41, + "grad_norm": 1.053245186805725, + "learning_rate": 3.504440238753821e-06, + "log_odds_chosen": 0.9881025552749634, + "log_odds_ratio": -0.5334082841873169, + "logits/chosen": -0.4514409601688385, + "logits/rejected": -0.4625988006591797, + "logps/chosen": -0.8842605352401733, + "logps/rejected": -1.5331977605819702, + "loss": 0.9572, + "nll_loss": 0.9038169980049133, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08842606842517853, + "rewards/margins": 0.06489372253417969, + "rewards/rejected": -0.15331977605819702, + "step": 7820 + }, + { + "epoch": 1.41, + "grad_norm": 2.9712746143341064, + "learning_rate": 3.4986169748143834e-06, + "log_odds_chosen": 1.1356464624404907, + "log_odds_ratio": -0.450702428817749, + "logits/chosen": -0.4527707099914551, + "logits/rejected": -0.48675090074539185, + "logps/chosen": -0.8333941698074341, + "logps/rejected": -1.6239534616470337, + "loss": 0.8964, + "nll_loss": 0.851354718208313, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08333941549062729, + "rewards/margins": 0.07905593514442444, + "rewards/rejected": -0.16239535808563232, + "step": 7830 + }, + { + "epoch": 1.42, + "grad_norm": 0.9804763197898865, + "learning_rate": 3.4927937108749453e-06, + "log_odds_chosen": 0.8883824348449707, + "log_odds_ratio": -0.5219795107841492, + "logits/chosen": -0.45694953203201294, + "logits/rejected": -0.46481671929359436, + "logps/chosen": -0.9310160875320435, + "logps/rejected": -1.554607629776001, + "loss": 0.943, + "nll_loss": 0.8907995223999023, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0931016057729721, + "rewards/margins": 0.06235916540026665, + "rewards/rejected": -0.15546075999736786, + "step": 7840 + }, + { + "epoch": 1.42, + "grad_norm": 1.1108657121658325, + "learning_rate": 3.486970446935507e-06, + "log_odds_chosen": 0.9498538970947266, + "log_odds_ratio": -0.5386672616004944, + "logits/chosen": -0.4198557734489441, + "logits/rejected": -0.4490021765232086, + "logps/chosen": -0.8262109756469727, + "logps/rejected": -1.4365177154541016, + "loss": 0.8896, + "nll_loss": 0.8356889486312866, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08262109756469727, + "rewards/margins": 0.06103065609931946, + "rewards/rejected": -0.14365175366401672, + "step": 7850 + }, + { + "epoch": 1.42, + "grad_norm": 0.9033617973327637, + "learning_rate": 3.481147182996069e-06, + "log_odds_chosen": 1.0498912334442139, + "log_odds_ratio": -0.5304676294326782, + "logits/chosen": -0.4416961669921875, + "logits/rejected": -0.47163066267967224, + "logps/chosen": -0.8820828199386597, + "logps/rejected": -1.591942548751831, + "loss": 0.9277, + "nll_loss": 0.874676525592804, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08820828795433044, + "rewards/margins": 0.07098597288131714, + "rewards/rejected": -0.15919426083564758, + "step": 7860 + }, + { + "epoch": 1.42, + "grad_norm": 1.2365633249282837, + "learning_rate": 3.475323919056631e-06, + "log_odds_chosen": 0.8197916746139526, + "log_odds_ratio": -0.5315700769424438, + "logits/chosen": -0.4701627790927887, + "logits/rejected": -0.5030553340911865, + "logps/chosen": -0.9004766345024109, + "logps/rejected": -1.4492244720458984, + "loss": 1.0026, + "nll_loss": 0.9494752883911133, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0900476723909378, + "rewards/margins": 0.05487479642033577, + "rewards/rejected": -0.14492245018482208, + "step": 7870 + }, + { + "epoch": 1.42, + "grad_norm": 0.8577108979225159, + "learning_rate": 3.469500655117193e-06, + "log_odds_chosen": 1.1933759450912476, + "log_odds_ratio": -0.4663127064704895, + "logits/chosen": -0.45539942383766174, + "logits/rejected": -0.4741978049278259, + "logps/chosen": -0.8282696008682251, + "logps/rejected": -1.6477653980255127, + "loss": 0.9535, + "nll_loss": 0.9068788290023804, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08282694965600967, + "rewards/margins": 0.08194959908723831, + "rewards/rejected": -0.16477656364440918, + "step": 7880 + }, + { + "epoch": 1.43, + "grad_norm": 1.227028489112854, + "learning_rate": 3.463677391177755e-06, + "log_odds_chosen": 0.8997465372085571, + "log_odds_ratio": -0.5082308650016785, + "logits/chosen": -0.46594834327697754, + "logits/rejected": -0.5129804611206055, + "logps/chosen": -0.9946556091308594, + "logps/rejected": -1.6416242122650146, + "loss": 1.0179, + "nll_loss": 0.9670284390449524, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09946557134389877, + "rewards/margins": 0.06469685584306717, + "rewards/rejected": -0.16416242718696594, + "step": 7890 + }, + { + "epoch": 1.43, + "grad_norm": 2.529578447341919, + "learning_rate": 3.4578541272383166e-06, + "log_odds_chosen": 0.9256827235221863, + "log_odds_ratio": -0.5562437772750854, + "logits/chosen": -0.48142653703689575, + "logits/rejected": -0.4927385449409485, + "logps/chosen": -0.8803023099899292, + "logps/rejected": -1.54433274269104, + "loss": 0.9602, + "nll_loss": 0.9045834541320801, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08803024142980576, + "rewards/margins": 0.06640304625034332, + "rewards/rejected": -0.15443329513072968, + "step": 7900 + }, + { + "epoch": 1.43, + "grad_norm": 1.199773907661438, + "learning_rate": 3.452030863298879e-06, + "log_odds_chosen": 0.9792470932006836, + "log_odds_ratio": -0.49643927812576294, + "logits/chosen": -0.4774385392665863, + "logits/rejected": -0.4975582957267761, + "logps/chosen": -0.8605278730392456, + "logps/rejected": -1.5568548440933228, + "loss": 0.9923, + "nll_loss": 0.9426447153091431, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0860527977347374, + "rewards/margins": 0.06963268667459488, + "rewards/rejected": -0.15568546950817108, + "step": 7910 + }, + { + "epoch": 1.43, + "grad_norm": 1.121840238571167, + "learning_rate": 3.446207599359441e-06, + "log_odds_chosen": 0.9563971757888794, + "log_odds_ratio": -0.5624344944953918, + "logits/chosen": -0.44062572717666626, + "logits/rejected": -0.4463469982147217, + "logps/chosen": -0.956190288066864, + "logps/rejected": -1.6515365839004517, + "loss": 0.9733, + "nll_loss": 0.9170805811882019, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09561903029680252, + "rewards/margins": 0.06953462213277817, + "rewards/rejected": -0.1651536524295807, + "step": 7920 + }, + { + "epoch": 1.43, + "grad_norm": 1.251677393913269, + "learning_rate": 3.4403843354200027e-06, + "log_odds_chosen": 0.9738191366195679, + "log_odds_ratio": -0.49310868978500366, + "logits/chosen": -0.446247398853302, + "logits/rejected": -0.5060548782348633, + "logps/chosen": -1.005068063735962, + "logps/rejected": -1.673824667930603, + "loss": 0.9543, + "nll_loss": 0.9050275683403015, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.10050680488348007, + "rewards/margins": 0.0668756514787674, + "rewards/rejected": -0.16738246381282806, + "step": 7930 + }, + { + "epoch": 1.43, + "grad_norm": 0.9788984060287476, + "learning_rate": 3.434561071480565e-06, + "log_odds_chosen": 0.7685114145278931, + "log_odds_ratio": -0.6038535833358765, + "logits/chosen": -0.45945462584495544, + "logits/rejected": -0.4910176694393158, + "logps/chosen": -1.03463613986969, + "logps/rejected": -1.5275377035140991, + "loss": 1.0876, + "nll_loss": 1.0272212028503418, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10346361249685287, + "rewards/margins": 0.049290161579847336, + "rewards/rejected": -0.15275375545024872, + "step": 7940 + }, + { + "epoch": 1.44, + "grad_norm": 2.345649480819702, + "learning_rate": 3.4287378075411265e-06, + "log_odds_chosen": 0.7906461954116821, + "log_odds_ratio": -0.5344957113265991, + "logits/chosen": -0.49102646112442017, + "logits/rejected": -0.48199811577796936, + "logps/chosen": -1.0372166633605957, + "logps/rejected": -1.5727733373641968, + "loss": 0.9831, + "nll_loss": 0.9296104311943054, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10372166335582733, + "rewards/margins": 0.05355566740036011, + "rewards/rejected": -0.15727733075618744, + "step": 7950 + }, + { + "epoch": 1.44, + "grad_norm": 3.018699884414673, + "learning_rate": 3.4229145436016884e-06, + "log_odds_chosen": 0.9407013058662415, + "log_odds_ratio": -0.5151436924934387, + "logits/chosen": -0.4773307740688324, + "logits/rejected": -0.5138453841209412, + "logps/chosen": -0.9273196458816528, + "logps/rejected": -1.6137325763702393, + "loss": 0.9307, + "nll_loss": 0.8791698217391968, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09273196011781693, + "rewards/margins": 0.0686412900686264, + "rewards/rejected": -0.16137325763702393, + "step": 7960 + }, + { + "epoch": 1.44, + "grad_norm": 1.5302883386611938, + "learning_rate": 3.4170912796622507e-06, + "log_odds_chosen": 1.0637813806533813, + "log_odds_ratio": -0.48986929655075073, + "logits/chosen": -0.47258901596069336, + "logits/rejected": -0.49703383445739746, + "logps/chosen": -0.8885113000869751, + "logps/rejected": -1.644118309020996, + "loss": 0.9679, + "nll_loss": 0.9189218282699585, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08885113149881363, + "rewards/margins": 0.07556071877479553, + "rewards/rejected": -0.16441184282302856, + "step": 7970 + }, + { + "epoch": 1.44, + "grad_norm": 1.6340556144714355, + "learning_rate": 3.4112680157228126e-06, + "log_odds_chosen": 0.9519853591918945, + "log_odds_ratio": -0.5761106014251709, + "logits/chosen": -0.4162136912345886, + "logits/rejected": -0.44225525856018066, + "logps/chosen": -0.899207592010498, + "logps/rejected": -1.5692665576934814, + "loss": 0.8714, + "nll_loss": 0.8137787580490112, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0899207666516304, + "rewards/margins": 0.06700590997934341, + "rewards/rejected": -0.15692667663097382, + "step": 7980 + }, + { + "epoch": 1.44, + "grad_norm": 1.172593116760254, + "learning_rate": 3.4054447517833745e-06, + "log_odds_chosen": 0.8323512077331543, + "log_odds_ratio": -0.5102376937866211, + "logits/chosen": -0.4562395513057709, + "logits/rejected": -0.49356168508529663, + "logps/chosen": -0.9360699653625488, + "logps/rejected": -1.473852276802063, + "loss": 0.9427, + "nll_loss": 0.8917063474655151, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09360699355602264, + "rewards/margins": 0.05377823859453201, + "rewards/rejected": -0.14738522469997406, + "step": 7990 + }, + { + "epoch": 1.45, + "grad_norm": 1.285165548324585, + "learning_rate": 3.3996214878439363e-06, + "log_odds_chosen": 0.9385267496109009, + "log_odds_ratio": -0.5298964381217957, + "logits/chosen": -0.44690507650375366, + "logits/rejected": -0.4662812650203705, + "logps/chosen": -0.8296586275100708, + "logps/rejected": -1.4835478067398071, + "loss": 0.9901, + "nll_loss": 0.9370955228805542, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08296586573123932, + "rewards/margins": 0.06538892537355423, + "rewards/rejected": -0.14835476875305176, + "step": 8000 + }, + { + "epoch": 1.45, + "grad_norm": 1.7378290891647339, + "learning_rate": 3.3937982239044982e-06, + "log_odds_chosen": 0.8786460161209106, + "log_odds_ratio": -0.5547272562980652, + "logits/chosen": -0.5031970739364624, + "logits/rejected": -0.4877137243747711, + "logps/chosen": -0.9259228706359863, + "logps/rejected": -1.5466254949569702, + "loss": 1.0045, + "nll_loss": 0.9490045309066772, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09259229898452759, + "rewards/margins": 0.062070250511169434, + "rewards/rejected": -0.15466253459453583, + "step": 8010 + }, + { + "epoch": 1.45, + "grad_norm": 1.2688935995101929, + "learning_rate": 3.3879749599650605e-06, + "log_odds_chosen": 0.7960363030433655, + "log_odds_ratio": -0.5690153241157532, + "logits/chosen": -0.4590454697608948, + "logits/rejected": -0.4879334568977356, + "logps/chosen": -0.8963125348091125, + "logps/rejected": -1.4358001947402954, + "loss": 1.0057, + "nll_loss": 0.9488385915756226, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08963125199079514, + "rewards/margins": 0.053948771208524704, + "rewards/rejected": -0.14358003437519073, + "step": 8020 + }, + { + "epoch": 1.45, + "grad_norm": 1.5005302429199219, + "learning_rate": 3.3821516960256224e-06, + "log_odds_chosen": 0.8584071397781372, + "log_odds_ratio": -0.5413273572921753, + "logits/chosen": -0.47158876061439514, + "logits/rejected": -0.49479609727859497, + "logps/chosen": -0.9288450479507446, + "logps/rejected": -1.5227404832839966, + "loss": 0.9851, + "nll_loss": 0.9309417605400085, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09288450330495834, + "rewards/margins": 0.05938952416181564, + "rewards/rejected": -0.15227404236793518, + "step": 8030 + }, + { + "epoch": 1.45, + "grad_norm": 1.1802349090576172, + "learning_rate": 3.376328432086184e-06, + "log_odds_chosen": 1.0664393901824951, + "log_odds_ratio": -0.5075581669807434, + "logits/chosen": -0.423412561416626, + "logits/rejected": -0.44078055024147034, + "logps/chosen": -0.8771320581436157, + "logps/rejected": -1.672170639038086, + "loss": 0.8723, + "nll_loss": 0.8215177655220032, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08771320432424545, + "rewards/margins": 0.0795038640499115, + "rewards/rejected": -0.16721707582473755, + "step": 8040 + }, + { + "epoch": 1.45, + "grad_norm": 3.1907968521118164, + "learning_rate": 3.370505168146746e-06, + "log_odds_chosen": 0.9271900057792664, + "log_odds_ratio": -0.5006861686706543, + "logits/chosen": -0.45901423692703247, + "logits/rejected": -0.45146292448043823, + "logps/chosen": -1.0008279085159302, + "logps/rejected": -1.6424148082733154, + "loss": 0.9053, + "nll_loss": 0.8551861047744751, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10008279979228973, + "rewards/margins": 0.06415869295597076, + "rewards/rejected": -0.1642414927482605, + "step": 8050 + }, + { + "epoch": 1.46, + "grad_norm": 0.8206511735916138, + "learning_rate": 3.364681904207308e-06, + "log_odds_chosen": 1.1431403160095215, + "log_odds_ratio": -0.47963443398475647, + "logits/chosen": -0.4267405569553375, + "logits/rejected": -0.4710633158683777, + "logps/chosen": -0.9099753499031067, + "logps/rejected": -1.7252388000488281, + "loss": 0.9361, + "nll_loss": 0.8881762623786926, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09099753946065903, + "rewards/margins": 0.08152634650468826, + "rewards/rejected": -0.1725238859653473, + "step": 8060 + }, + { + "epoch": 1.46, + "grad_norm": 1.217850685119629, + "learning_rate": 3.35885864026787e-06, + "log_odds_chosen": 1.0072505474090576, + "log_odds_ratio": -0.5082023739814758, + "logits/chosen": -0.423494815826416, + "logits/rejected": -0.4322236180305481, + "logps/chosen": -0.885513186454773, + "logps/rejected": -1.5974544286727905, + "loss": 0.9448, + "nll_loss": 0.8939436674118042, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08855132758617401, + "rewards/margins": 0.071194127202034, + "rewards/rejected": -0.15974543988704681, + "step": 8070 + }, + { + "epoch": 1.46, + "grad_norm": 1.512110948562622, + "learning_rate": 3.353035376328432e-06, + "log_odds_chosen": 0.8098253011703491, + "log_odds_ratio": -0.5478814840316772, + "logits/chosen": -0.4838322103023529, + "logits/rejected": -0.4989416003227234, + "logps/chosen": -0.8810871243476868, + "logps/rejected": -1.42708420753479, + "loss": 0.9925, + "nll_loss": 0.9376763105392456, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08810871094465256, + "rewards/margins": 0.05459970235824585, + "rewards/rejected": -0.142708420753479, + "step": 8080 + }, + { + "epoch": 1.46, + "grad_norm": 1.6474279165267944, + "learning_rate": 3.3472121123889937e-06, + "log_odds_chosen": 1.0930445194244385, + "log_odds_ratio": -0.46995505690574646, + "logits/chosen": -0.4111822545528412, + "logits/rejected": -0.45124974846839905, + "logps/chosen": -0.8773530125617981, + "logps/rejected": -1.6252126693725586, + "loss": 0.9204, + "nll_loss": 0.8734337687492371, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08773529529571533, + "rewards/margins": 0.07478597015142441, + "rewards/rejected": -0.16252127289772034, + "step": 8090 + }, + { + "epoch": 1.46, + "grad_norm": 1.075158953666687, + "learning_rate": 3.341388848449556e-06, + "log_odds_chosen": 0.9450801610946655, + "log_odds_ratio": -0.5371429920196533, + "logits/chosen": -0.4427841305732727, + "logits/rejected": -0.4451626241207123, + "logps/chosen": -0.9524284601211548, + "logps/rejected": -1.6333316564559937, + "loss": 0.8849, + "nll_loss": 0.8311794996261597, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09524284303188324, + "rewards/margins": 0.06809031963348389, + "rewards/rejected": -0.16333314776420593, + "step": 8100 + }, + { + "epoch": 1.46, + "grad_norm": 0.8650028705596924, + "learning_rate": 3.335565584510118e-06, + "log_odds_chosen": 0.9066849946975708, + "log_odds_ratio": -0.5334414839744568, + "logits/chosen": -0.4809895157814026, + "logits/rejected": -0.4950200021266937, + "logps/chosen": -0.9143193960189819, + "logps/rejected": -1.5417639017105103, + "loss": 0.985, + "nll_loss": 0.9316898584365845, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09143194556236267, + "rewards/margins": 0.06274445354938507, + "rewards/rejected": -0.15417639911174774, + "step": 8110 + }, + { + "epoch": 1.47, + "grad_norm": 1.051186442375183, + "learning_rate": 3.3297423205706794e-06, + "log_odds_chosen": 1.2927372455596924, + "log_odds_ratio": -0.43638482689857483, + "logits/chosen": -0.398875892162323, + "logits/rejected": -0.45413732528686523, + "logps/chosen": -0.9220579862594604, + "logps/rejected": -1.8598989248275757, + "loss": 0.9553, + "nll_loss": 0.9116722345352173, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09220579266548157, + "rewards/margins": 0.09378410875797272, + "rewards/rejected": -0.18598990142345428, + "step": 8120 + }, + { + "epoch": 1.47, + "grad_norm": 0.780241072177887, + "learning_rate": 3.3239190566312417e-06, + "log_odds_chosen": 0.8707435727119446, + "log_odds_ratio": -0.5559987425804138, + "logits/chosen": -0.4056832194328308, + "logits/rejected": -0.43125540018081665, + "logps/chosen": -0.8941282033920288, + "logps/rejected": -1.4603755474090576, + "loss": 0.9445, + "nll_loss": 0.8889206051826477, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08941281586885452, + "rewards/margins": 0.05662474036216736, + "rewards/rejected": -0.14603756368160248, + "step": 8130 + }, + { + "epoch": 1.47, + "grad_norm": 1.1260215044021606, + "learning_rate": 3.3180957926918036e-06, + "log_odds_chosen": 0.682217001914978, + "log_odds_ratio": -0.6097729802131653, + "logits/chosen": -0.39974793791770935, + "logits/rejected": -0.45179304480552673, + "logps/chosen": -0.9107609987258911, + "logps/rejected": -1.3272377252578735, + "loss": 0.9954, + "nll_loss": 0.9344407320022583, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0910760909318924, + "rewards/margins": 0.04164768010377884, + "rewards/rejected": -0.13272377848625183, + "step": 8140 + }, + { + "epoch": 1.47, + "grad_norm": 2.6169352531433105, + "learning_rate": 3.3122725287523655e-06, + "log_odds_chosen": 0.8187638521194458, + "log_odds_ratio": -0.555806040763855, + "logits/chosen": -0.4935298562049866, + "logits/rejected": -0.46518078446388245, + "logps/chosen": -0.9738367795944214, + "logps/rejected": -1.536250114440918, + "loss": 0.9823, + "nll_loss": 0.9266853332519531, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09738368541002274, + "rewards/margins": 0.05624132603406906, + "rewards/rejected": -0.1536250114440918, + "step": 8150 + }, + { + "epoch": 1.47, + "grad_norm": 1.5755623579025269, + "learning_rate": 3.306449264812928e-06, + "log_odds_chosen": 0.8507640957832336, + "log_odds_ratio": -0.5479308366775513, + "logits/chosen": -0.45997118949890137, + "logits/rejected": -0.48409271240234375, + "logps/chosen": -0.8985317945480347, + "logps/rejected": -1.4782774448394775, + "loss": 0.9323, + "nll_loss": 0.8775039911270142, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0898531898856163, + "rewards/margins": 0.057974569499492645, + "rewards/rejected": -0.14782774448394775, + "step": 8160 + }, + { + "epoch": 1.48, + "grad_norm": 0.7219186425209045, + "learning_rate": 3.3006260008734893e-06, + "log_odds_chosen": 1.332714319229126, + "log_odds_ratio": -0.453878790140152, + "logits/chosen": -0.41151317954063416, + "logits/rejected": -0.4791542887687683, + "logps/chosen": -0.8005874752998352, + "logps/rejected": -1.7394930124282837, + "loss": 0.8922, + "nll_loss": 0.8467932939529419, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0800587460398674, + "rewards/margins": 0.09389055520296097, + "rewards/rejected": -0.17394930124282837, + "step": 8170 + }, + { + "epoch": 1.48, + "grad_norm": 1.6263483762741089, + "learning_rate": 3.294802736934051e-06, + "log_odds_chosen": 0.953560471534729, + "log_odds_ratio": -0.5101215243339539, + "logits/chosen": -0.44410592317581177, + "logits/rejected": -0.4457341134548187, + "logps/chosen": -0.9805153608322144, + "logps/rejected": -1.6686265468597412, + "loss": 0.9395, + "nll_loss": 0.8884419202804565, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09805154800415039, + "rewards/margins": 0.06881112605333328, + "rewards/rejected": -0.16686268150806427, + "step": 8180 + }, + { + "epoch": 1.48, + "grad_norm": 1.2888286113739014, + "learning_rate": 3.2889794729946135e-06, + "log_odds_chosen": 1.142336130142212, + "log_odds_ratio": -0.4796481728553772, + "logits/chosen": -0.4701244831085205, + "logits/rejected": -0.4900820255279541, + "logps/chosen": -0.9152582287788391, + "logps/rejected": -1.7305917739868164, + "loss": 0.9902, + "nll_loss": 0.9422494769096375, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09152581542730331, + "rewards/margins": 0.08153336495161057, + "rewards/rejected": -0.17305919528007507, + "step": 8190 + }, + { + "epoch": 1.48, + "grad_norm": 2.0623779296875, + "learning_rate": 3.2831562090551753e-06, + "log_odds_chosen": 0.8945195078849792, + "log_odds_ratio": -0.5508736968040466, + "logits/chosen": -0.46278437972068787, + "logits/rejected": -0.4786438047885895, + "logps/chosen": -0.9259538650512695, + "logps/rejected": -1.5728238821029663, + "loss": 0.9739, + "nll_loss": 0.9188405871391296, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09259538352489471, + "rewards/margins": 0.06468699872493744, + "rewards/rejected": -0.15728238224983215, + "step": 8200 + }, + { + "epoch": 1.48, + "grad_norm": 1.12440025806427, + "learning_rate": 3.2773329451157372e-06, + "log_odds_chosen": 1.2054493427276611, + "log_odds_ratio": -0.4695872366428375, + "logits/chosen": -0.46673083305358887, + "logits/rejected": -0.4775332808494568, + "logps/chosen": -0.8975292444229126, + "logps/rejected": -1.7807804346084595, + "loss": 0.891, + "nll_loss": 0.8440417051315308, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0897529199719429, + "rewards/margins": 0.0883251205086708, + "rewards/rejected": -0.17807802557945251, + "step": 8210 + }, + { + "epoch": 1.48, + "grad_norm": 1.0653307437896729, + "learning_rate": 3.271509681176299e-06, + "log_odds_chosen": 0.9999645352363586, + "log_odds_ratio": -0.4829765856266022, + "logits/chosen": -0.42667704820632935, + "logits/rejected": -0.4870891571044922, + "logps/chosen": -0.8960220217704773, + "logps/rejected": -1.6177982091903687, + "loss": 0.8606, + "nll_loss": 0.8122758865356445, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08960221707820892, + "rewards/margins": 0.07217760384082794, + "rewards/rejected": -0.16177980601787567, + "step": 8220 + }, + { + "epoch": 1.49, + "grad_norm": 0.6214142441749573, + "learning_rate": 3.265686417236861e-06, + "log_odds_chosen": 1.1718106269836426, + "log_odds_ratio": -0.46951428055763245, + "logits/chosen": -0.44821277260780334, + "logits/rejected": -0.4748601019382477, + "logps/chosen": -0.8514217138290405, + "logps/rejected": -1.6607784032821655, + "loss": 0.9352, + "nll_loss": 0.8882169723510742, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08514218032360077, + "rewards/margins": 0.08093566447496414, + "rewards/rejected": -0.1660778522491455, + "step": 8230 + }, + { + "epoch": 1.49, + "grad_norm": 1.8574119806289673, + "learning_rate": 3.2598631532974233e-06, + "log_odds_chosen": 1.05377995967865, + "log_odds_ratio": -0.48320484161376953, + "logits/chosen": -0.457883358001709, + "logits/rejected": -0.4863380789756775, + "logps/chosen": -0.9301978945732117, + "logps/rejected": -1.6607166528701782, + "loss": 0.9512, + "nll_loss": 0.9028828740119934, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09301978349685669, + "rewards/margins": 0.07305187731981277, + "rewards/rejected": -0.16607165336608887, + "step": 8240 + }, + { + "epoch": 1.49, + "grad_norm": 1.0782403945922852, + "learning_rate": 3.254039889357985e-06, + "log_odds_chosen": 0.5579296350479126, + "log_odds_ratio": -0.6075290441513062, + "logits/chosen": -0.4750538766384125, + "logits/rejected": -0.46040016412734985, + "logps/chosen": -0.9509231448173523, + "logps/rejected": -1.3567991256713867, + "loss": 0.9805, + "nll_loss": 0.9197883605957031, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.09509231895208359, + "rewards/margins": 0.04058759659528732, + "rewards/rejected": -0.1356799304485321, + "step": 8250 + }, + { + "epoch": 1.49, + "grad_norm": 1.754353642463684, + "learning_rate": 3.2482166254185467e-06, + "log_odds_chosen": 1.289215087890625, + "log_odds_ratio": -0.48846206068992615, + "logits/chosen": -0.42231351137161255, + "logits/rejected": -0.45326024293899536, + "logps/chosen": -0.9167190790176392, + "logps/rejected": -1.7700271606445312, + "loss": 0.9213, + "nll_loss": 0.8724179267883301, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0916719064116478, + "rewards/margins": 0.08533082902431488, + "rewards/rejected": -0.17700272798538208, + "step": 8260 + }, + { + "epoch": 1.49, + "grad_norm": 1.4862349033355713, + "learning_rate": 3.242393361479109e-06, + "log_odds_chosen": 0.8918800354003906, + "log_odds_ratio": -0.5321205854415894, + "logits/chosen": -0.48621082305908203, + "logits/rejected": -0.5018815398216248, + "logps/chosen": -1.01481032371521, + "logps/rejected": -1.704506516456604, + "loss": 0.9819, + "nll_loss": 0.9286412000656128, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10148103535175323, + "rewards/margins": 0.06896961480379105, + "rewards/rejected": -0.17045065760612488, + "step": 8270 + }, + { + "epoch": 1.5, + "grad_norm": 1.2697803974151611, + "learning_rate": 3.236570097539671e-06, + "log_odds_chosen": 0.6413637399673462, + "log_odds_ratio": -0.5947942137718201, + "logits/chosen": -0.5209168195724487, + "logits/rejected": -0.5378480553627014, + "logps/chosen": -0.9370816349983215, + "logps/rejected": -1.3779325485229492, + "loss": 1.0176, + "nll_loss": 0.9581189155578613, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09370815753936768, + "rewards/margins": 0.044085096567869186, + "rewards/rejected": -0.13779327273368835, + "step": 8280 + }, + { + "epoch": 1.5, + "grad_norm": 0.9943523406982422, + "learning_rate": 3.2307468336002327e-06, + "log_odds_chosen": 0.7377707362174988, + "log_odds_ratio": -0.57826167345047, + "logits/chosen": -0.455329030752182, + "logits/rejected": -0.48436814546585083, + "logps/chosen": -0.8952816724777222, + "logps/rejected": -1.4014275074005127, + "loss": 1.0055, + "nll_loss": 0.9476629495620728, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.0895281732082367, + "rewards/margins": 0.05061458796262741, + "rewards/rejected": -0.1401427686214447, + "step": 8290 + }, + { + "epoch": 1.5, + "grad_norm": 1.187412977218628, + "learning_rate": 3.2249235696607946e-06, + "log_odds_chosen": 1.1886249780654907, + "log_odds_ratio": -0.4324370324611664, + "logits/chosen": -0.33722493052482605, + "logits/rejected": -0.39880436658859253, + "logps/chosen": -0.8110214471817017, + "logps/rejected": -1.6159849166870117, + "loss": 0.8536, + "nll_loss": 0.8103251457214355, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.0811021476984024, + "rewards/margins": 0.08049634844064713, + "rewards/rejected": -0.16159851849079132, + "step": 8300 + }, + { + "epoch": 1.5, + "eval_log_odds_chosen": 0.9150215983390808, + "eval_log_odds_ratio": -0.5380129218101501, + "eval_logits/chosen": -0.4303891062736511, + "eval_logits/rejected": -0.45025941729545593, + "eval_logps/chosen": -0.9312528967857361, + "eval_logps/rejected": -1.5771743059158325, + "eval_loss": 0.9696574211120605, + "eval_nll_loss": 0.915856122970581, + "eval_rewards/accuracies": 0.6705276966094971, + "eval_rewards/chosen": -0.09312529861927032, + "eval_rewards/margins": 0.0645921528339386, + "eval_rewards/rejected": -0.15771742165088654, + "eval_runtime": 2274.4333, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 1.025, + "step": 8304 + }, + { + "epoch": 1.5, + "grad_norm": 1.3505573272705078, + "learning_rate": 3.2191003057213565e-06, + "log_odds_chosen": 0.7017135620117188, + "log_odds_ratio": -0.5855156779289246, + "logits/chosen": -0.4498369097709656, + "logits/rejected": -0.47008219361305237, + "logps/chosen": -1.0464216470718384, + "logps/rejected": -1.5228334665298462, + "loss": 0.989, + "nll_loss": 0.9304904937744141, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10464215278625488, + "rewards/margins": 0.04764118790626526, + "rewards/rejected": -0.15228334069252014, + "step": 8310 + }, + { + "epoch": 1.5, + "grad_norm": 1.1382750272750854, + "learning_rate": 3.213277041781919e-06, + "log_odds_chosen": 1.1573059558868408, + "log_odds_ratio": -0.5401102304458618, + "logits/chosen": -0.44930943846702576, + "logits/rejected": -0.49644845724105835, + "logps/chosen": -0.8247823715209961, + "logps/rejected": -1.6526029109954834, + "loss": 0.9384, + "nll_loss": 0.8843981623649597, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08247824013233185, + "rewards/margins": 0.08278205245733261, + "rewards/rejected": -0.16526028513908386, + "step": 8320 + }, + { + "epoch": 1.5, + "grad_norm": 1.3331044912338257, + "learning_rate": 3.2074537778424807e-06, + "log_odds_chosen": 0.8306789398193359, + "log_odds_ratio": -0.565879225730896, + "logits/chosen": -0.43868565559387207, + "logits/rejected": -0.4368368983268738, + "logps/chosen": -0.8823145627975464, + "logps/rejected": -1.4312303066253662, + "loss": 0.9337, + "nll_loss": 0.87712162733078, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08823145925998688, + "rewards/margins": 0.05489157885313034, + "rewards/rejected": -0.14312303066253662, + "step": 8330 + }, + { + "epoch": 1.51, + "grad_norm": 2.8568496704101562, + "learning_rate": 3.2016305139030426e-06, + "log_odds_chosen": 0.9271842837333679, + "log_odds_ratio": -0.5752115845680237, + "logits/chosen": -0.4455347955226898, + "logits/rejected": -0.48211875557899475, + "logps/chosen": -0.9204484820365906, + "logps/rejected": -1.6012827157974243, + "loss": 0.9359, + "nll_loss": 0.878348171710968, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09204484522342682, + "rewards/margins": 0.06808345019817352, + "rewards/rejected": -0.16012828052043915, + "step": 8340 + }, + { + "epoch": 1.51, + "grad_norm": 2.5041916370391846, + "learning_rate": 3.1958072499636045e-06, + "log_odds_chosen": 0.8253329992294312, + "log_odds_ratio": -0.5409625768661499, + "logits/chosen": -0.42137661576271057, + "logits/rejected": -0.4258691668510437, + "logps/chosen": -0.9433619379997253, + "logps/rejected": -1.4537885189056396, + "loss": 0.8955, + "nll_loss": 0.8414531946182251, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09433619678020477, + "rewards/margins": 0.05104265362024307, + "rewards/rejected": -0.14537884294986725, + "step": 8350 + }, + { + "epoch": 1.51, + "grad_norm": 1.5704162120819092, + "learning_rate": 3.1899839860241664e-06, + "log_odds_chosen": 0.7973772287368774, + "log_odds_ratio": -0.5633861422538757, + "logits/chosen": -0.5092782974243164, + "logits/rejected": -0.506787896156311, + "logps/chosen": -0.9273924827575684, + "logps/rejected": -1.4619529247283936, + "loss": 0.9969, + "nll_loss": 0.940531849861145, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09273925423622131, + "rewards/margins": 0.053456056863069534, + "rewards/rejected": -0.14619530737400055, + "step": 8360 + }, + { + "epoch": 1.51, + "grad_norm": 1.4402172565460205, + "learning_rate": 3.1841607220847283e-06, + "log_odds_chosen": 1.0130927562713623, + "log_odds_ratio": -0.5257831811904907, + "logits/chosen": -0.43476539850234985, + "logits/rejected": -0.4592816233634949, + "logps/chosen": -0.8787569999694824, + "logps/rejected": -1.5716125965118408, + "loss": 0.9334, + "nll_loss": 0.8807721138000488, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08787570148706436, + "rewards/margins": 0.0692855566740036, + "rewards/rejected": -0.15716125071048737, + "step": 8370 + }, + { + "epoch": 1.51, + "grad_norm": 1.170516014099121, + "learning_rate": 3.1783374581452906e-06, + "log_odds_chosen": 1.0823752880096436, + "log_odds_ratio": -0.5135545134544373, + "logits/chosen": -0.46290579438209534, + "logits/rejected": -0.4929911494255066, + "logps/chosen": -0.9144316911697388, + "logps/rejected": -1.6458345651626587, + "loss": 0.8758, + "nll_loss": 0.8243969082832336, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09144316613674164, + "rewards/margins": 0.07314030826091766, + "rewards/rejected": -0.1645834743976593, + "step": 8380 + }, + { + "epoch": 1.52, + "grad_norm": 1.599992275238037, + "learning_rate": 3.172514194205852e-06, + "log_odds_chosen": 1.2497516870498657, + "log_odds_ratio": -0.48808449506759644, + "logits/chosen": -0.48384079337120056, + "logits/rejected": -0.512252688407898, + "logps/chosen": -0.8950152397155762, + "logps/rejected": -1.8100101947784424, + "loss": 0.9544, + "nll_loss": 0.905626654624939, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0895015150308609, + "rewards/margins": 0.09149952232837677, + "rewards/rejected": -0.18100103735923767, + "step": 8390 + }, + { + "epoch": 1.52, + "grad_norm": 1.133752465248108, + "learning_rate": 3.166690930266414e-06, + "log_odds_chosen": 0.7259795665740967, + "log_odds_ratio": -0.5618212819099426, + "logits/chosen": -0.420632541179657, + "logits/rejected": -0.48596158623695374, + "logps/chosen": -0.9481450319290161, + "logps/rejected": -1.441080093383789, + "loss": 1.002, + "nll_loss": 0.9458674192428589, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09481450170278549, + "rewards/margins": 0.04929352179169655, + "rewards/rejected": -0.14410802721977234, + "step": 8400 + }, + { + "epoch": 1.52, + "grad_norm": 2.7289183139801025, + "learning_rate": 3.1608676663269762e-06, + "log_odds_chosen": 1.017851710319519, + "log_odds_ratio": -0.5462450981140137, + "logits/chosen": -0.46232685446739197, + "logits/rejected": -0.473463773727417, + "logps/chosen": -1.0255944728851318, + "logps/rejected": -1.7579063177108765, + "loss": 1.0151, + "nll_loss": 0.9605172276496887, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10255946218967438, + "rewards/margins": 0.07323118299245834, + "rewards/rejected": -0.17579063773155212, + "step": 8410 + }, + { + "epoch": 1.52, + "grad_norm": 1.4500068426132202, + "learning_rate": 3.155044402387538e-06, + "log_odds_chosen": 0.6885607838630676, + "log_odds_ratio": -0.6273342370986938, + "logits/chosen": -0.4968503415584564, + "logits/rejected": -0.5165926218032837, + "logps/chosen": -0.9877980947494507, + "logps/rejected": -1.4659242630004883, + "loss": 1.0864, + "nll_loss": 1.0236256122589111, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0987798199057579, + "rewards/margins": 0.047812603414058685, + "rewards/rejected": -0.1465924233198166, + "step": 8420 + }, + { + "epoch": 1.52, + "grad_norm": 1.252193808555603, + "learning_rate": 3.1492211384481004e-06, + "log_odds_chosen": 0.9662941694259644, + "log_odds_ratio": -0.4978674054145813, + "logits/chosen": -0.41275423765182495, + "logits/rejected": -0.4935991168022156, + "logps/chosen": -0.8912984728813171, + "logps/rejected": -1.511781930923462, + "loss": 0.9413, + "nll_loss": 0.8915241360664368, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08912985026836395, + "rewards/margins": 0.06204833835363388, + "rewards/rejected": -0.15117818117141724, + "step": 8430 + }, + { + "epoch": 1.52, + "grad_norm": 1.68789541721344, + "learning_rate": 3.143397874508662e-06, + "log_odds_chosen": 0.9445701837539673, + "log_odds_ratio": -0.5349358320236206, + "logits/chosen": -0.4067501425743103, + "logits/rejected": -0.4603399336338043, + "logps/chosen": -0.9744407534599304, + "logps/rejected": -1.6701091527938843, + "loss": 1.015, + "nll_loss": 0.9614804983139038, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0974440798163414, + "rewards/margins": 0.06956683844327927, + "rewards/rejected": -0.16701093316078186, + "step": 8440 + }, + { + "epoch": 1.53, + "grad_norm": 1.3491225242614746, + "learning_rate": 3.1375746105692238e-06, + "log_odds_chosen": 0.6654536128044128, + "log_odds_ratio": -0.5875769853591919, + "logits/chosen": -0.47851261496543884, + "logits/rejected": -0.5094243288040161, + "logps/chosen": -0.969096839427948, + "logps/rejected": -1.4325406551361084, + "loss": 0.939, + "nll_loss": 0.8802341222763062, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09690967947244644, + "rewards/margins": 0.04634439945220947, + "rewards/rejected": -0.14325407147407532, + "step": 8450 + }, + { + "epoch": 1.53, + "grad_norm": 1.4580334424972534, + "learning_rate": 3.131751346629786e-06, + "log_odds_chosen": 0.9235858917236328, + "log_odds_ratio": -0.4925766885280609, + "logits/chosen": -0.439488023519516, + "logits/rejected": -0.46981319785118103, + "logps/chosen": -0.8824410438537598, + "logps/rejected": -1.5106605291366577, + "loss": 0.9123, + "nll_loss": 0.8630245327949524, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08824410289525986, + "rewards/margins": 0.06282194703817368, + "rewards/rejected": -0.15106606483459473, + "step": 8460 + }, + { + "epoch": 1.53, + "grad_norm": 1.3175749778747559, + "learning_rate": 3.125928082690348e-06, + "log_odds_chosen": 1.1061830520629883, + "log_odds_ratio": -0.49878525733947754, + "logits/chosen": -0.39459967613220215, + "logits/rejected": -0.44091662764549255, + "logps/chosen": -0.8368655443191528, + "logps/rejected": -1.6044450998306274, + "loss": 0.9248, + "nll_loss": 0.8748943209648132, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08368656039237976, + "rewards/margins": 0.07675794512033463, + "rewards/rejected": -0.1604444980621338, + "step": 8470 + }, + { + "epoch": 1.53, + "grad_norm": 1.8184188604354858, + "learning_rate": 3.1201048187509094e-06, + "log_odds_chosen": 1.3821189403533936, + "log_odds_ratio": -0.4116799235343933, + "logits/chosen": -0.3508300185203552, + "logits/rejected": -0.374864399433136, + "logps/chosen": -0.8738387227058411, + "logps/rejected": -1.8437020778656006, + "loss": 0.862, + "nll_loss": 0.8208430409431458, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08738387376070023, + "rewards/margins": 0.09698633849620819, + "rewards/rejected": -0.18437020480632782, + "step": 8480 + }, + { + "epoch": 1.53, + "grad_norm": 1.1924893856048584, + "learning_rate": 3.1142815548114717e-06, + "log_odds_chosen": 1.1683502197265625, + "log_odds_ratio": -0.4778009057044983, + "logits/chosen": -0.35688087344169617, + "logits/rejected": -0.40984684228897095, + "logps/chosen": -0.8918578028678894, + "logps/rejected": -1.6981723308563232, + "loss": 0.908, + "nll_loss": 0.8602396249771118, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08918578922748566, + "rewards/margins": 0.08063144981861115, + "rewards/rejected": -0.1698172390460968, + "step": 8490 + }, + { + "epoch": 1.54, + "grad_norm": 1.0544793605804443, + "learning_rate": 3.1084582908720336e-06, + "log_odds_chosen": 1.074487566947937, + "log_odds_ratio": -0.5217118859291077, + "logits/chosen": -0.4763789772987366, + "logits/rejected": -0.47022953629493713, + "logps/chosen": -0.9271343350410461, + "logps/rejected": -1.6965866088867188, + "loss": 1.0064, + "nll_loss": 0.9542236328125, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09271343052387238, + "rewards/margins": 0.0769452303647995, + "rewards/rejected": -0.16965866088867188, + "step": 8500 + }, + { + "epoch": 1.54, + "grad_norm": 1.2964205741882324, + "learning_rate": 3.1026350269325955e-06, + "log_odds_chosen": 1.1331610679626465, + "log_odds_ratio": -0.43135523796081543, + "logits/chosen": -0.4021880626678467, + "logits/rejected": -0.44057130813598633, + "logps/chosen": -0.8385679125785828, + "logps/rejected": -1.58628249168396, + "loss": 0.9221, + "nll_loss": 0.8789209127426147, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08385679870843887, + "rewards/margins": 0.074771448969841, + "rewards/rejected": -0.15862825512886047, + "step": 8510 + }, + { + "epoch": 1.54, + "grad_norm": 2.1097097396850586, + "learning_rate": 3.096811762993158e-06, + "log_odds_chosen": 0.8757414817810059, + "log_odds_ratio": -0.5435615181922913, + "logits/chosen": -0.43857377767562866, + "logits/rejected": -0.4581020772457123, + "logps/chosen": -0.9276224970817566, + "logps/rejected": -1.4827194213867188, + "loss": 1.0161, + "nll_loss": 0.9617037773132324, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09276226162910461, + "rewards/margins": 0.055509693920612335, + "rewards/rejected": -0.14827194809913635, + "step": 8520 + }, + { + "epoch": 1.54, + "grad_norm": 1.5196740627288818, + "learning_rate": 3.0909884990537193e-06, + "log_odds_chosen": 0.7281922101974487, + "log_odds_ratio": -0.5750101208686829, + "logits/chosen": -0.4723603129386902, + "logits/rejected": -0.47291263937950134, + "logps/chosen": -0.9452505111694336, + "logps/rejected": -1.4306466579437256, + "loss": 0.9857, + "nll_loss": 0.9282245635986328, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0945250540971756, + "rewards/margins": 0.04853961616754532, + "rewards/rejected": -0.1430646777153015, + "step": 8530 + }, + { + "epoch": 1.54, + "grad_norm": 1.1302449703216553, + "learning_rate": 3.0851652351142816e-06, + "log_odds_chosen": 1.0263969898223877, + "log_odds_ratio": -0.5024701356887817, + "logits/chosen": -0.4505384564399719, + "logits/rejected": -0.4901925027370453, + "logps/chosen": -0.9041630029678345, + "logps/rejected": -1.636066198348999, + "loss": 0.9306, + "nll_loss": 0.880358874797821, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09041629731655121, + "rewards/margins": 0.07319033890962601, + "rewards/rejected": -0.1636066436767578, + "step": 8540 + }, + { + "epoch": 1.54, + "grad_norm": 1.616918921470642, + "learning_rate": 3.0793419711748435e-06, + "log_odds_chosen": 0.7561261057853699, + "log_odds_ratio": -0.5439547896385193, + "logits/chosen": -0.4394764006137848, + "logits/rejected": -0.42637330293655396, + "logps/chosen": -0.8382787704467773, + "logps/rejected": -1.2982994318008423, + "loss": 0.9224, + "nll_loss": 0.8679793477058411, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08382787555456161, + "rewards/margins": 0.04600207880139351, + "rewards/rejected": -0.12982995808124542, + "step": 8550 + }, + { + "epoch": 1.55, + "grad_norm": 1.5272725820541382, + "learning_rate": 3.0735187072354054e-06, + "log_odds_chosen": 0.8503424525260925, + "log_odds_ratio": -0.6126225590705872, + "logits/chosen": -0.45884138345718384, + "logits/rejected": -0.4699079096317291, + "logps/chosen": -1.0466539859771729, + "logps/rejected": -1.6558605432510376, + "loss": 1.0305, + "nll_loss": 0.9692071676254272, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10466538369655609, + "rewards/margins": 0.06092067435383797, + "rewards/rejected": -0.16558606922626495, + "step": 8560 + }, + { + "epoch": 1.55, + "grad_norm": 1.7004966735839844, + "learning_rate": 3.0676954432959672e-06, + "log_odds_chosen": 1.068210244178772, + "log_odds_ratio": -0.535982608795166, + "logits/chosen": -0.43431219458580017, + "logits/rejected": -0.47066861391067505, + "logps/chosen": -1.005386233329773, + "logps/rejected": -1.7771772146224976, + "loss": 0.9732, + "nll_loss": 0.9196001887321472, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10053862631320953, + "rewards/margins": 0.07717911154031754, + "rewards/rejected": -0.17771773040294647, + "step": 8570 + }, + { + "epoch": 1.55, + "grad_norm": 1.500475287437439, + "learning_rate": 3.061872179356529e-06, + "log_odds_chosen": 1.255358099937439, + "log_odds_ratio": -0.4606574475765228, + "logits/chosen": -0.38432154059410095, + "logits/rejected": -0.4414575695991516, + "logps/chosen": -0.8076623678207397, + "logps/rejected": -1.679369330406189, + "loss": 0.9022, + "nll_loss": 0.8561829328536987, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08076624572277069, + "rewards/margins": 0.08717069029808044, + "rewards/rejected": -0.16793695092201233, + "step": 8580 + }, + { + "epoch": 1.55, + "grad_norm": 1.182091236114502, + "learning_rate": 3.056048915417091e-06, + "log_odds_chosen": 0.8635237812995911, + "log_odds_ratio": -0.5242233276367188, + "logits/chosen": -0.4404246211051941, + "logits/rejected": -0.4629266858100891, + "logps/chosen": -0.8578693270683289, + "logps/rejected": -1.4637280702590942, + "loss": 0.9752, + "nll_loss": 0.9227339625358582, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08578693866729736, + "rewards/margins": 0.060585867613554, + "rewards/rejected": -0.14637281000614166, + "step": 8590 + }, + { + "epoch": 1.55, + "grad_norm": 1.498415231704712, + "learning_rate": 3.0502256514776533e-06, + "log_odds_chosen": 0.7312828898429871, + "log_odds_ratio": -0.524425208568573, + "logits/chosen": -0.4358999729156494, + "logits/rejected": -0.4618275761604309, + "logps/chosen": -0.9581681489944458, + "logps/rejected": -1.448223352432251, + "loss": 0.9313, + "nll_loss": 0.8788281679153442, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09581682085990906, + "rewards/margins": 0.04900550842285156, + "rewards/rejected": -0.14482232928276062, + "step": 8600 + }, + { + "epoch": 1.56, + "grad_norm": 2.19492769241333, + "learning_rate": 3.044402387538215e-06, + "log_odds_chosen": 0.8028427958488464, + "log_odds_ratio": -0.5451768636703491, + "logits/chosen": -0.4661959707736969, + "logits/rejected": -0.43399032950401306, + "logps/chosen": -0.9671751856803894, + "logps/rejected": -1.5540294647216797, + "loss": 0.9725, + "nll_loss": 0.9180120229721069, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09671752154827118, + "rewards/margins": 0.05868542194366455, + "rewards/rejected": -0.15540295839309692, + "step": 8610 + }, + { + "epoch": 1.56, + "grad_norm": 2.256150484085083, + "learning_rate": 3.0385791235987767e-06, + "log_odds_chosen": 0.8371201753616333, + "log_odds_ratio": -0.5272762179374695, + "logits/chosen": -0.460941880941391, + "logits/rejected": -0.47110190987586975, + "logps/chosen": -0.9126766920089722, + "logps/rejected": -1.4907863140106201, + "loss": 0.883, + "nll_loss": 0.8302776217460632, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0912676751613617, + "rewards/margins": 0.057810962200164795, + "rewards/rejected": -0.1490786373615265, + "step": 8620 + }, + { + "epoch": 1.56, + "grad_norm": 1.4068905115127563, + "learning_rate": 3.032755859659339e-06, + "log_odds_chosen": 0.9666420221328735, + "log_odds_ratio": -0.5143001675605774, + "logits/chosen": -0.4444963037967682, + "logits/rejected": -0.4654978811740875, + "logps/chosen": -0.861626148223877, + "logps/rejected": -1.5031956434249878, + "loss": 0.9535, + "nll_loss": 0.9020698666572571, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08616261184215546, + "rewards/margins": 0.06415696442127228, + "rewards/rejected": -0.15031957626342773, + "step": 8630 + }, + { + "epoch": 1.56, + "grad_norm": 1.0998035669326782, + "learning_rate": 3.026932595719901e-06, + "log_odds_chosen": 0.9140321612358093, + "log_odds_ratio": -0.5074976682662964, + "logits/chosen": -0.4687994420528412, + "logits/rejected": -0.47800904512405396, + "logps/chosen": -0.8865720629692078, + "logps/rejected": -1.4897964000701904, + "loss": 0.9641, + "nll_loss": 0.9133057594299316, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08865721523761749, + "rewards/margins": 0.06032242625951767, + "rewards/rejected": -0.14897963404655457, + "step": 8640 + }, + { + "epoch": 1.56, + "grad_norm": 1.6498323678970337, + "learning_rate": 3.021109331780463e-06, + "log_odds_chosen": 0.9924195408821106, + "log_odds_ratio": -0.562451183795929, + "logits/chosen": -0.4367518424987793, + "logits/rejected": -0.40149760246276855, + "logps/chosen": -0.8667885661125183, + "logps/rejected": -1.558685064315796, + "loss": 0.8615, + "nll_loss": 0.8052981495857239, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.0866788700222969, + "rewards/margins": 0.06918965280056, + "rewards/rejected": -0.1558685153722763, + "step": 8650 + }, + { + "epoch": 1.56, + "grad_norm": 1.2060490846633911, + "learning_rate": 3.0152860678410246e-06, + "log_odds_chosen": 0.7882088422775269, + "log_odds_ratio": -0.562271773815155, + "logits/chosen": -0.4282412528991699, + "logits/rejected": -0.45196279883384705, + "logps/chosen": -0.8789991140365601, + "logps/rejected": -1.443538784980774, + "loss": 0.9452, + "nll_loss": 0.8889563679695129, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08789992332458496, + "rewards/margins": 0.05645396560430527, + "rewards/rejected": -0.14435386657714844, + "step": 8660 + }, + { + "epoch": 1.57, + "grad_norm": 1.8238736391067505, + "learning_rate": 3.0094628039015865e-06, + "log_odds_chosen": 1.1830130815505981, + "log_odds_ratio": -0.4648486077785492, + "logits/chosen": -0.40792226791381836, + "logits/rejected": -0.4477695822715759, + "logps/chosen": -0.9096108675003052, + "logps/rejected": -1.7371028661727905, + "loss": 0.915, + "nll_loss": 0.8684867024421692, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09096109867095947, + "rewards/margins": 0.08274922519922256, + "rewards/rejected": -0.17371031641960144, + "step": 8670 + }, + { + "epoch": 1.57, + "grad_norm": 1.2026002407073975, + "learning_rate": 3.003639539962149e-06, + "log_odds_chosen": 1.1199390888214111, + "log_odds_ratio": -0.5301384329795837, + "logits/chosen": -0.4494144916534424, + "logits/rejected": -0.4676884710788727, + "logps/chosen": -1.0210682153701782, + "logps/rejected": -1.905693769454956, + "loss": 1.0464, + "nll_loss": 0.993392288684845, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.10210682451725006, + "rewards/margins": 0.08846259117126465, + "rewards/rejected": -0.1905694305896759, + "step": 8680 + }, + { + "epoch": 1.57, + "grad_norm": 1.8002581596374512, + "learning_rate": 2.9978162760227107e-06, + "log_odds_chosen": 0.8180960416793823, + "log_odds_ratio": -0.5964370965957642, + "logits/chosen": -0.4645138680934906, + "logits/rejected": -0.415048748254776, + "logps/chosen": -0.9569841623306274, + "logps/rejected": -1.5479731559753418, + "loss": 0.9345, + "nll_loss": 0.8748123049736023, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.09569840133190155, + "rewards/margins": 0.05909890681505203, + "rewards/rejected": -0.15479730069637299, + "step": 8690 + }, + { + "epoch": 1.57, + "grad_norm": 1.1029233932495117, + "learning_rate": 2.991993012083272e-06, + "log_odds_chosen": 0.8041459321975708, + "log_odds_ratio": -0.5897382497787476, + "logits/chosen": -0.49731239676475525, + "logits/rejected": -0.4990456998348236, + "logps/chosen": -0.9147828817367554, + "logps/rejected": -1.5065150260925293, + "loss": 0.9862, + "nll_loss": 0.9271799325942993, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09147828817367554, + "rewards/margins": 0.059173207730054855, + "rewards/rejected": -0.1506514996290207, + "step": 8700 + }, + { + "epoch": 1.57, + "grad_norm": 1.7081480026245117, + "learning_rate": 2.9861697481438345e-06, + "log_odds_chosen": 0.8758634328842163, + "log_odds_ratio": -0.5533775687217712, + "logits/chosen": -0.45495352149009705, + "logits/rejected": -0.46826472878456116, + "logps/chosen": -0.9041604995727539, + "logps/rejected": -1.5035583972930908, + "loss": 0.9567, + "nll_loss": 0.9013868570327759, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09041605144739151, + "rewards/margins": 0.059939801692962646, + "rewards/rejected": -0.15035584568977356, + "step": 8710 + }, + { + "epoch": 1.58, + "grad_norm": 0.9252613186836243, + "learning_rate": 2.9803464842043964e-06, + "log_odds_chosen": 0.9316560626029968, + "log_odds_ratio": -0.5237702131271362, + "logits/chosen": -0.4001654088497162, + "logits/rejected": -0.4136221408843994, + "logps/chosen": -0.8662740588188171, + "logps/rejected": -1.4723564386367798, + "loss": 0.9343, + "nll_loss": 0.8818756341934204, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08662740886211395, + "rewards/margins": 0.060608237981796265, + "rewards/rejected": -0.1472356617450714, + "step": 8720 + }, + { + "epoch": 1.58, + "grad_norm": 1.780712366104126, + "learning_rate": 2.9745232202649583e-06, + "log_odds_chosen": 0.9977982640266418, + "log_odds_ratio": -0.49909859895706177, + "logits/chosen": -0.4587010443210602, + "logits/rejected": -0.49831119179725647, + "logps/chosen": -0.927727997303009, + "logps/rejected": -1.643951654434204, + "loss": 0.9722, + "nll_loss": 0.9222747683525085, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09277280420064926, + "rewards/margins": 0.07162235677242279, + "rewards/rejected": -0.16439515352249146, + "step": 8730 + }, + { + "epoch": 1.58, + "grad_norm": 1.4339555501937866, + "learning_rate": 2.9686999563255206e-06, + "log_odds_chosen": 0.9230550527572632, + "log_odds_ratio": -0.5562025308609009, + "logits/chosen": -0.4476137161254883, + "logits/rejected": -0.47444948554039, + "logps/chosen": -0.9881235957145691, + "logps/rejected": -1.6829208135604858, + "loss": 1.0277, + "nll_loss": 0.9721002578735352, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09881236404180527, + "rewards/margins": 0.06947972625494003, + "rewards/rejected": -0.1682920902967453, + "step": 8740 + }, + { + "epoch": 1.58, + "grad_norm": 2.0522637367248535, + "learning_rate": 2.962876692386082e-06, + "log_odds_chosen": 1.1478326320648193, + "log_odds_ratio": -0.48287123441696167, + "logits/chosen": -0.4314839243888855, + "logits/rejected": -0.4502708911895752, + "logps/chosen": -0.8756439089775085, + "logps/rejected": -1.6750850677490234, + "loss": 0.9321, + "nll_loss": 0.8838540315628052, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0875643938779831, + "rewards/margins": 0.07994408905506134, + "rewards/rejected": -0.16750849783420563, + "step": 8750 + }, + { + "epoch": 1.58, + "grad_norm": 0.9408858418464661, + "learning_rate": 2.9570534284466444e-06, + "log_odds_chosen": 0.7013543844223022, + "log_odds_ratio": -0.5859674215316772, + "logits/chosen": -0.4224696159362793, + "logits/rejected": -0.4474635124206543, + "logps/chosen": -0.9406406283378601, + "logps/rejected": -1.4665216207504272, + "loss": 0.9571, + "nll_loss": 0.8984783887863159, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09406407177448273, + "rewards/margins": 0.052588094025850296, + "rewards/rejected": -0.14665216207504272, + "step": 8760 + }, + { + "epoch": 1.58, + "grad_norm": 2.0329740047454834, + "learning_rate": 2.9512301645072062e-06, + "log_odds_chosen": 0.6817148923873901, + "log_odds_ratio": -0.6015090942382812, + "logits/chosen": -0.45950907468795776, + "logits/rejected": -0.4683297276496887, + "logps/chosen": -0.9494982957839966, + "logps/rejected": -1.4097508192062378, + "loss": 0.9656, + "nll_loss": 0.9054635763168335, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09494982659816742, + "rewards/margins": 0.04602526128292084, + "rewards/rejected": -0.14097508788108826, + "step": 8770 + }, + { + "epoch": 1.59, + "grad_norm": 1.3164424896240234, + "learning_rate": 2.945406900567768e-06, + "log_odds_chosen": 0.7522329092025757, + "log_odds_ratio": -0.5482112765312195, + "logits/chosen": -0.48109740018844604, + "logits/rejected": -0.4810869097709656, + "logps/chosen": -0.9835283160209656, + "logps/rejected": -1.5171191692352295, + "loss": 1.013, + "nll_loss": 0.9581700563430786, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0983528345823288, + "rewards/margins": 0.05335908383131027, + "rewards/rejected": -0.15171192586421967, + "step": 8780 + }, + { + "epoch": 1.59, + "grad_norm": 1.5961617231369019, + "learning_rate": 2.93958363662833e-06, + "log_odds_chosen": 1.1709040403366089, + "log_odds_ratio": -0.47756949067115784, + "logits/chosen": -0.428290456533432, + "logits/rejected": -0.44113603234291077, + "logps/chosen": -0.9102188348770142, + "logps/rejected": -1.7039642333984375, + "loss": 0.9331, + "nll_loss": 0.8853162527084351, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09102188050746918, + "rewards/margins": 0.0793745368719101, + "rewards/rejected": -0.17039641737937927, + "step": 8790 + }, + { + "epoch": 1.59, + "grad_norm": 0.7731590270996094, + "learning_rate": 2.933760372688892e-06, + "log_odds_chosen": 0.7696908712387085, + "log_odds_ratio": -0.5423186421394348, + "logits/chosen": -0.44001665711402893, + "logits/rejected": -0.4458009600639343, + "logps/chosen": -0.8562023043632507, + "logps/rejected": -1.3373987674713135, + "loss": 0.9334, + "nll_loss": 0.8791570663452148, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08562023192644119, + "rewards/margins": 0.048119645565748215, + "rewards/rejected": -0.1337398737668991, + "step": 8800 + }, + { + "epoch": 1.59, + "grad_norm": 1.536658525466919, + "learning_rate": 2.927937108749454e-06, + "log_odds_chosen": 0.7098883986473083, + "log_odds_ratio": -0.5896470546722412, + "logits/chosen": -0.47359657287597656, + "logits/rejected": -0.47002777457237244, + "logps/chosen": -0.9735046625137329, + "logps/rejected": -1.4823663234710693, + "loss": 1.06, + "nll_loss": 1.0009952783584595, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09735047817230225, + "rewards/margins": 0.05088616535067558, + "rewards/rejected": -0.14823663234710693, + "step": 8810 + }, + { + "epoch": 1.59, + "grad_norm": 1.0456104278564453, + "learning_rate": 2.922113844810016e-06, + "log_odds_chosen": 0.876265823841095, + "log_odds_ratio": -0.5287500619888306, + "logits/chosen": -0.3807294964790344, + "logits/rejected": -0.41154319047927856, + "logps/chosen": -0.899895966053009, + "logps/rejected": -1.4913125038146973, + "loss": 0.9526, + "nll_loss": 0.8997262120246887, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08998960256576538, + "rewards/margins": 0.05914165824651718, + "rewards/rejected": -0.14913125336170197, + "step": 8820 + }, + { + "epoch": 1.6, + "grad_norm": 1.8245923519134521, + "learning_rate": 2.916290580870578e-06, + "log_odds_chosen": 0.9418030977249146, + "log_odds_ratio": -0.54529869556427, + "logits/chosen": -0.4727003574371338, + "logits/rejected": -0.4494483470916748, + "logps/chosen": -0.9340311288833618, + "logps/rejected": -1.609712839126587, + "loss": 0.9158, + "nll_loss": 0.8612591028213501, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09340310096740723, + "rewards/margins": 0.06756816804409027, + "rewards/rejected": -0.1609712690114975, + "step": 8830 + }, + { + "epoch": 1.6, + "grad_norm": 2.012312650680542, + "learning_rate": 2.9104673169311394e-06, + "log_odds_chosen": 0.8511514663696289, + "log_odds_ratio": -0.5999525189399719, + "logits/chosen": -0.4229847490787506, + "logits/rejected": -0.43726539611816406, + "logps/chosen": -0.9030078053474426, + "logps/rejected": -1.5179579257965088, + "loss": 0.9474, + "nll_loss": 0.8874059915542603, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0903007760643959, + "rewards/margins": 0.06149500608444214, + "rewards/rejected": -0.15179578959941864, + "step": 8840 + }, + { + "epoch": 1.6, + "grad_norm": 1.1240602731704712, + "learning_rate": 2.9046440529917018e-06, + "log_odds_chosen": 0.7420207858085632, + "log_odds_ratio": -0.5621017813682556, + "logits/chosen": -0.39741191267967224, + "logits/rejected": -0.4037668704986572, + "logps/chosen": -0.8269944190979004, + "logps/rejected": -1.315613031387329, + "loss": 0.9393, + "nll_loss": 0.8831076622009277, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08269945532083511, + "rewards/margins": 0.048861853778362274, + "rewards/rejected": -0.1315612941980362, + "step": 8850 + }, + { + "epoch": 1.6, + "grad_norm": 1.4114630222320557, + "learning_rate": 2.8988207890522636e-06, + "log_odds_chosen": 1.1417728662490845, + "log_odds_ratio": -0.46272262930870056, + "logits/chosen": -0.4243449568748474, + "logits/rejected": -0.44843998551368713, + "logps/chosen": -0.7630897760391235, + "logps/rejected": -1.504429578781128, + "loss": 0.842, + "nll_loss": 0.7956916689872742, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07630898058414459, + "rewards/margins": 0.0741339921951294, + "rewards/rejected": -0.150442972779274, + "step": 8860 + }, + { + "epoch": 1.6, + "grad_norm": 1.9253188371658325, + "learning_rate": 2.892997525112826e-06, + "log_odds_chosen": 0.8595142364501953, + "log_odds_ratio": -0.5433337092399597, + "logits/chosen": -0.4623066782951355, + "logits/rejected": -0.44778022170066833, + "logps/chosen": -0.8706346750259399, + "logps/rejected": -1.444229245185852, + "loss": 0.9428, + "nll_loss": 0.8884536623954773, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.08706346899271011, + "rewards/margins": 0.057359449565410614, + "rewards/rejected": -0.14442291855812073, + "step": 8870 + }, + { + "epoch": 1.6, + "grad_norm": 1.1447951793670654, + "learning_rate": 2.8871742611733874e-06, + "log_odds_chosen": 1.263009786605835, + "log_odds_ratio": -0.4398309588432312, + "logits/chosen": -0.43632984161376953, + "logits/rejected": -0.46356701850891113, + "logps/chosen": -0.846616268157959, + "logps/rejected": -1.691075086593628, + "loss": 0.9603, + "nll_loss": 0.9162972569465637, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08466162532567978, + "rewards/margins": 0.08444588631391525, + "rewards/rejected": -0.16910752654075623, + "step": 8880 + }, + { + "epoch": 1.61, + "grad_norm": 1.8909757137298584, + "learning_rate": 2.8813509972339493e-06, + "log_odds_chosen": 0.8786866068840027, + "log_odds_ratio": -0.5085036158561707, + "logits/chosen": -0.42095041275024414, + "logits/rejected": -0.4742346405982971, + "logps/chosen": -0.9058700799942017, + "logps/rejected": -1.4910926818847656, + "loss": 1.0104, + "nll_loss": 0.9595681428909302, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09058699756860733, + "rewards/margins": 0.05852225422859192, + "rewards/rejected": -0.14910925924777985, + "step": 8890 + }, + { + "epoch": 1.61, + "grad_norm": 2.0623555183410645, + "learning_rate": 2.8755277332945116e-06, + "log_odds_chosen": 0.9732965230941772, + "log_odds_ratio": -0.5587271451950073, + "logits/chosen": -0.42247653007507324, + "logits/rejected": -0.41432175040245056, + "logps/chosen": -0.8597976565361023, + "logps/rejected": -1.5737887620925903, + "loss": 0.9569, + "nll_loss": 0.9010313153266907, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08597976714372635, + "rewards/margins": 0.07139910757541656, + "rewards/rejected": -0.1573788821697235, + "step": 8900 + }, + { + "epoch": 1.61, + "grad_norm": 1.143496036529541, + "learning_rate": 2.8697044693550735e-06, + "log_odds_chosen": 0.9191802740097046, + "log_odds_ratio": -0.5371075868606567, + "logits/chosen": -0.43688470125198364, + "logits/rejected": -0.4470733106136322, + "logps/chosen": -0.9116252660751343, + "logps/rejected": -1.576756477355957, + "loss": 0.9379, + "nll_loss": 0.884225070476532, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09116252511739731, + "rewards/margins": 0.06651312112808228, + "rewards/rejected": -0.15767565369606018, + "step": 8910 + }, + { + "epoch": 1.61, + "grad_norm": 1.6985821723937988, + "learning_rate": 2.863881205415635e-06, + "log_odds_chosen": 0.8602274656295776, + "log_odds_ratio": -0.500104546546936, + "logits/chosen": -0.45361995697021484, + "logits/rejected": -0.4823782444000244, + "logps/chosen": -0.8865026235580444, + "logps/rejected": -1.4952237606048584, + "loss": 0.9312, + "nll_loss": 0.881218433380127, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08865025639533997, + "rewards/margins": 0.060872115194797516, + "rewards/rejected": -0.14952236413955688, + "step": 8920 + }, + { + "epoch": 1.61, + "grad_norm": 1.4285775423049927, + "learning_rate": 2.8580579414761973e-06, + "log_odds_chosen": 0.898389458656311, + "log_odds_ratio": -0.5091103315353394, + "logits/chosen": -0.43957382440567017, + "logits/rejected": -0.47268661856651306, + "logps/chosen": -0.887039840221405, + "logps/rejected": -1.5192763805389404, + "loss": 1.055, + "nll_loss": 1.0040767192840576, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08870398998260498, + "rewards/margins": 0.06322365999221802, + "rewards/rejected": -0.1519276648759842, + "step": 8930 + }, + { + "epoch": 1.61, + "grad_norm": 2.452505111694336, + "learning_rate": 2.852234677536759e-06, + "log_odds_chosen": 0.8278576731681824, + "log_odds_ratio": -0.5373989343643188, + "logits/chosen": -0.44924673438072205, + "logits/rejected": -0.450298547744751, + "logps/chosen": -0.9012764096260071, + "logps/rejected": -1.4678680896759033, + "loss": 0.9021, + "nll_loss": 0.848312258720398, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09012763947248459, + "rewards/margins": 0.05665916949510574, + "rewards/rejected": -0.14678680896759033, + "step": 8940 + }, + { + "epoch": 1.62, + "grad_norm": 1.5261414051055908, + "learning_rate": 2.846411413597321e-06, + "log_odds_chosen": 0.8018957376480103, + "log_odds_ratio": -0.5665901899337769, + "logits/chosen": -0.43171626329421997, + "logits/rejected": -0.4220046401023865, + "logps/chosen": -0.862989068031311, + "logps/rejected": -1.4592608213424683, + "loss": 0.9395, + "nll_loss": 0.8827921152114868, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08629890531301498, + "rewards/margins": 0.05962717533111572, + "rewards/rejected": -0.1459260880947113, + "step": 8950 + }, + { + "epoch": 1.62, + "grad_norm": 1.0437229871749878, + "learning_rate": 2.8405881496578834e-06, + "log_odds_chosen": 1.2830219268798828, + "log_odds_ratio": -0.48459410667419434, + "logits/chosen": -0.37114351987838745, + "logits/rejected": -0.40893077850341797, + "logps/chosen": -0.848936915397644, + "logps/rejected": -1.6950023174285889, + "loss": 0.9134, + "nll_loss": 0.8649131655693054, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08489370346069336, + "rewards/margins": 0.08460653573274612, + "rewards/rejected": -0.1695002317428589, + "step": 8960 + }, + { + "epoch": 1.62, + "grad_norm": 1.8305917978286743, + "learning_rate": 2.834764885718445e-06, + "log_odds_chosen": 1.3773221969604492, + "log_odds_ratio": -0.47211503982543945, + "logits/chosen": -0.3935226500034332, + "logits/rejected": -0.38803496956825256, + "logps/chosen": -0.9036442041397095, + "logps/rejected": -1.9123483896255493, + "loss": 0.9417, + "nll_loss": 0.8944932818412781, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09036441147327423, + "rewards/margins": 0.10087043046951294, + "rewards/rejected": -0.19123484194278717, + "step": 8970 + }, + { + "epoch": 1.62, + "grad_norm": 1.0194758176803589, + "learning_rate": 2.828941621779007e-06, + "log_odds_chosen": 0.6662958860397339, + "log_odds_ratio": -0.6157139539718628, + "logits/chosen": -0.44147419929504395, + "logits/rejected": -0.441021591424942, + "logps/chosen": -0.9676446914672852, + "logps/rejected": -1.4576784372329712, + "loss": 1.0337, + "nll_loss": 0.9721538424491882, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0967644602060318, + "rewards/margins": 0.04900338873267174, + "rewards/rejected": -0.14576785266399384, + "step": 8980 + }, + { + "epoch": 1.62, + "grad_norm": 1.9268519878387451, + "learning_rate": 2.823118357839569e-06, + "log_odds_chosen": 1.1404383182525635, + "log_odds_ratio": -0.48538607358932495, + "logits/chosen": -0.4639511704444885, + "logits/rejected": -0.4613065719604492, + "logps/chosen": -0.9420214891433716, + "logps/rejected": -1.7059471607208252, + "loss": 0.9394, + "nll_loss": 0.8908447027206421, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09420214593410492, + "rewards/margins": 0.07639256864786148, + "rewards/rejected": -0.1705947071313858, + "step": 8990 + }, + { + "epoch": 1.63, + "grad_norm": 1.572337031364441, + "learning_rate": 2.817295093900131e-06, + "log_odds_chosen": 0.6553648710250854, + "log_odds_ratio": -0.5741513967514038, + "logits/chosen": -0.47056522965431213, + "logits/rejected": -0.46010923385620117, + "logps/chosen": -0.9969121217727661, + "logps/rejected": -1.453446865081787, + "loss": 1.0014, + "nll_loss": 0.944003701210022, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09969121217727661, + "rewards/margins": 0.045653462409973145, + "rewards/rejected": -0.14534467458724976, + "step": 9000 + }, + { + "epoch": 1.63, + "grad_norm": 1.9777848720550537, + "learning_rate": 2.8114718299606928e-06, + "log_odds_chosen": 0.803533673286438, + "log_odds_ratio": -0.5443662405014038, + "logits/chosen": -0.4233883321285248, + "logits/rejected": -0.4425373673439026, + "logps/chosen": -0.9458332061767578, + "logps/rejected": -1.5298562049865723, + "loss": 0.9113, + "nll_loss": 0.8568891286849976, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09458333253860474, + "rewards/margins": 0.05840228870511055, + "rewards/rejected": -0.1529856026172638, + "step": 9010 + }, + { + "epoch": 1.63, + "grad_norm": 1.8121646642684937, + "learning_rate": 2.8056485660212547e-06, + "log_odds_chosen": 1.1956027746200562, + "log_odds_ratio": -0.5159161686897278, + "logits/chosen": -0.38231539726257324, + "logits/rejected": -0.3957478404045105, + "logps/chosen": -0.7938886284828186, + "logps/rejected": -1.6044238805770874, + "loss": 0.8469, + "nll_loss": 0.7953472137451172, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.07938887178897858, + "rewards/margins": 0.08105353266000748, + "rewards/rejected": -0.16044239699840546, + "step": 9020 + }, + { + "epoch": 1.63, + "grad_norm": 1.4421942234039307, + "learning_rate": 2.7998253020818166e-06, + "log_odds_chosen": 0.8288308382034302, + "log_odds_ratio": -0.5798860788345337, + "logits/chosen": -0.47319093346595764, + "logits/rejected": -0.47874951362609863, + "logps/chosen": -0.9910385012626648, + "logps/rejected": -1.6084444522857666, + "loss": 1.0151, + "nll_loss": 0.9570778012275696, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09910385310649872, + "rewards/margins": 0.06174057722091675, + "rewards/rejected": -0.16084444522857666, + "step": 9030 + }, + { + "epoch": 1.63, + "grad_norm": 1.8703491687774658, + "learning_rate": 2.794002038142379e-06, + "log_odds_chosen": 0.8713129758834839, + "log_odds_ratio": -0.5886852741241455, + "logits/chosen": -0.4230705201625824, + "logits/rejected": -0.4477715492248535, + "logps/chosen": -0.9506195187568665, + "logps/rejected": -1.5880171060562134, + "loss": 0.9558, + "nll_loss": 0.8969224095344543, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09506195783615112, + "rewards/margins": 0.06373975425958633, + "rewards/rejected": -0.15880170464515686, + "step": 9040 + }, + { + "epoch": 1.63, + "grad_norm": 1.8566488027572632, + "learning_rate": 2.7881787742029408e-06, + "log_odds_chosen": 0.9162837862968445, + "log_odds_ratio": -0.5790117979049683, + "logits/chosen": -0.39925554394721985, + "logits/rejected": -0.4360648989677429, + "logps/chosen": -0.8828533887863159, + "logps/rejected": -1.5269739627838135, + "loss": 0.9833, + "nll_loss": 0.9254306554794312, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.08828534185886383, + "rewards/margins": 0.06441205739974976, + "rewards/rejected": -0.15269741415977478, + "step": 9050 + }, + { + "epoch": 1.64, + "grad_norm": 2.002180576324463, + "learning_rate": 2.7823555102635022e-06, + "log_odds_chosen": 0.6226949095726013, + "log_odds_ratio": -0.5851758122444153, + "logits/chosen": -0.42530399560928345, + "logits/rejected": -0.44639119505882263, + "logps/chosen": -0.9246541261672974, + "logps/rejected": -1.3595139980316162, + "loss": 0.9609, + "nll_loss": 0.9023898243904114, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09246542304754257, + "rewards/margins": 0.043485984206199646, + "rewards/rejected": -0.13595139980316162, + "step": 9060 + }, + { + "epoch": 1.64, + "grad_norm": 1.6212151050567627, + "learning_rate": 2.7765322463240645e-06, + "log_odds_chosen": 1.1360653638839722, + "log_odds_ratio": -0.5271188020706177, + "logits/chosen": -0.36451902985572815, + "logits/rejected": -0.40917712450027466, + "logps/chosen": -0.8709207773208618, + "logps/rejected": -1.6949536800384521, + "loss": 0.944, + "nll_loss": 0.8913170099258423, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0870920792222023, + "rewards/margins": 0.08240329474210739, + "rewards/rejected": -0.1694953739643097, + "step": 9070 + }, + { + "epoch": 1.64, + "grad_norm": 1.0041602849960327, + "learning_rate": 2.7707089823846264e-06, + "log_odds_chosen": 1.0473252534866333, + "log_odds_ratio": -0.527521014213562, + "logits/chosen": -0.39088183641433716, + "logits/rejected": -0.42638707160949707, + "logps/chosen": -0.9256051778793335, + "logps/rejected": -1.677172064781189, + "loss": 0.9516, + "nll_loss": 0.8988776206970215, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0925605297088623, + "rewards/margins": 0.07515668123960495, + "rewards/rejected": -0.16771721839904785, + "step": 9080 + }, + { + "epoch": 1.64, + "grad_norm": 1.1408195495605469, + "learning_rate": 2.7648857184451887e-06, + "log_odds_chosen": 1.3326835632324219, + "log_odds_ratio": -0.4328557848930359, + "logits/chosen": -0.3287041485309601, + "logits/rejected": -0.38711437582969666, + "logps/chosen": -0.837693989276886, + "logps/rejected": -1.7427314519882202, + "loss": 0.8344, + "nll_loss": 0.7911303639411926, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.08376939594745636, + "rewards/margins": 0.0905037596821785, + "rewards/rejected": -0.17427316308021545, + "step": 9090 + }, + { + "epoch": 1.64, + "grad_norm": 1.2761762142181396, + "learning_rate": 2.75906245450575e-06, + "log_odds_chosen": 0.9336652755737305, + "log_odds_ratio": -0.551077663898468, + "logits/chosen": -0.4241195321083069, + "logits/rejected": -0.42867010831832886, + "logps/chosen": -0.8934443593025208, + "logps/rejected": -1.5422897338867188, + "loss": 0.9204, + "nll_loss": 0.8653148412704468, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08934443444013596, + "rewards/margins": 0.06488454341888428, + "rewards/rejected": -0.15422898530960083, + "step": 9100 + }, + { + "epoch": 1.65, + "grad_norm": 1.2373225688934326, + "learning_rate": 2.753239190566312e-06, + "log_odds_chosen": 0.9416915774345398, + "log_odds_ratio": -0.520602822303772, + "logits/chosen": -0.39296430349349976, + "logits/rejected": -0.41580742597579956, + "logps/chosen": -1.0003538131713867, + "logps/rejected": -1.6513385772705078, + "loss": 0.9442, + "nll_loss": 0.8921074867248535, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.10003536939620972, + "rewards/margins": 0.06509849429130554, + "rewards/rejected": -0.16513387858867645, + "step": 9110 + }, + { + "epoch": 1.65, + "grad_norm": 1.3835368156433105, + "learning_rate": 2.7474159266268744e-06, + "log_odds_chosen": 0.9459671974182129, + "log_odds_ratio": -0.48943382501602173, + "logits/chosen": -0.4307325482368469, + "logits/rejected": -0.44662827253341675, + "logps/chosen": -0.9193538427352905, + "logps/rejected": -1.5974020957946777, + "loss": 0.9734, + "nll_loss": 0.9244106411933899, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09193538129329681, + "rewards/margins": 0.06780483573675156, + "rewards/rejected": -0.15974020957946777, + "step": 9120 + }, + { + "epoch": 1.65, + "grad_norm": 2.2756528854370117, + "learning_rate": 2.7415926626874363e-06, + "log_odds_chosen": 0.8301292657852173, + "log_odds_ratio": -0.5760712027549744, + "logits/chosen": -0.4282712936401367, + "logits/rejected": -0.4218481183052063, + "logps/chosen": -0.922931969165802, + "logps/rejected": -1.4867548942565918, + "loss": 0.9996, + "nll_loss": 0.9419782757759094, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09229318052530289, + "rewards/margins": 0.05638228729367256, + "rewards/rejected": -0.14867548644542694, + "step": 9130 + }, + { + "epoch": 1.65, + "grad_norm": 0.843051552772522, + "learning_rate": 2.735769398747998e-06, + "log_odds_chosen": 1.2046692371368408, + "log_odds_ratio": -0.4969426095485687, + "logits/chosen": -0.40997037291526794, + "logits/rejected": -0.4544478952884674, + "logps/chosen": -0.878393828868866, + "logps/rejected": -1.7253586053848267, + "loss": 0.8817, + "nll_loss": 0.8320406675338745, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08783938735723495, + "rewards/margins": 0.08469647914171219, + "rewards/rejected": -0.17253586649894714, + "step": 9140 + }, + { + "epoch": 1.65, + "grad_norm": 1.9792512655258179, + "learning_rate": 2.72994613480856e-06, + "log_odds_chosen": 1.3317620754241943, + "log_odds_ratio": -0.4221356511116028, + "logits/chosen": -0.41216516494750977, + "logits/rejected": -0.4431169629096985, + "logps/chosen": -0.8426889181137085, + "logps/rejected": -1.7870067358016968, + "loss": 0.8691, + "nll_loss": 0.8268448114395142, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08426889777183533, + "rewards/margins": 0.0944317951798439, + "rewards/rejected": -0.17870070040225983, + "step": 9150 + }, + { + "epoch": 1.65, + "grad_norm": 2.411402940750122, + "learning_rate": 2.724122870869122e-06, + "log_odds_chosen": 0.6889928579330444, + "log_odds_ratio": -0.5732430219650269, + "logits/chosen": -0.4233470559120178, + "logits/rejected": -0.4282502233982086, + "logps/chosen": -0.9491702318191528, + "logps/rejected": -1.455941081047058, + "loss": 0.9649, + "nll_loss": 0.9075853228569031, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09491701424121857, + "rewards/margins": 0.0506770983338356, + "rewards/rejected": -0.14559412002563477, + "step": 9160 + }, + { + "epoch": 1.66, + "grad_norm": 1.2510401010513306, + "learning_rate": 2.7182996069296842e-06, + "log_odds_chosen": 0.7998046875, + "log_odds_ratio": -0.5143226981163025, + "logits/chosen": -0.4312712252140045, + "logits/rejected": -0.4719986915588379, + "logps/chosen": -0.8872987627983093, + "logps/rejected": -1.4246270656585693, + "loss": 0.8871, + "nll_loss": 0.835619330406189, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08872988075017929, + "rewards/margins": 0.05373283475637436, + "rewards/rejected": -0.14246270060539246, + "step": 9170 + }, + { + "epoch": 1.66, + "grad_norm": 1.4718730449676514, + "learning_rate": 2.712476342990246e-06, + "log_odds_chosen": 0.8228703737258911, + "log_odds_ratio": -0.5644127130508423, + "logits/chosen": -0.43898096680641174, + "logits/rejected": -0.4336237907409668, + "logps/chosen": -0.8965182304382324, + "logps/rejected": -1.4306434392929077, + "loss": 0.9294, + "nll_loss": 0.8729426264762878, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08965182304382324, + "rewards/margins": 0.053412534296512604, + "rewards/rejected": -0.14306434988975525, + "step": 9180 + }, + { + "epoch": 1.66, + "grad_norm": 1.5248944759368896, + "learning_rate": 2.7066530790508076e-06, + "log_odds_chosen": 0.9961272478103638, + "log_odds_ratio": -0.4423357844352722, + "logits/chosen": -0.4377953112125397, + "logits/rejected": -0.4404810070991516, + "logps/chosen": -0.9476546049118042, + "logps/rejected": -1.602682113647461, + "loss": 1.0167, + "nll_loss": 0.9725112915039062, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.09476545453071594, + "rewards/margins": 0.06550275534391403, + "rewards/rejected": -0.16026821732521057, + "step": 9190 + }, + { + "epoch": 1.66, + "grad_norm": 1.005291223526001, + "learning_rate": 2.70082981511137e-06, + "log_odds_chosen": 0.8400213122367859, + "log_odds_ratio": -0.5274345278739929, + "logits/chosen": -0.4171048104763031, + "logits/rejected": -0.4462948739528656, + "logps/chosen": -0.9608150720596313, + "logps/rejected": -1.5505651235580444, + "loss": 0.9606, + "nll_loss": 0.9078795313835144, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09608151018619537, + "rewards/margins": 0.05897500365972519, + "rewards/rejected": -0.15505652129650116, + "step": 9200 + }, + { + "epoch": 1.66, + "grad_norm": 3.342343330383301, + "learning_rate": 2.6950065511719318e-06, + "log_odds_chosen": 0.9597989916801453, + "log_odds_ratio": -0.5116977691650391, + "logits/chosen": -0.4211476445198059, + "logits/rejected": -0.4668583869934082, + "logps/chosen": -0.9648736119270325, + "logps/rejected": -1.6064956188201904, + "loss": 0.9446, + "nll_loss": 0.8934422731399536, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09648735821247101, + "rewards/margins": 0.06416221708059311, + "rewards/rejected": -0.16064956784248352, + "step": 9210 + }, + { + "epoch": 1.67, + "grad_norm": 1.7705780267715454, + "learning_rate": 2.6891832872324937e-06, + "log_odds_chosen": 0.861775279045105, + "log_odds_ratio": -0.5697323679924011, + "logits/chosen": -0.44634613394737244, + "logits/rejected": -0.4355178475379944, + "logps/chosen": -0.8917131423950195, + "logps/rejected": -1.510679006576538, + "loss": 0.9564, + "nll_loss": 0.8994709253311157, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08917130529880524, + "rewards/margins": 0.06189659237861633, + "rewards/rejected": -0.15106788277626038, + "step": 9220 + }, + { + "epoch": 1.67, + "grad_norm": 0.8565041422843933, + "learning_rate": 2.683360023293056e-06, + "log_odds_chosen": 0.9789537191390991, + "log_odds_ratio": -0.5268505215644836, + "logits/chosen": -0.4173552095890045, + "logits/rejected": -0.4489436745643616, + "logps/chosen": -0.9345428347587585, + "logps/rejected": -1.6449741125106812, + "loss": 0.9736, + "nll_loss": 0.9208728671073914, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0934542864561081, + "rewards/margins": 0.07104314863681793, + "rewards/rejected": -0.16449742019176483, + "step": 9230 + }, + { + "epoch": 1.67, + "grad_norm": 2.507345199584961, + "learning_rate": 2.6775367593536174e-06, + "log_odds_chosen": 1.2452746629714966, + "log_odds_ratio": -0.493520587682724, + "logits/chosen": -0.416081964969635, + "logits/rejected": -0.4274824559688568, + "logps/chosen": -0.9222303628921509, + "logps/rejected": -1.828566551208496, + "loss": 0.9633, + "nll_loss": 0.9139355421066284, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09222304075956345, + "rewards/margins": 0.09063363820314407, + "rewards/rejected": -0.18285667896270752, + "step": 9240 + }, + { + "epoch": 1.67, + "grad_norm": 1.6871551275253296, + "learning_rate": 2.6717134954141793e-06, + "log_odds_chosen": 1.0459063053131104, + "log_odds_ratio": -0.513852596282959, + "logits/chosen": -0.3885519504547119, + "logits/rejected": -0.40226420760154724, + "logps/chosen": -0.8817213773727417, + "logps/rejected": -1.6192827224731445, + "loss": 0.9459, + "nll_loss": 0.8944852948188782, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08817213773727417, + "rewards/margins": 0.07375612109899521, + "rewards/rejected": -0.16192826628684998, + "step": 9250 + }, + { + "epoch": 1.67, + "grad_norm": 1.1326770782470703, + "learning_rate": 2.6658902314747416e-06, + "log_odds_chosen": 0.49963292479515076, + "log_odds_ratio": -0.6053067445755005, + "logits/chosen": -0.459317147731781, + "logits/rejected": -0.4486822187900543, + "logps/chosen": -1.020218849182129, + "logps/rejected": -1.3288309574127197, + "loss": 1.0477, + "nll_loss": 0.9871250987052917, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10202188789844513, + "rewards/margins": 0.030861202627420425, + "rewards/rejected": -0.13288308680057526, + "step": 9260 + }, + { + "epoch": 1.67, + "grad_norm": 2.13757061958313, + "learning_rate": 2.6600669675353035e-06, + "log_odds_chosen": 1.017632246017456, + "log_odds_ratio": -0.5229853391647339, + "logits/chosen": -0.4418388307094574, + "logits/rejected": -0.4649677276611328, + "logps/chosen": -0.8779067993164062, + "logps/rejected": -1.5976464748382568, + "loss": 0.9128, + "nll_loss": 0.860500693321228, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08779068291187286, + "rewards/margins": 0.07197396457195282, + "rewards/rejected": -0.15976464748382568, + "step": 9270 + }, + { + "epoch": 1.68, + "grad_norm": 1.8838378190994263, + "learning_rate": 2.6542437035958654e-06, + "log_odds_chosen": 1.0137577056884766, + "log_odds_ratio": -0.5111135244369507, + "logits/chosen": -0.4243897497653961, + "logits/rejected": -0.479489803314209, + "logps/chosen": -0.9173609614372253, + "logps/rejected": -1.6629959344863892, + "loss": 0.9531, + "nll_loss": 0.901984691619873, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09173610806465149, + "rewards/margins": 0.07456348836421967, + "rewards/rejected": -0.16629959642887115, + "step": 9280 + }, + { + "epoch": 1.68, + "grad_norm": 0.8887907862663269, + "learning_rate": 2.6484204396564273e-06, + "log_odds_chosen": 1.2527879476547241, + "log_odds_ratio": -0.4821585714817047, + "logits/chosen": -0.3953564763069153, + "logits/rejected": -0.41715526580810547, + "logps/chosen": -0.8679831624031067, + "logps/rejected": -1.749703049659729, + "loss": 0.9136, + "nll_loss": 0.8653751611709595, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08679831027984619, + "rewards/margins": 0.08817199617624283, + "rewards/rejected": -0.17497031390666962, + "step": 9290 + }, + { + "epoch": 1.68, + "grad_norm": 3.8302605152130127, + "learning_rate": 2.642597175716989e-06, + "log_odds_chosen": 1.1876541376113892, + "log_odds_ratio": -0.49830159544944763, + "logits/chosen": -0.4293610155582428, + "logits/rejected": -0.45646485686302185, + "logps/chosen": -0.8713734745979309, + "logps/rejected": -1.6922852993011475, + "loss": 1.0033, + "nll_loss": 0.9534996151924133, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08713734894990921, + "rewards/margins": 0.08209117501974106, + "rewards/rejected": -0.16922852396965027, + "step": 9300 + }, + { + "epoch": 1.68, + "grad_norm": 2.138373613357544, + "learning_rate": 2.6367739117775515e-06, + "log_odds_chosen": 1.16695237159729, + "log_odds_ratio": -0.46931418776512146, + "logits/chosen": -0.39013200998306274, + "logits/rejected": -0.41718345880508423, + "logps/chosen": -0.8600034713745117, + "logps/rejected": -1.6642802953720093, + "loss": 0.8434, + "nll_loss": 0.7965083718299866, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08600034564733505, + "rewards/margins": 0.08042767643928528, + "rewards/rejected": -0.16642801463603973, + "step": 9310 + }, + { + "epoch": 1.68, + "grad_norm": 1.1626324653625488, + "learning_rate": 2.630950647838113e-06, + "log_odds_chosen": 0.8146978616714478, + "log_odds_ratio": -0.5507025718688965, + "logits/chosen": -0.4694862365722656, + "logits/rejected": -0.4689878821372986, + "logps/chosen": -0.948291003704071, + "logps/rejected": -1.5086925029754639, + "loss": 0.9501, + "nll_loss": 0.8950142860412598, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09482909739017487, + "rewards/margins": 0.056040145456790924, + "rewards/rejected": -0.150869220495224, + "step": 9320 + }, + { + "epoch": 1.69, + "grad_norm": 2.426990509033203, + "learning_rate": 2.625127383898675e-06, + "log_odds_chosen": 0.7397544384002686, + "log_odds_ratio": -0.5745836496353149, + "logits/chosen": -0.4914630949497223, + "logits/rejected": -0.48593616485595703, + "logps/chosen": -0.9268978238105774, + "logps/rejected": -1.4810400009155273, + "loss": 1.0306, + "nll_loss": 0.9731773138046265, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09268978983163834, + "rewards/margins": 0.05541421100497246, + "rewards/rejected": -0.1481039822101593, + "step": 9330 + }, + { + "epoch": 1.69, + "grad_norm": 1.5938129425048828, + "learning_rate": 2.619304119959237e-06, + "log_odds_chosen": 0.9346494674682617, + "log_odds_ratio": -0.491645872592926, + "logits/chosen": -0.4693544805049896, + "logits/rejected": -0.46639999747276306, + "logps/chosen": -0.9760904312133789, + "logps/rejected": -1.650472640991211, + "loss": 0.9957, + "nll_loss": 0.9465456008911133, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09760904312133789, + "rewards/margins": 0.06743822246789932, + "rewards/rejected": -0.16504724323749542, + "step": 9340 + }, + { + "epoch": 1.69, + "grad_norm": 1.5292596817016602, + "learning_rate": 2.613480856019799e-06, + "log_odds_chosen": 0.6154406666755676, + "log_odds_ratio": -0.5639275312423706, + "logits/chosen": -0.4870131015777588, + "logits/rejected": -0.4909876883029938, + "logps/chosen": -0.8956283330917358, + "logps/rejected": -1.3006092309951782, + "loss": 0.9209, + "nll_loss": 0.8644782900810242, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08956284075975418, + "rewards/margins": 0.04049808531999588, + "rewards/rejected": -0.13006091117858887, + "step": 9350 + }, + { + "epoch": 1.69, + "grad_norm": 0.821647047996521, + "learning_rate": 2.607657592080361e-06, + "log_odds_chosen": 1.121782660484314, + "log_odds_ratio": -0.4775637090206146, + "logits/chosen": -0.4000754952430725, + "logits/rejected": -0.4561656415462494, + "logps/chosen": -0.8684180974960327, + "logps/rejected": -1.652954339981079, + "loss": 0.947, + "nll_loss": 0.8992889523506165, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08684180676937103, + "rewards/margins": 0.07845362275838852, + "rewards/rejected": -0.16529543697834015, + "step": 9360 + }, + { + "epoch": 1.69, + "grad_norm": 1.416046380996704, + "learning_rate": 2.601834328140923e-06, + "log_odds_chosen": 1.0560827255249023, + "log_odds_ratio": -0.48382264375686646, + "logits/chosen": -0.485282838344574, + "logits/rejected": -0.4757692217826843, + "logps/chosen": -0.85932856798172, + "logps/rejected": -1.613471269607544, + "loss": 0.9261, + "nll_loss": 0.8776809573173523, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08593286573886871, + "rewards/margins": 0.0754142552614212, + "rewards/rejected": -0.16134712100028992, + "step": 9370 + }, + { + "epoch": 1.69, + "grad_norm": 1.1823846101760864, + "learning_rate": 2.5960110642014847e-06, + "log_odds_chosen": 1.0517069101333618, + "log_odds_ratio": -0.49165159463882446, + "logits/chosen": -0.4569578170776367, + "logits/rejected": -0.41528910398483276, + "logps/chosen": -0.9491796493530273, + "logps/rejected": -1.6688644886016846, + "loss": 0.934, + "nll_loss": 0.8848093152046204, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09491796791553497, + "rewards/margins": 0.07196847349405289, + "rewards/rejected": -0.16688643395900726, + "step": 9380 + }, + { + "epoch": 1.7, + "grad_norm": 1.1237273216247559, + "learning_rate": 2.590187800262047e-06, + "log_odds_chosen": 1.1736353635787964, + "log_odds_ratio": -0.47512874007225037, + "logits/chosen": -0.41880425810813904, + "logits/rejected": -0.39208707213401794, + "logps/chosen": -0.8113826513290405, + "logps/rejected": -1.6218392848968506, + "loss": 0.9054, + "nll_loss": 0.8578837513923645, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08113826811313629, + "rewards/margins": 0.08104566484689713, + "rewards/rejected": -0.162183940410614, + "step": 9390 + }, + { + "epoch": 1.7, + "grad_norm": 1.7111291885375977, + "learning_rate": 2.584364536322609e-06, + "log_odds_chosen": 0.8534847497940063, + "log_odds_ratio": -0.5059072375297546, + "logits/chosen": -0.48690375685691833, + "logits/rejected": -0.4755523204803467, + "logps/chosen": -0.9035647511482239, + "logps/rejected": -1.4580377340316772, + "loss": 0.9428, + "nll_loss": 0.8922485113143921, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09035647660493851, + "rewards/margins": 0.05544731765985489, + "rewards/rejected": -0.1458037793636322, + "step": 9400 + }, + { + "epoch": 1.7, + "grad_norm": 1.1456893682479858, + "learning_rate": 2.5785412723831704e-06, + "log_odds_chosen": 1.171532392501831, + "log_odds_ratio": -0.50825035572052, + "logits/chosen": -0.370635986328125, + "logits/rejected": -0.3998872935771942, + "logps/chosen": -0.8561755418777466, + "logps/rejected": -1.715456247329712, + "loss": 0.9125, + "nll_loss": 0.8616338968276978, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0856175571680069, + "rewards/margins": 0.08592807501554489, + "rewards/rejected": -0.1715456247329712, + "step": 9410 + }, + { + "epoch": 1.7, + "grad_norm": 1.8371309041976929, + "learning_rate": 2.5727180084437327e-06, + "log_odds_chosen": 1.2050302028656006, + "log_odds_ratio": -0.44126567244529724, + "logits/chosen": -0.4281534254550934, + "logits/rejected": -0.43468666076660156, + "logps/chosen": -0.8962228894233704, + "logps/rejected": -1.6920740604400635, + "loss": 0.9079, + "nll_loss": 0.8637346029281616, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08962228894233704, + "rewards/margins": 0.07958510518074036, + "rewards/rejected": -0.169207364320755, + "step": 9420 + }, + { + "epoch": 1.7, + "grad_norm": 2.1131508350372314, + "learning_rate": 2.5668947445042945e-06, + "log_odds_chosen": 1.251560091972351, + "log_odds_ratio": -0.48243609070777893, + "logits/chosen": -0.43499651551246643, + "logits/rejected": -0.4199391305446625, + "logps/chosen": -0.9016925692558289, + "logps/rejected": -1.7924721240997314, + "loss": 0.9366, + "nll_loss": 0.8883956074714661, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.090169258415699, + "rewards/margins": 0.08907793462276459, + "rewards/rejected": -0.1792472004890442, + "step": 9430 + }, + { + "epoch": 1.71, + "grad_norm": 2.5323009490966797, + "learning_rate": 2.5610714805648564e-06, + "log_odds_chosen": 0.9132372736930847, + "log_odds_ratio": -0.5154244899749756, + "logits/chosen": -0.4603002667427063, + "logits/rejected": -0.4696424901485443, + "logps/chosen": -0.9054134488105774, + "logps/rejected": -1.5461491346359253, + "loss": 0.9467, + "nll_loss": 0.8951939344406128, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09054134041070938, + "rewards/margins": 0.0640735775232315, + "rewards/rejected": -0.1546149104833603, + "step": 9440 + }, + { + "epoch": 1.71, + "grad_norm": 1.876884937286377, + "learning_rate": 2.5552482166254187e-06, + "log_odds_chosen": 0.6280218958854675, + "log_odds_ratio": -0.5808584690093994, + "logits/chosen": -0.41892462968826294, + "logits/rejected": -0.45282214879989624, + "logps/chosen": -0.9991976618766785, + "logps/rejected": -1.454352617263794, + "loss": 0.952, + "nll_loss": 0.8938835263252258, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09991975873708725, + "rewards/margins": 0.045515503734350204, + "rewards/rejected": -0.14543527364730835, + "step": 9450 + }, + { + "epoch": 1.71, + "grad_norm": 1.4596505165100098, + "learning_rate": 2.54942495268598e-06, + "log_odds_chosen": 0.981662392616272, + "log_odds_ratio": -0.5320886373519897, + "logits/chosen": -0.43657493591308594, + "logits/rejected": -0.4576547145843506, + "logps/chosen": -0.8840090036392212, + "logps/rejected": -1.5034620761871338, + "loss": 1.0212, + "nll_loss": 0.9680083394050598, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08840090036392212, + "rewards/margins": 0.06194530799984932, + "rewards/rejected": -0.15034620463848114, + "step": 9460 + }, + { + "epoch": 1.71, + "grad_norm": 0.9314261674880981, + "learning_rate": 2.543601688746542e-06, + "log_odds_chosen": 1.2412148714065552, + "log_odds_ratio": -0.44239601492881775, + "logits/chosen": -0.4623107314109802, + "logits/rejected": -0.48735103011131287, + "logps/chosen": -0.9057219624519348, + "logps/rejected": -1.7566516399383545, + "loss": 0.9482, + "nll_loss": 0.9039756059646606, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.09057219326496124, + "rewards/margins": 0.08509297668933868, + "rewards/rejected": -0.17566516995429993, + "step": 9470 + }, + { + "epoch": 1.71, + "grad_norm": 1.9713478088378906, + "learning_rate": 2.5377784248071044e-06, + "log_odds_chosen": 1.1023633480072021, + "log_odds_ratio": -0.47931233048439026, + "logits/chosen": -0.4793943762779236, + "logits/rejected": -0.48942771553993225, + "logps/chosen": -0.8621703386306763, + "logps/rejected": -1.6248624324798584, + "loss": 0.9769, + "nll_loss": 0.9289461970329285, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08621703088283539, + "rewards/margins": 0.07626921683549881, + "rewards/rejected": -0.1624862402677536, + "step": 9480 + }, + { + "epoch": 1.71, + "grad_norm": 1.8307769298553467, + "learning_rate": 2.5319551608676663e-06, + "log_odds_chosen": 0.8780630826950073, + "log_odds_ratio": -0.5695803165435791, + "logits/chosen": -0.4494501054286957, + "logits/rejected": -0.41476479172706604, + "logps/chosen": -0.9255537986755371, + "logps/rejected": -1.55754816532135, + "loss": 1.0147, + "nll_loss": 0.9577773809432983, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09255538135766983, + "rewards/margins": 0.06319941580295563, + "rewards/rejected": -0.15575480461120605, + "step": 9490 + }, + { + "epoch": 1.72, + "grad_norm": 0.9392945766448975, + "learning_rate": 2.526131896928228e-06, + "log_odds_chosen": 1.1123392581939697, + "log_odds_ratio": -0.5118842720985413, + "logits/chosen": -0.44842857122421265, + "logits/rejected": -0.45434433221817017, + "logps/chosen": -0.8633874654769897, + "logps/rejected": -1.617057204246521, + "loss": 0.8661, + "nll_loss": 0.814924418926239, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08633874356746674, + "rewards/margins": 0.07536698877811432, + "rewards/rejected": -0.16170573234558105, + "step": 9500 + }, + { + "epoch": 1.72, + "grad_norm": 1.8088247776031494, + "learning_rate": 2.52030863298879e-06, + "log_odds_chosen": 1.3172489404678345, + "log_odds_ratio": -0.4467700123786926, + "logits/chosen": -0.43645501136779785, + "logits/rejected": -0.44458237290382385, + "logps/chosen": -0.7922109961509705, + "logps/rejected": -1.7160981893539429, + "loss": 0.9327, + "nll_loss": 0.8880230784416199, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.07922110706567764, + "rewards/margins": 0.09238873422145844, + "rewards/rejected": -0.17160983383655548, + "step": 9510 + }, + { + "epoch": 1.72, + "grad_norm": 2.9287898540496826, + "learning_rate": 2.514485369049352e-06, + "log_odds_chosen": 0.775164783000946, + "log_odds_ratio": -0.5303457975387573, + "logits/chosen": -0.4484976828098297, + "logits/rejected": -0.45544663071632385, + "logps/chosen": -0.9658139944076538, + "logps/rejected": -1.4660108089447021, + "loss": 0.9324, + "nll_loss": 0.8793715238571167, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09658139944076538, + "rewards/margins": 0.05001969262957573, + "rewards/rejected": -0.14660108089447021, + "step": 9520 + }, + { + "epoch": 1.72, + "grad_norm": 1.672947645187378, + "learning_rate": 2.5086621051099143e-06, + "log_odds_chosen": 1.0569748878479004, + "log_odds_ratio": -0.4957321584224701, + "logits/chosen": -0.42946892976760864, + "logits/rejected": -0.41913193464279175, + "logps/chosen": -0.9915930032730103, + "logps/rejected": -1.7599254846572876, + "loss": 0.9782, + "nll_loss": 0.9286025166511536, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09915930032730103, + "rewards/margins": 0.07683324813842773, + "rewards/rejected": -0.17599254846572876, + "step": 9530 + }, + { + "epoch": 1.72, + "grad_norm": 1.7308794260025024, + "learning_rate": 2.502838841170476e-06, + "log_odds_chosen": 1.1496503353118896, + "log_odds_ratio": -0.5147876739501953, + "logits/chosen": -0.4033949375152588, + "logits/rejected": -0.40659064054489136, + "logps/chosen": -0.9279058575630188, + "logps/rejected": -1.677899718284607, + "loss": 0.9095, + "nll_loss": 0.8580510020256042, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09279057383537292, + "rewards/margins": 0.07499939203262329, + "rewards/rejected": -0.16778996586799622, + "step": 9540 + }, + { + "epoch": 1.73, + "grad_norm": 1.5058820247650146, + "learning_rate": 2.4970155772310376e-06, + "log_odds_chosen": 0.6924060583114624, + "log_odds_ratio": -0.5773219466209412, + "logits/chosen": -0.42662039399147034, + "logits/rejected": -0.4317251145839691, + "logps/chosen": -0.9882136583328247, + "logps/rejected": -1.4816371202468872, + "loss": 0.9622, + "nll_loss": 0.9044473767280579, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09882136434316635, + "rewards/margins": 0.04934234172105789, + "rewards/rejected": -0.14816370606422424, + "step": 9550 + }, + { + "epoch": 1.73, + "grad_norm": 1.561524510383606, + "learning_rate": 2.4911923132916e-06, + "log_odds_chosen": 0.9440113306045532, + "log_odds_ratio": -0.5216168165206909, + "logits/chosen": -0.4172740578651428, + "logits/rejected": -0.4479880928993225, + "logps/chosen": -0.9342236518859863, + "logps/rejected": -1.585782766342163, + "loss": 0.9582, + "nll_loss": 0.906027615070343, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09342236816883087, + "rewards/margins": 0.06515590846538544, + "rewards/rejected": -0.1585782915353775, + "step": 9560 + }, + { + "epoch": 1.73, + "grad_norm": 1.6601184606552124, + "learning_rate": 2.485369049352162e-06, + "log_odds_chosen": 0.9561376571655273, + "log_odds_ratio": -0.4933817386627197, + "logits/chosen": -0.48122167587280273, + "logits/rejected": -0.5029059648513794, + "logps/chosen": -1.0312833786010742, + "logps/rejected": -1.712125539779663, + "loss": 1.0299, + "nll_loss": 0.9805164337158203, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.1031283363699913, + "rewards/margins": 0.068084217607975, + "rewards/rejected": -0.1712125688791275, + "step": 9570 + }, + { + "epoch": 1.73, + "grad_norm": 1.6917434930801392, + "learning_rate": 2.4795457854127237e-06, + "log_odds_chosen": 0.5046578645706177, + "log_odds_ratio": -0.6268638372421265, + "logits/chosen": -0.4347049593925476, + "logits/rejected": -0.4096949100494385, + "logps/chosen": -0.9971641302108765, + "logps/rejected": -1.3836841583251953, + "loss": 0.9449, + "nll_loss": 0.882180392742157, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09971641004085541, + "rewards/margins": 0.038651980459690094, + "rewards/rejected": -0.1383683979511261, + "step": 9580 + }, + { + "epoch": 1.73, + "grad_norm": 2.294053792953491, + "learning_rate": 2.4737225214732856e-06, + "log_odds_chosen": 0.924883246421814, + "log_odds_ratio": -0.5290486216545105, + "logits/chosen": -0.4396088719367981, + "logits/rejected": -0.4460625648498535, + "logps/chosen": -0.9009316563606262, + "logps/rejected": -1.5800864696502686, + "loss": 0.9309, + "nll_loss": 0.8779793977737427, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09009316563606262, + "rewards/margins": 0.06791548430919647, + "rewards/rejected": -0.1580086499452591, + "step": 9590 + }, + { + "epoch": 1.73, + "grad_norm": 1.6921731233596802, + "learning_rate": 2.4678992575338475e-06, + "log_odds_chosen": 1.2375385761260986, + "log_odds_ratio": -0.4622219204902649, + "logits/chosen": -0.41445812582969666, + "logits/rejected": -0.43146657943725586, + "logps/chosen": -0.8876203298568726, + "logps/rejected": -1.799599289894104, + "loss": 0.9212, + "nll_loss": 0.8750225901603699, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08876203000545502, + "rewards/margins": 0.0911978930234909, + "rewards/rejected": -0.17995992302894592, + "step": 9600 + }, + { + "epoch": 1.74, + "grad_norm": 0.9766725897789001, + "learning_rate": 2.4620759935944098e-06, + "log_odds_chosen": 0.7962125539779663, + "log_odds_ratio": -0.5203061103820801, + "logits/chosen": -0.4127267897129059, + "logits/rejected": -0.41843312978744507, + "logps/chosen": -0.917259693145752, + "logps/rejected": -1.4751300811767578, + "loss": 0.9822, + "nll_loss": 0.9301362037658691, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09172599017620087, + "rewards/margins": 0.05578702688217163, + "rewards/rejected": -0.1475130021572113, + "step": 9610 + }, + { + "epoch": 1.74, + "grad_norm": 1.1548055410385132, + "learning_rate": 2.4562527296549717e-06, + "log_odds_chosen": 1.2751153707504272, + "log_odds_ratio": -0.4778338372707367, + "logits/chosen": -0.4446820616722107, + "logits/rejected": -0.4349389970302582, + "logps/chosen": -0.8049659729003906, + "logps/rejected": -1.6605899333953857, + "loss": 0.9126, + "nll_loss": 0.8648591041564941, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08049659430980682, + "rewards/margins": 0.08556241542100906, + "rewards/rejected": -0.1660590022802353, + "step": 9620 + }, + { + "epoch": 1.74, + "grad_norm": 2.5714190006256104, + "learning_rate": 2.450429465715533e-06, + "log_odds_chosen": 1.2102885246276855, + "log_odds_ratio": -0.4847165644168854, + "logits/chosen": -0.37883883714675903, + "logits/rejected": -0.40896421670913696, + "logps/chosen": -0.7805262804031372, + "logps/rejected": -1.6285765171051025, + "loss": 0.8425, + "nll_loss": 0.7940112352371216, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.07805263251066208, + "rewards/margins": 0.08480501919984818, + "rewards/rejected": -0.16285763680934906, + "step": 9630 + }, + { + "epoch": 1.74, + "grad_norm": 1.6911524534225464, + "learning_rate": 2.4446062017760954e-06, + "log_odds_chosen": 0.9459424018859863, + "log_odds_ratio": -0.5352808833122253, + "logits/chosen": -0.46002206206321716, + "logits/rejected": -0.5104098320007324, + "logps/chosen": -0.8379015922546387, + "logps/rejected": -1.4783127307891846, + "loss": 1.0092, + "nll_loss": 0.9556834101676941, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08379016816616058, + "rewards/margins": 0.0640411227941513, + "rewards/rejected": -0.1478312909603119, + "step": 9640 + }, + { + "epoch": 1.74, + "grad_norm": 1.3632971048355103, + "learning_rate": 2.4387829378366573e-06, + "log_odds_chosen": 0.7504720687866211, + "log_odds_ratio": -0.5602525472640991, + "logits/chosen": -0.4938369691371918, + "logits/rejected": -0.4717886447906494, + "logps/chosen": -0.8749774098396301, + "logps/rejected": -1.4180119037628174, + "loss": 0.96, + "nll_loss": 0.9039288759231567, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08749774843454361, + "rewards/margins": 0.05430345609784126, + "rewards/rejected": -0.14180120825767517, + "step": 9650 + }, + { + "epoch": 1.74, + "grad_norm": 1.4129406213760376, + "learning_rate": 2.432959673897219e-06, + "log_odds_chosen": 0.9551762342453003, + "log_odds_ratio": -0.5054140686988831, + "logits/chosen": -0.40426698327064514, + "logits/rejected": -0.4051760137081146, + "logps/chosen": -0.8844103813171387, + "logps/rejected": -1.5333651304244995, + "loss": 0.9301, + "nll_loss": 0.8796060681343079, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08844103664159775, + "rewards/margins": 0.06489548832178116, + "rewards/rejected": -0.1533365249633789, + "step": 9660 + }, + { + "epoch": 1.75, + "grad_norm": 1.6229116916656494, + "learning_rate": 2.4271364099577815e-06, + "log_odds_chosen": 1.0696156024932861, + "log_odds_ratio": -0.4787077307701111, + "logits/chosen": -0.44113072752952576, + "logits/rejected": -0.44910669326782227, + "logps/chosen": -0.8486385345458984, + "logps/rejected": -1.6105642318725586, + "loss": 0.9211, + "nll_loss": 0.873221755027771, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08486385643482208, + "rewards/margins": 0.07619258016347885, + "rewards/rejected": -0.16105642914772034, + "step": 9670 + }, + { + "epoch": 1.75, + "grad_norm": 2.514098644256592, + "learning_rate": 2.421313146018343e-06, + "log_odds_chosen": 0.8378480076789856, + "log_odds_ratio": -0.48678427934646606, + "logits/chosen": -0.46361297369003296, + "logits/rejected": -0.45725908875465393, + "logps/chosen": -0.8935993313789368, + "logps/rejected": -1.4331048727035522, + "loss": 0.9382, + "nll_loss": 0.8894980549812317, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08935993909835815, + "rewards/margins": 0.053950559347867966, + "rewards/rejected": -0.14331048727035522, + "step": 9680 + }, + { + "epoch": 1.75, + "grad_norm": 0.9816827774047852, + "learning_rate": 2.415489882078905e-06, + "log_odds_chosen": 1.1938976049423218, + "log_odds_ratio": -0.48203128576278687, + "logits/chosen": -0.4673733711242676, + "logits/rejected": -0.47799110412597656, + "logps/chosen": -0.8920567631721497, + "logps/rejected": -1.721644401550293, + "loss": 0.9587, + "nll_loss": 0.9105375409126282, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08920568227767944, + "rewards/margins": 0.08295877277851105, + "rewards/rejected": -0.1721644401550293, + "step": 9690 + }, + { + "epoch": 1.75, + "grad_norm": 1.19546377658844, + "learning_rate": 2.409666618139467e-06, + "log_odds_chosen": 1.1333242654800415, + "log_odds_ratio": -0.5056787729263306, + "logits/chosen": -0.4303444027900696, + "logits/rejected": -0.41326698660850525, + "logps/chosen": -0.8802807927131653, + "logps/rejected": -1.6464083194732666, + "loss": 0.8832, + "nll_loss": 0.8326579332351685, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08802806586027145, + "rewards/margins": 0.07661274820566177, + "rewards/rejected": -0.16464082896709442, + "step": 9700 + }, + { + "epoch": 1.75, + "grad_norm": 0.8413112759590149, + "learning_rate": 2.403843354200029e-06, + "log_odds_chosen": 0.7889858484268188, + "log_odds_ratio": -0.565024733543396, + "logits/chosen": -0.41838541626930237, + "logits/rejected": -0.4011848568916321, + "logps/chosen": -0.8995911478996277, + "logps/rejected": -1.4334033727645874, + "loss": 0.9728, + "nll_loss": 0.9163390398025513, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08995912224054337, + "rewards/margins": 0.05338122323155403, + "rewards/rejected": -0.1433403491973877, + "step": 9710 + }, + { + "epoch": 1.76, + "grad_norm": 1.572912573814392, + "learning_rate": 2.3980200902605914e-06, + "log_odds_chosen": 1.1054677963256836, + "log_odds_ratio": -0.5001887083053589, + "logits/chosen": -0.4604727625846863, + "logits/rejected": -0.49845361709594727, + "logps/chosen": -0.8892809748649597, + "logps/rejected": -1.6379826068878174, + "loss": 1.0193, + "nll_loss": 0.9693231582641602, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08892810344696045, + "rewards/margins": 0.07487015426158905, + "rewards/rejected": -0.1637982577085495, + "step": 9720 + }, + { + "epoch": 1.76, + "grad_norm": 0.9494357705116272, + "learning_rate": 2.392196826321153e-06, + "log_odds_chosen": 1.0644892454147339, + "log_odds_ratio": -0.5272237658500671, + "logits/chosen": -0.4514384865760803, + "logits/rejected": -0.45779746770858765, + "logps/chosen": -0.8867007493972778, + "logps/rejected": -1.6791248321533203, + "loss": 1.0191, + "nll_loss": 0.9663643836975098, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08867006003856659, + "rewards/margins": 0.07924243062734604, + "rewards/rejected": -0.16791249811649323, + "step": 9730 + }, + { + "epoch": 1.76, + "grad_norm": 1.5654778480529785, + "learning_rate": 2.3863735623817147e-06, + "log_odds_chosen": 0.6653653979301453, + "log_odds_ratio": -0.578784167766571, + "logits/chosen": -0.4321361482143402, + "logits/rejected": -0.42093291878700256, + "logps/chosen": -0.9089535474777222, + "logps/rejected": -1.3596817255020142, + "loss": 0.9331, + "nll_loss": 0.8752476572990417, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09089535474777222, + "rewards/margins": 0.04507281631231308, + "rewards/rejected": -0.1359681636095047, + "step": 9740 + }, + { + "epoch": 1.76, + "grad_norm": 1.4848594665527344, + "learning_rate": 2.380550298442277e-06, + "log_odds_chosen": 0.9823587536811829, + "log_odds_ratio": -0.5315570831298828, + "logits/chosen": -0.4404227137565613, + "logits/rejected": -0.4610070288181305, + "logps/chosen": -0.9339573979377747, + "logps/rejected": -1.6493076086044312, + "loss": 1.0242, + "nll_loss": 0.971057116985321, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09339574724435806, + "rewards/margins": 0.07153502851724625, + "rewards/rejected": -0.16493076086044312, + "step": 9750 + }, + { + "epoch": 1.76, + "grad_norm": 1.2012207508087158, + "learning_rate": 2.374727034502839e-06, + "log_odds_chosen": 1.0185325145721436, + "log_odds_ratio": -0.5244266390800476, + "logits/chosen": -0.45023947954177856, + "logits/rejected": -0.488433301448822, + "logps/chosen": -0.9145146608352661, + "logps/rejected": -1.5999908447265625, + "loss": 1.0072, + "nll_loss": 0.9547730684280396, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09145145863294601, + "rewards/margins": 0.06854760646820068, + "rewards/rejected": -0.1599990725517273, + "step": 9760 + }, + { + "epoch": 1.76, + "grad_norm": 1.891196370124817, + "learning_rate": 2.3689037705634004e-06, + "log_odds_chosen": 0.9177335500717163, + "log_odds_ratio": -0.45958462357521057, + "logits/chosen": -0.4622717499732971, + "logits/rejected": -0.46385353803634644, + "logps/chosen": -0.9317830801010132, + "logps/rejected": -1.5283830165863037, + "loss": 1.0005, + "nll_loss": 0.9544920921325684, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.09317831695079803, + "rewards/margins": 0.059659987688064575, + "rewards/rejected": -0.1528383046388626, + "step": 9770 + }, + { + "epoch": 1.77, + "grad_norm": 2.1724750995635986, + "learning_rate": 2.3630805066239627e-06, + "log_odds_chosen": 1.292080044746399, + "log_odds_ratio": -0.4301665425300598, + "logits/chosen": -0.3887042701244354, + "logits/rejected": -0.3993460536003113, + "logps/chosen": -0.8036476373672485, + "logps/rejected": -1.6760154962539673, + "loss": 0.8367, + "nll_loss": 0.7936657667160034, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08036476373672485, + "rewards/margins": 0.08723679184913635, + "rewards/rejected": -0.1676015555858612, + "step": 9780 + }, + { + "epoch": 1.77, + "grad_norm": 2.153475761413574, + "learning_rate": 2.3572572426845246e-06, + "log_odds_chosen": 0.9047862887382507, + "log_odds_ratio": -0.5216431617736816, + "logits/chosen": -0.4829631745815277, + "logits/rejected": -0.4913422167301178, + "logps/chosen": -0.9199289083480835, + "logps/rejected": -1.498349905014038, + "loss": 1.0016, + "nll_loss": 0.9494854211807251, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09199289977550507, + "rewards/margins": 0.057842087000608444, + "rewards/rejected": -0.14983497560024261, + "step": 9790 + }, + { + "epoch": 1.77, + "grad_norm": 2.112905263900757, + "learning_rate": 2.3514339787450865e-06, + "log_odds_chosen": 1.2953317165374756, + "log_odds_ratio": -0.43290096521377563, + "logits/chosen": -0.44959911704063416, + "logits/rejected": -0.42779120802879333, + "logps/chosen": -0.8838974237442017, + "logps/rejected": -1.8059628009796143, + "loss": 0.9752, + "nll_loss": 0.9318834543228149, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08838973939418793, + "rewards/margins": 0.09220656007528305, + "rewards/rejected": -0.18059630692005157, + "step": 9800 + }, + { + "epoch": 1.77, + "grad_norm": 1.9552569389343262, + "learning_rate": 2.3456107148056483e-06, + "log_odds_chosen": 1.206032156944275, + "log_odds_ratio": -0.4666607975959778, + "logits/chosen": -0.37482333183288574, + "logits/rejected": -0.40247243642807007, + "logps/chosen": -0.8406115770339966, + "logps/rejected": -1.7043174505233765, + "loss": 0.9233, + "nll_loss": 0.8766835927963257, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0840611681342125, + "rewards/margins": 0.08637058734893799, + "rewards/rejected": -0.17043174803256989, + "step": 9810 + }, + { + "epoch": 1.77, + "grad_norm": 1.8231532573699951, + "learning_rate": 2.3397874508662102e-06, + "log_odds_chosen": 0.8592397570610046, + "log_odds_ratio": -0.5452404022216797, + "logits/chosen": -0.43346747756004333, + "logits/rejected": -0.44631171226501465, + "logps/chosen": -0.9697023630142212, + "logps/rejected": -1.4977270364761353, + "loss": 0.9444, + "nll_loss": 0.8898833990097046, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09697023779153824, + "rewards/margins": 0.05280245468020439, + "rewards/rejected": -0.14977270364761353, + "step": 9820 + }, + { + "epoch": 1.78, + "grad_norm": 1.39771568775177, + "learning_rate": 2.3339641869267725e-06, + "log_odds_chosen": 0.6571693420410156, + "log_odds_ratio": -0.5906838774681091, + "logits/chosen": -0.44236254692077637, + "logits/rejected": -0.44203391671180725, + "logps/chosen": -0.9650084376335144, + "logps/rejected": -1.4409892559051514, + "loss": 1.0067, + "nll_loss": 0.9475903511047363, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09650083631277084, + "rewards/margins": 0.047598086297512054, + "rewards/rejected": -0.1440989375114441, + "step": 9830 + }, + { + "epoch": 1.78, + "grad_norm": 2.2172060012817383, + "learning_rate": 2.3281409229873344e-06, + "log_odds_chosen": 0.7106519937515259, + "log_odds_ratio": -0.539935827255249, + "logits/chosen": -0.4445548951625824, + "logits/rejected": -0.43924275040626526, + "logps/chosen": -0.906205952167511, + "logps/rejected": -1.373451590538025, + "loss": 0.9577, + "nll_loss": 0.9036978483200073, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09062059223651886, + "rewards/margins": 0.046724557876586914, + "rewards/rejected": -0.13734516501426697, + "step": 9840 + }, + { + "epoch": 1.78, + "grad_norm": 0.8983945250511169, + "learning_rate": 2.3223176590478963e-06, + "log_odds_chosen": 1.0619983673095703, + "log_odds_ratio": -0.49372729659080505, + "logits/chosen": -0.4473651349544525, + "logits/rejected": -0.4316297173500061, + "logps/chosen": -0.9270407557487488, + "logps/rejected": -1.694265604019165, + "loss": 0.9834, + "nll_loss": 0.9340094327926636, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09270408004522324, + "rewards/margins": 0.07672245800495148, + "rewards/rejected": -0.16942653059959412, + "step": 9850 + }, + { + "epoch": 1.78, + "grad_norm": 1.786767840385437, + "learning_rate": 2.316494395108458e-06, + "log_odds_chosen": 1.2031980752944946, + "log_odds_ratio": -0.4756312370300293, + "logits/chosen": -0.45098644495010376, + "logits/rejected": -0.4427367150783539, + "logps/chosen": -0.863764762878418, + "logps/rejected": -1.763685941696167, + "loss": 0.8749, + "nll_loss": 0.8273009061813354, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.08637648075819016, + "rewards/margins": 0.08999210596084595, + "rewards/rejected": -0.1763685941696167, + "step": 9860 + }, + { + "epoch": 1.78, + "grad_norm": 1.3234548568725586, + "learning_rate": 2.31067113116902e-06, + "log_odds_chosen": 1.1590311527252197, + "log_odds_ratio": -0.5146543383598328, + "logits/chosen": -0.4411509931087494, + "logits/rejected": -0.41618838906288147, + "logps/chosen": -0.9716132283210754, + "logps/rejected": -1.8201326131820679, + "loss": 0.9392, + "nll_loss": 0.8877296447753906, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09716132283210754, + "rewards/margins": 0.084851935505867, + "rewards/rejected": -0.18201325833797455, + "step": 9870 + }, + { + "epoch": 1.78, + "grad_norm": 1.1882553100585938, + "learning_rate": 2.304847867229582e-06, + "log_odds_chosen": 1.0298006534576416, + "log_odds_ratio": -0.5688801407814026, + "logits/chosen": -0.4849920868873596, + "logits/rejected": -0.4678238034248352, + "logps/chosen": -0.8872254490852356, + "logps/rejected": -1.601528525352478, + "loss": 0.9184, + "nll_loss": 0.8614827990531921, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08872254192829132, + "rewards/margins": 0.07143032550811768, + "rewards/rejected": -0.160152867436409, + "step": 9880 + }, + { + "epoch": 1.79, + "grad_norm": 1.4803732633590698, + "learning_rate": 2.2990246032901443e-06, + "log_odds_chosen": 1.0394694805145264, + "log_odds_ratio": -0.5270654559135437, + "logits/chosen": -0.4371699392795563, + "logits/rejected": -0.4427434504032135, + "logps/chosen": -0.9934245944023132, + "logps/rejected": -1.740962028503418, + "loss": 0.9955, + "nll_loss": 0.9427839517593384, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0993424504995346, + "rewards/margins": 0.07475373893976212, + "rewards/rejected": -0.17409618198871613, + "step": 9890 + }, + { + "epoch": 1.79, + "grad_norm": 1.5292103290557861, + "learning_rate": 2.2932013393507057e-06, + "log_odds_chosen": 0.7853134870529175, + "log_odds_ratio": -0.5695432424545288, + "logits/chosen": -0.45036354660987854, + "logits/rejected": -0.4651457369327545, + "logps/chosen": -0.9447164535522461, + "logps/rejected": -1.521506905555725, + "loss": 0.972, + "nll_loss": 0.9150772094726562, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09447164833545685, + "rewards/margins": 0.05767903849482536, + "rewards/rejected": -0.1521506905555725, + "step": 9900 + }, + { + "epoch": 1.79, + "grad_norm": 2.0200865268707275, + "learning_rate": 2.2873780754112676e-06, + "log_odds_chosen": 1.0599063634872437, + "log_odds_ratio": -0.5302172899246216, + "logits/chosen": -0.4713926315307617, + "logits/rejected": -0.4528167247772217, + "logps/chosen": -0.8392230868339539, + "logps/rejected": -1.5280404090881348, + "loss": 0.9771, + "nll_loss": 0.9240929484367371, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08392231166362762, + "rewards/margins": 0.06888174265623093, + "rewards/rejected": -0.15280406177043915, + "step": 9910 + }, + { + "epoch": 1.79, + "grad_norm": 1.2986183166503906, + "learning_rate": 2.28155481147183e-06, + "log_odds_chosen": 0.6897674202919006, + "log_odds_ratio": -0.6012214422225952, + "logits/chosen": -0.48748350143432617, + "logits/rejected": -0.5002898573875427, + "logps/chosen": -1.021876573562622, + "logps/rejected": -1.4822615385055542, + "loss": 1.03, + "nll_loss": 0.9698923826217651, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10218765586614609, + "rewards/margins": 0.04603850096464157, + "rewards/rejected": -0.14822617173194885, + "step": 9920 + }, + { + "epoch": 1.79, + "grad_norm": 0.9028427600860596, + "learning_rate": 2.275731547532392e-06, + "log_odds_chosen": 1.1438870429992676, + "log_odds_ratio": -0.4906987249851227, + "logits/chosen": -0.4426101744174957, + "logits/rejected": -0.44273123145103455, + "logps/chosen": -0.8546684384346008, + "logps/rejected": -1.698992371559143, + "loss": 0.9586, + "nll_loss": 0.9095567464828491, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08546683937311172, + "rewards/margins": 0.08443240821361542, + "rewards/rejected": -0.16989924013614655, + "step": 9930 + }, + { + "epoch": 1.8, + "grad_norm": 2.1396689414978027, + "learning_rate": 2.269908283592954e-06, + "log_odds_chosen": 1.3287298679351807, + "log_odds_ratio": -0.4608958661556244, + "logits/chosen": -0.41538411378860474, + "logits/rejected": -0.4415758550167084, + "logps/chosen": -0.8634954690933228, + "logps/rejected": -1.8478952646255493, + "loss": 0.9885, + "nll_loss": 0.9423999786376953, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08634954690933228, + "rewards/margins": 0.09843997657299042, + "rewards/rejected": -0.1847895085811615, + "step": 9940 + }, + { + "epoch": 1.8, + "grad_norm": 2.275242328643799, + "learning_rate": 2.2640850196535156e-06, + "log_odds_chosen": 0.6594809293746948, + "log_odds_ratio": -0.5583489537239075, + "logits/chosen": -0.48110976815223694, + "logits/rejected": -0.5074446797370911, + "logps/chosen": -0.9442492723464966, + "logps/rejected": -1.3900192975997925, + "loss": 0.9743, + "nll_loss": 0.9184621572494507, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09442492574453354, + "rewards/margins": 0.04457702115178108, + "rewards/rejected": -0.13900193572044373, + "step": 9950 + }, + { + "epoch": 1.8, + "grad_norm": 1.6967520713806152, + "learning_rate": 2.2582617557140775e-06, + "log_odds_chosen": 0.7493517398834229, + "log_odds_ratio": -0.5595013499259949, + "logits/chosen": -0.5545259714126587, + "logits/rejected": -0.5433686375617981, + "logps/chosen": -0.9461520314216614, + "logps/rejected": -1.4703208208084106, + "loss": 1.0238, + "nll_loss": 0.967811107635498, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09461519867181778, + "rewards/margins": 0.05241687223315239, + "rewards/rejected": -0.14703206717967987, + "step": 9960 + }, + { + "epoch": 1.8, + "grad_norm": 1.5574473142623901, + "learning_rate": 2.25243849177464e-06, + "log_odds_chosen": 1.2710860967636108, + "log_odds_ratio": -0.4554689824581146, + "logits/chosen": -0.45570430159568787, + "logits/rejected": -0.4198875427246094, + "logps/chosen": -0.9600180387496948, + "logps/rejected": -1.9265406131744385, + "loss": 0.9535, + "nll_loss": 0.9079564213752747, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09600180387496948, + "rewards/margins": 0.09665225446224213, + "rewards/rejected": -0.1926540583372116, + "step": 9970 + }, + { + "epoch": 1.8, + "grad_norm": 1.789555311203003, + "learning_rate": 2.2466152278352017e-06, + "log_odds_chosen": 1.1111174821853638, + "log_odds_ratio": -0.49643293023109436, + "logits/chosen": -0.4519789218902588, + "logits/rejected": -0.4754433035850525, + "logps/chosen": -0.8925978541374207, + "logps/rejected": -1.6644165515899658, + "loss": 0.9226, + "nll_loss": 0.8729545474052429, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0892597883939743, + "rewards/margins": 0.0771818682551384, + "rewards/rejected": -0.1664416491985321, + "step": 9980 + }, + { + "epoch": 1.8, + "grad_norm": 1.401259183883667, + "learning_rate": 2.240791963895763e-06, + "log_odds_chosen": 1.0764435529708862, + "log_odds_ratio": -0.5070358514785767, + "logits/chosen": -0.454425573348999, + "logits/rejected": -0.4417805075645447, + "logps/chosen": -0.9401386380195618, + "logps/rejected": -1.649101972579956, + "loss": 0.9432, + "nll_loss": 0.8924501538276672, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09401386976242065, + "rewards/margins": 0.07089634984731674, + "rewards/rejected": -0.1649101972579956, + "step": 9990 + }, + { + "epoch": 1.81, + "grad_norm": 0.8955624103546143, + "learning_rate": 2.2349686999563254e-06, + "log_odds_chosen": 0.9595357775688171, + "log_odds_ratio": -0.5476449131965637, + "logits/chosen": -0.5105107426643372, + "logits/rejected": -0.513346254825592, + "logps/chosen": -0.9034037590026855, + "logps/rejected": -1.5487500429153442, + "loss": 1.0059, + "nll_loss": 0.9511575698852539, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09034038335084915, + "rewards/margins": 0.06453461945056915, + "rewards/rejected": -0.1548749953508377, + "step": 10000 + }, + { + "epoch": 1.81, + "grad_norm": 2.2015881538391113, + "learning_rate": 2.2291454360168873e-06, + "log_odds_chosen": 1.0903244018554688, + "log_odds_ratio": -0.4742640554904938, + "logits/chosen": -0.44515347480773926, + "logits/rejected": -0.48136234283447266, + "logps/chosen": -0.9461368322372437, + "logps/rejected": -1.7086235284805298, + "loss": 0.9172, + "nll_loss": 0.8697601556777954, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0946136862039566, + "rewards/margins": 0.0762486606836319, + "rewards/rejected": -0.1708623617887497, + "step": 10010 + }, + { + "epoch": 1.81, + "grad_norm": 0.9485613107681274, + "learning_rate": 2.2233221720774492e-06, + "log_odds_chosen": 0.9951506853103638, + "log_odds_ratio": -0.4970771372318268, + "logits/chosen": -0.3801301419734955, + "logits/rejected": -0.40891996026039124, + "logps/chosen": -0.9870649576187134, + "logps/rejected": -1.6817152500152588, + "loss": 0.9403, + "nll_loss": 0.890583872795105, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09870649874210358, + "rewards/margins": 0.0694650262594223, + "rewards/rejected": -0.16817152500152588, + "step": 10020 + }, + { + "epoch": 1.81, + "grad_norm": 1.397936463356018, + "learning_rate": 2.2174989081380115e-06, + "log_odds_chosen": 1.0712759494781494, + "log_odds_ratio": -0.516617476940155, + "logits/chosen": -0.45938482880592346, + "logits/rejected": -0.47290220856666565, + "logps/chosen": -0.908774197101593, + "logps/rejected": -1.6888080835342407, + "loss": 0.9708, + "nll_loss": 0.9191882014274597, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09087742865085602, + "rewards/margins": 0.0780033990740776, + "rewards/rejected": -0.16888082027435303, + "step": 10030 + }, + { + "epoch": 1.81, + "grad_norm": 1.9884051084518433, + "learning_rate": 2.211675644198573e-06, + "log_odds_chosen": 0.7885714769363403, + "log_odds_ratio": -0.62605881690979, + "logits/chosen": -0.4488893151283264, + "logits/rejected": -0.4411085247993469, + "logps/chosen": -0.9576784372329712, + "logps/rejected": -1.561320185661316, + "loss": 0.9844, + "nll_loss": 0.9217513799667358, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09576784074306488, + "rewards/margins": 0.06036417931318283, + "rewards/rejected": -0.1561320275068283, + "step": 10040 + }, + { + "epoch": 1.82, + "grad_norm": 1.355200171470642, + "learning_rate": 2.2058523802591353e-06, + "log_odds_chosen": 1.0193064212799072, + "log_odds_ratio": -0.5205415487289429, + "logits/chosen": -0.47365063428878784, + "logits/rejected": -0.4856860637664795, + "logps/chosen": -0.882122814655304, + "logps/rejected": -1.5590479373931885, + "loss": 0.9558, + "nll_loss": 0.9037929773330688, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08821228891611099, + "rewards/margins": 0.06769250333309174, + "rewards/rejected": -0.15590479969978333, + "step": 10050 + }, + { + "epoch": 1.82, + "grad_norm": 0.8581579327583313, + "learning_rate": 2.200029116319697e-06, + "log_odds_chosen": 0.9242337942123413, + "log_odds_ratio": -0.5349970459938049, + "logits/chosen": -0.4497644901275635, + "logits/rejected": -0.452747106552124, + "logps/chosen": -0.8844796419143677, + "logps/rejected": -1.5484631061553955, + "loss": 1.0005, + "nll_loss": 0.9469534754753113, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08844795823097229, + "rewards/margins": 0.06639834493398666, + "rewards/rejected": -0.15484629571437836, + "step": 10060 + }, + { + "epoch": 1.82, + "grad_norm": 2.4767024517059326, + "learning_rate": 2.194205852380259e-06, + "log_odds_chosen": 1.2953739166259766, + "log_odds_ratio": -0.42459726333618164, + "logits/chosen": -0.43675118684768677, + "logits/rejected": -0.46500349044799805, + "logps/chosen": -0.8256725072860718, + "logps/rejected": -1.7106800079345703, + "loss": 0.9456, + "nll_loss": 0.9031159281730652, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0825672596693039, + "rewards/margins": 0.08850078284740448, + "rewards/rejected": -0.17106802761554718, + "step": 10070 + }, + { + "epoch": 1.82, + "grad_norm": 2.0872771739959717, + "learning_rate": 2.188382588440821e-06, + "log_odds_chosen": 0.7203958630561829, + "log_odds_ratio": -0.5865238904953003, + "logits/chosen": -0.4555412828922272, + "logits/rejected": -0.43005886673927307, + "logps/chosen": -0.9552356600761414, + "logps/rejected": -1.4711189270019531, + "loss": 0.9704, + "nll_loss": 0.9117962718009949, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09552355855703354, + "rewards/margins": 0.05158834531903267, + "rewards/rejected": -0.1471119225025177, + "step": 10080 + }, + { + "epoch": 1.82, + "grad_norm": 0.9507777690887451, + "learning_rate": 2.182559324501383e-06, + "log_odds_chosen": 1.4054168462753296, + "log_odds_ratio": -0.49908047914505005, + "logits/chosen": -0.4112454950809479, + "logits/rejected": -0.392622709274292, + "logps/chosen": -0.8882936239242554, + "logps/rejected": -1.9450048208236694, + "loss": 0.9121, + "nll_loss": 0.862238883972168, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08882936090230942, + "rewards/margins": 0.10567110776901245, + "rewards/rejected": -0.19450047612190247, + "step": 10090 + }, + { + "epoch": 1.82, + "grad_norm": 1.3266576528549194, + "learning_rate": 2.1767360605619447e-06, + "log_odds_chosen": 0.7194452285766602, + "log_odds_ratio": -0.5657768249511719, + "logits/chosen": -0.4552794396877289, + "logits/rejected": -0.4327179491519928, + "logps/chosen": -0.927895188331604, + "logps/rejected": -1.453904390335083, + "loss": 0.9989, + "nll_loss": 0.9423456192016602, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.09278951585292816, + "rewards/margins": 0.05260094255208969, + "rewards/rejected": -0.14539046585559845, + "step": 10100 + }, + { + "epoch": 1.83, + "grad_norm": 1.1379450559616089, + "learning_rate": 2.170912796622507e-06, + "log_odds_chosen": 0.949578583240509, + "log_odds_ratio": -0.52561354637146, + "logits/chosen": -0.45671501755714417, + "logits/rejected": -0.478085994720459, + "logps/chosen": -0.9839479327201843, + "logps/rejected": -1.619179368019104, + "loss": 0.9291, + "nll_loss": 0.876539409160614, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09839479625225067, + "rewards/margins": 0.06352315098047256, + "rewards/rejected": -0.16191793978214264, + "step": 10110 + }, + { + "epoch": 1.83, + "grad_norm": 0.87856125831604, + "learning_rate": 2.1650895326830685e-06, + "log_odds_chosen": 1.1006487607955933, + "log_odds_ratio": -0.45974189043045044, + "logits/chosen": -0.4218316674232483, + "logits/rejected": -0.46339720487594604, + "logps/chosen": -0.8847794532775879, + "logps/rejected": -1.6512638330459595, + "loss": 0.9331, + "nll_loss": 0.887128472328186, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08847793191671371, + "rewards/margins": 0.07664843648672104, + "rewards/rejected": -0.16512638330459595, + "step": 10120 + }, + { + "epoch": 1.83, + "grad_norm": 1.5756317377090454, + "learning_rate": 2.1592662687436304e-06, + "log_odds_chosen": 1.3205078840255737, + "log_odds_ratio": -0.45531734824180603, + "logits/chosen": -0.40888315439224243, + "logits/rejected": -0.4066869616508484, + "logps/chosen": -0.8146879076957703, + "logps/rejected": -1.7089191675186157, + "loss": 0.8601, + "nll_loss": 0.8145501017570496, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08146879822015762, + "rewards/margins": 0.08942312747240067, + "rewards/rejected": -0.1708919107913971, + "step": 10130 + }, + { + "epoch": 1.83, + "grad_norm": 2.2840380668640137, + "learning_rate": 2.1534430048041927e-06, + "log_odds_chosen": 0.9677292108535767, + "log_odds_ratio": -0.5366533994674683, + "logits/chosen": -0.4365871846675873, + "logits/rejected": -0.45229673385620117, + "logps/chosen": -0.9193583726882935, + "logps/rejected": -1.5962176322937012, + "loss": 0.8965, + "nll_loss": 0.8428059816360474, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09193582832813263, + "rewards/margins": 0.06768593192100525, + "rewards/rejected": -0.15962176024913788, + "step": 10140 + }, + { + "epoch": 1.83, + "grad_norm": 1.5510019063949585, + "learning_rate": 2.1476197408647546e-06, + "log_odds_chosen": 1.1609294414520264, + "log_odds_ratio": -0.5104137063026428, + "logits/chosen": -0.4274633824825287, + "logits/rejected": -0.46502789855003357, + "logps/chosen": -0.8433011770248413, + "logps/rejected": -1.6716792583465576, + "loss": 0.8893, + "nll_loss": 0.8382207155227661, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08433012664318085, + "rewards/margins": 0.08283781260251999, + "rewards/rejected": -0.16716793179512024, + "step": 10150 + }, + { + "epoch": 1.84, + "grad_norm": 1.7966018915176392, + "learning_rate": 2.141796476925317e-06, + "log_odds_chosen": 1.1001217365264893, + "log_odds_ratio": -0.4984889030456543, + "logits/chosen": -0.4312856197357178, + "logits/rejected": -0.45621857047080994, + "logps/chosen": -0.904300332069397, + "logps/rejected": -1.708540678024292, + "loss": 0.9235, + "nll_loss": 0.8736575841903687, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09043003618717194, + "rewards/margins": 0.08042405545711517, + "rewards/rejected": -0.1708541065454483, + "step": 10160 + }, + { + "epoch": 1.84, + "grad_norm": 1.6098682880401611, + "learning_rate": 2.1359732129858784e-06, + "log_odds_chosen": 0.7923783659934998, + "log_odds_ratio": -0.5927404761314392, + "logits/chosen": -0.4654027819633484, + "logits/rejected": -0.44761067628860474, + "logps/chosen": -0.9215261340141296, + "logps/rejected": -1.476153016090393, + "loss": 0.9836, + "nll_loss": 0.9243131875991821, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09215261787176132, + "rewards/margins": 0.05546267703175545, + "rewards/rejected": -0.14761529862880707, + "step": 10170 + }, + { + "epoch": 1.84, + "grad_norm": 1.7316303253173828, + "learning_rate": 2.1301499490464402e-06, + "log_odds_chosen": 0.8692628741264343, + "log_odds_ratio": -0.5368847250938416, + "logits/chosen": -0.5000066757202148, + "logits/rejected": -0.4991425573825836, + "logps/chosen": -1.024705410003662, + "logps/rejected": -1.6390259265899658, + "loss": 1.0576, + "nll_loss": 1.0039422512054443, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.10247053951025009, + "rewards/margins": 0.06143205612897873, + "rewards/rejected": -0.16390261054039001, + "step": 10180 + }, + { + "epoch": 1.84, + "grad_norm": 1.9501219987869263, + "learning_rate": 2.1243266851070026e-06, + "log_odds_chosen": 1.5202281475067139, + "log_odds_ratio": -0.44537049531936646, + "logits/chosen": -0.411649227142334, + "logits/rejected": -0.43437641859054565, + "logps/chosen": -0.9563344717025757, + "logps/rejected": -2.076907157897949, + "loss": 0.9448, + "nll_loss": 0.9002774953842163, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09563344717025757, + "rewards/margins": 0.11205726861953735, + "rewards/rejected": -0.20769071578979492, + "step": 10190 + }, + { + "epoch": 1.84, + "grad_norm": 1.2907829284667969, + "learning_rate": 2.1185034211675644e-06, + "log_odds_chosen": 0.8441012501716614, + "log_odds_ratio": -0.563463568687439, + "logits/chosen": -0.5322362184524536, + "logits/rejected": -0.51605224609375, + "logps/chosen": -1.0157923698425293, + "logps/rejected": -1.6410290002822876, + "loss": 1.0297, + "nll_loss": 0.973351776599884, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10157923400402069, + "rewards/margins": 0.06252367049455643, + "rewards/rejected": -0.16410291194915771, + "step": 10200 + }, + { + "epoch": 1.84, + "grad_norm": 1.910621166229248, + "learning_rate": 2.112680157228126e-06, + "log_odds_chosen": 1.0539970397949219, + "log_odds_ratio": -0.4770180583000183, + "logits/chosen": -0.44424891471862793, + "logits/rejected": -0.450103223323822, + "logps/chosen": -0.9240690469741821, + "logps/rejected": -1.6838849782943726, + "loss": 0.9446, + "nll_loss": 0.8969265818595886, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09240689128637314, + "rewards/margins": 0.07598160207271576, + "rewards/rejected": -0.1683885157108307, + "step": 10210 + }, + { + "epoch": 1.85, + "grad_norm": 1.2610995769500732, + "learning_rate": 2.1068568932886882e-06, + "log_odds_chosen": 1.1630918979644775, + "log_odds_ratio": -0.49245303869247437, + "logits/chosen": -0.4346703588962555, + "logits/rejected": -0.4480690360069275, + "logps/chosen": -0.879808247089386, + "logps/rejected": -1.7137651443481445, + "loss": 0.8559, + "nll_loss": 0.8066719770431519, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08798082172870636, + "rewards/margins": 0.08339568227529526, + "rewards/rejected": -0.17137651145458221, + "step": 10220 + }, + { + "epoch": 1.85, + "grad_norm": 1.153172492980957, + "learning_rate": 2.10103362934925e-06, + "log_odds_chosen": 1.0892242193222046, + "log_odds_ratio": -0.5428228378295898, + "logits/chosen": -0.47800391912460327, + "logits/rejected": -0.46864748001098633, + "logps/chosen": -0.8633974194526672, + "logps/rejected": -1.635337471961975, + "loss": 0.9987, + "nll_loss": 0.9444171786308289, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08633974194526672, + "rewards/margins": 0.07719399780035019, + "rewards/rejected": -0.1635337471961975, + "step": 10230 + }, + { + "epoch": 1.85, + "grad_norm": 1.19821035861969, + "learning_rate": 2.095210365409812e-06, + "log_odds_chosen": 1.3325726985931396, + "log_odds_ratio": -0.4372948706150055, + "logits/chosen": -0.39473778009414673, + "logits/rejected": -0.45915713906288147, + "logps/chosen": -0.8078659772872925, + "logps/rejected": -1.7254482507705688, + "loss": 0.8942, + "nll_loss": 0.8504945635795593, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08078660070896149, + "rewards/margins": 0.09175822883844376, + "rewards/rejected": -0.17254483699798584, + "step": 10240 + }, + { + "epoch": 1.85, + "grad_norm": 0.8175916075706482, + "learning_rate": 2.0893871014703743e-06, + "log_odds_chosen": 0.9957863688468933, + "log_odds_ratio": -0.4890173375606537, + "logits/chosen": -0.4783898890018463, + "logits/rejected": -0.44662246108055115, + "logps/chosen": -0.8859121203422546, + "logps/rejected": -1.575537919998169, + "loss": 0.8932, + "nll_loss": 0.8443046808242798, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08859121799468994, + "rewards/margins": 0.06896258145570755, + "rewards/rejected": -0.1575538069009781, + "step": 10250 + }, + { + "epoch": 1.85, + "grad_norm": 1.2621811628341675, + "learning_rate": 2.0835638375309358e-06, + "log_odds_chosen": 0.5714842081069946, + "log_odds_ratio": -0.6428765654563904, + "logits/chosen": -0.4756339490413666, + "logits/rejected": -0.4753071367740631, + "logps/chosen": -0.949560821056366, + "logps/rejected": -1.346477746963501, + "loss": 0.9943, + "nll_loss": 0.9299713373184204, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09495609253644943, + "rewards/margins": 0.03969167545437813, + "rewards/rejected": -0.13464777171611786, + "step": 10260 + }, + { + "epoch": 1.86, + "grad_norm": 1.2808736562728882, + "learning_rate": 2.077740573591498e-06, + "log_odds_chosen": 1.1980714797973633, + "log_odds_ratio": -0.4109151363372803, + "logits/chosen": -0.4529429078102112, + "logits/rejected": -0.46951961517333984, + "logps/chosen": -0.8301106691360474, + "logps/rejected": -1.5841398239135742, + "loss": 0.923, + "nll_loss": 0.8818821907043457, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.08301106840372086, + "rewards/margins": 0.07540292292833328, + "rewards/rejected": -0.15841399133205414, + "step": 10270 + }, + { + "epoch": 1.86, + "grad_norm": 1.2657065391540527, + "learning_rate": 2.07191730965206e-06, + "log_odds_chosen": 0.738211989402771, + "log_odds_ratio": -0.5358443856239319, + "logits/chosen": -0.5280221700668335, + "logits/rejected": -0.5341017246246338, + "logps/chosen": -1.0342191457748413, + "logps/rejected": -1.5212204456329346, + "loss": 1.0247, + "nll_loss": 0.9711573719978333, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10342191159725189, + "rewards/margins": 0.04870011284947395, + "rewards/rejected": -0.15212205052375793, + "step": 10280 + }, + { + "epoch": 1.86, + "grad_norm": 1.4096204042434692, + "learning_rate": 2.066094045712622e-06, + "log_odds_chosen": 0.7922347784042358, + "log_odds_ratio": -0.6058631539344788, + "logits/chosen": -0.46681904792785645, + "logits/rejected": -0.46833691000938416, + "logps/chosen": -0.8852903246879578, + "logps/rejected": -1.4500166177749634, + "loss": 0.922, + "nll_loss": 0.8614572286605835, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08852903544902802, + "rewards/margins": 0.05647264048457146, + "rewards/rejected": -0.14500167965888977, + "step": 10290 + }, + { + "epoch": 1.86, + "grad_norm": 1.230337142944336, + "learning_rate": 2.0602707817731837e-06, + "log_odds_chosen": 0.8310950398445129, + "log_odds_ratio": -0.537976861000061, + "logits/chosen": -0.43221116065979004, + "logits/rejected": -0.4261694550514221, + "logps/chosen": -0.9257045984268188, + "logps/rejected": -1.5209006071090698, + "loss": 1.0114, + "nll_loss": 0.9576155543327332, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.092570461332798, + "rewards/margins": 0.05951959639787674, + "rewards/rejected": -0.15209007263183594, + "step": 10300 + }, + { + "epoch": 1.86, + "grad_norm": 1.3708995580673218, + "learning_rate": 2.0544475178337456e-06, + "log_odds_chosen": 0.9308149218559265, + "log_odds_ratio": -0.5380842685699463, + "logits/chosen": -0.41678470373153687, + "logits/rejected": -0.46170076727867126, + "logps/chosen": -0.840059757232666, + "logps/rejected": -1.5198965072631836, + "loss": 0.9102, + "nll_loss": 0.856410026550293, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08400598913431168, + "rewards/margins": 0.06798367202281952, + "rewards/rejected": -0.1519896537065506, + "step": 10310 + }, + { + "epoch": 1.86, + "grad_norm": 1.3757504224777222, + "learning_rate": 2.0486242538943075e-06, + "log_odds_chosen": 1.27957284450531, + "log_odds_ratio": -0.444243848323822, + "logits/chosen": -0.374275267124176, + "logits/rejected": -0.4208962917327881, + "logps/chosen": -0.8259752988815308, + "logps/rejected": -1.6976467370986938, + "loss": 0.8901, + "nll_loss": 0.8456643223762512, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.0825975313782692, + "rewards/margins": 0.08716712892055511, + "rewards/rejected": -0.1697646528482437, + "step": 10320 + }, + { + "epoch": 1.87, + "grad_norm": 0.9839291572570801, + "learning_rate": 2.04280098995487e-06, + "log_odds_chosen": 1.0052300691604614, + "log_odds_ratio": -0.49391070008277893, + "logits/chosen": -0.4329233169555664, + "logits/rejected": -0.4708273410797119, + "logps/chosen": -0.9669458270072937, + "logps/rejected": -1.6411478519439697, + "loss": 0.9361, + "nll_loss": 0.8866797685623169, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09669457376003265, + "rewards/margins": 0.06742019951343536, + "rewards/rejected": -0.1641147881746292, + "step": 10330 + }, + { + "epoch": 1.87, + "grad_norm": 1.147086262702942, + "learning_rate": 2.0369777260154317e-06, + "log_odds_chosen": 0.954150378704071, + "log_odds_ratio": -0.5189642310142517, + "logits/chosen": -0.4370267391204834, + "logits/rejected": -0.4634695053100586, + "logps/chosen": -0.8270130157470703, + "logps/rejected": -1.4731693267822266, + "loss": 0.935, + "nll_loss": 0.8830587267875671, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08270130306482315, + "rewards/margins": 0.0646156296133995, + "rewards/rejected": -0.14731693267822266, + "step": 10340 + }, + { + "epoch": 1.87, + "grad_norm": 1.124394178390503, + "learning_rate": 2.0311544620759936e-06, + "log_odds_chosen": 1.1465202569961548, + "log_odds_ratio": -0.5014799237251282, + "logits/chosen": -0.43889516592025757, + "logits/rejected": -0.45893678069114685, + "logps/chosen": -0.9335900545120239, + "logps/rejected": -1.7377182245254517, + "loss": 0.9606, + "nll_loss": 0.910497784614563, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09335900843143463, + "rewards/margins": 0.08041281998157501, + "rewards/rejected": -0.17377182841300964, + "step": 10350 + }, + { + "epoch": 1.87, + "grad_norm": 1.9966627359390259, + "learning_rate": 2.0253311981365555e-06, + "log_odds_chosen": 1.1126954555511475, + "log_odds_ratio": -0.5292860269546509, + "logits/chosen": -0.46195143461227417, + "logits/rejected": -0.48550620675086975, + "logps/chosen": -0.8964789509773254, + "logps/rejected": -1.733088731765747, + "loss": 0.963, + "nll_loss": 0.9100550413131714, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08964791148900986, + "rewards/margins": 0.08366095274686813, + "rewards/rejected": -0.17330887913703918, + "step": 10360 + }, + { + "epoch": 1.87, + "grad_norm": 2.0089480876922607, + "learning_rate": 2.0195079341971174e-06, + "log_odds_chosen": 1.166711449623108, + "log_odds_ratio": -0.5026243925094604, + "logits/chosen": -0.44632425904273987, + "logits/rejected": -0.43838948011398315, + "logps/chosen": -0.8504183888435364, + "logps/rejected": -1.643214225769043, + "loss": 0.95, + "nll_loss": 0.899712860584259, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0850418359041214, + "rewards/margins": 0.0792795792222023, + "rewards/rejected": -0.1643213927745819, + "step": 10370 + }, + { + "epoch": 1.88, + "grad_norm": 1.4208773374557495, + "learning_rate": 2.0136846702576797e-06, + "log_odds_chosen": 1.0821831226348877, + "log_odds_ratio": -0.5470158457756042, + "logits/chosen": -0.4514341950416565, + "logits/rejected": -0.439272403717041, + "logps/chosen": -0.9203437566757202, + "logps/rejected": -1.6602756977081299, + "loss": 0.8832, + "nll_loss": 0.8285048604011536, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09203437715768814, + "rewards/margins": 0.07399321347475052, + "rewards/rejected": -0.16602759063243866, + "step": 10380 + }, + { + "epoch": 1.88, + "grad_norm": 1.332013726234436, + "learning_rate": 2.007861406318241e-06, + "log_odds_chosen": 1.0032024383544922, + "log_odds_ratio": -0.49925652146339417, + "logits/chosen": -0.41910386085510254, + "logits/rejected": -0.44506892561912537, + "logps/chosen": -0.7921913862228394, + "logps/rejected": -1.494015097618103, + "loss": 0.9336, + "nll_loss": 0.8837070465087891, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07921913266181946, + "rewards/margins": 0.07018236815929413, + "rewards/rejected": -0.14940151572227478, + "step": 10390 + }, + { + "epoch": 1.88, + "grad_norm": 1.406699299812317, + "learning_rate": 2.002038142378803e-06, + "log_odds_chosen": 1.0385334491729736, + "log_odds_ratio": -0.5291948318481445, + "logits/chosen": -0.4945642948150635, + "logits/rejected": -0.502414345741272, + "logps/chosen": -0.8367093801498413, + "logps/rejected": -1.5903352499008179, + "loss": 1.0034, + "nll_loss": 0.9504679441452026, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08367092907428741, + "rewards/margins": 0.07536258548498154, + "rewards/rejected": -0.15903352200984955, + "step": 10400 + }, + { + "epoch": 1.88, + "grad_norm": 1.2992877960205078, + "learning_rate": 1.996214878439365e-06, + "log_odds_chosen": 1.3700332641601562, + "log_odds_ratio": -0.4885827898979187, + "logits/chosen": -0.4483235478401184, + "logits/rejected": -0.45449957251548767, + "logps/chosen": -0.8136239051818848, + "logps/rejected": -1.7881104946136475, + "loss": 0.9034, + "nll_loss": 0.8545898199081421, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08136239647865295, + "rewards/margins": 0.09744866192340851, + "rewards/rejected": -0.17881104350090027, + "step": 10410 + }, + { + "epoch": 1.88, + "grad_norm": 1.1529314517974854, + "learning_rate": 1.9903916144999272e-06, + "log_odds_chosen": 1.0313104391098022, + "log_odds_ratio": -0.5160423517227173, + "logits/chosen": -0.4965154528617859, + "logits/rejected": -0.4709865152835846, + "logps/chosen": -0.9274126291275024, + "logps/rejected": -1.6849533319473267, + "loss": 0.962, + "nll_loss": 0.910438060760498, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09274126589298248, + "rewards/margins": 0.07575404644012451, + "rewards/rejected": -0.168495312333107, + "step": 10420 + }, + { + "epoch": 1.88, + "grad_norm": 2.287060499191284, + "learning_rate": 1.984568350560489e-06, + "log_odds_chosen": 1.3181803226470947, + "log_odds_ratio": -0.4897107481956482, + "logits/chosen": -0.4968651235103607, + "logits/rejected": -0.5039435625076294, + "logps/chosen": -0.8362882733345032, + "logps/rejected": -1.8195197582244873, + "loss": 0.9172, + "nll_loss": 0.8682142496109009, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0836288332939148, + "rewards/margins": 0.09832315146923065, + "rewards/rejected": -0.18195198476314545, + "step": 10430 + }, + { + "epoch": 1.89, + "grad_norm": 2.968153953552246, + "learning_rate": 1.978745086621051e-06, + "log_odds_chosen": 1.207155466079712, + "log_odds_ratio": -0.4584660530090332, + "logits/chosen": -0.449285089969635, + "logits/rejected": -0.45987534523010254, + "logps/chosen": -0.8811396360397339, + "logps/rejected": -1.7007176876068115, + "loss": 0.9449, + "nll_loss": 0.899084746837616, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08811396360397339, + "rewards/margins": 0.08195780217647552, + "rewards/rejected": -0.1700717806816101, + "step": 10440 + }, + { + "epoch": 1.89, + "grad_norm": 1.2522773742675781, + "learning_rate": 1.972921822681613e-06, + "log_odds_chosen": 1.0319015979766846, + "log_odds_ratio": -0.5145862102508545, + "logits/chosen": -0.4706307053565979, + "logits/rejected": -0.45233672857284546, + "logps/chosen": -0.8894746899604797, + "logps/rejected": -1.6508815288543701, + "loss": 0.935, + "nll_loss": 0.8835735321044922, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08894746750593185, + "rewards/margins": 0.07614068686962128, + "rewards/rejected": -0.16508816182613373, + "step": 10450 + }, + { + "epoch": 1.89, + "grad_norm": 0.9435124397277832, + "learning_rate": 1.9670985587421748e-06, + "log_odds_chosen": 1.142953634262085, + "log_odds_ratio": -0.4884144365787506, + "logits/chosen": -0.4347442090511322, + "logits/rejected": -0.4512789249420166, + "logps/chosen": -0.8941251039505005, + "logps/rejected": -1.6644847393035889, + "loss": 0.9366, + "nll_loss": 0.887799859046936, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08941250294446945, + "rewards/margins": 0.07703599333763123, + "rewards/rejected": -0.16644850373268127, + "step": 10460 + }, + { + "epoch": 1.89, + "grad_norm": 1.5761239528656006, + "learning_rate": 1.961275294802737e-06, + "log_odds_chosen": 1.017764687538147, + "log_odds_ratio": -0.5214357376098633, + "logits/chosen": -0.4196850657463074, + "logits/rejected": -0.41368383169174194, + "logps/chosen": -1.0039745569229126, + "logps/rejected": -1.7455123662948608, + "loss": 0.9837, + "nll_loss": 0.931601881980896, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.10039746761322021, + "rewards/margins": 0.07415377348661423, + "rewards/rejected": -0.17455123364925385, + "step": 10470 + }, + { + "epoch": 1.89, + "grad_norm": 1.391938328742981, + "learning_rate": 1.9554520308632985e-06, + "log_odds_chosen": 0.8118370175361633, + "log_odds_ratio": -0.5162337422370911, + "logits/chosen": -0.4641450345516205, + "logits/rejected": -0.46027374267578125, + "logps/chosen": -0.9821346402168274, + "logps/rejected": -1.5798102617263794, + "loss": 0.9943, + "nll_loss": 0.9427239298820496, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09821345657110214, + "rewards/margins": 0.059767574071884155, + "rewards/rejected": -0.1579810380935669, + "step": 10480 + }, + { + "epoch": 1.89, + "grad_norm": 1.5339981317520142, + "learning_rate": 1.949628766923861e-06, + "log_odds_chosen": 1.3268300294876099, + "log_odds_ratio": -0.43518322706222534, + "logits/chosen": -0.40181851387023926, + "logits/rejected": -0.4130808711051941, + "logps/chosen": -0.8143070340156555, + "logps/rejected": -1.6981405019760132, + "loss": 0.8613, + "nll_loss": 0.8178032040596008, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08143070340156555, + "rewards/margins": 0.08838334679603577, + "rewards/rejected": -0.1698140650987625, + "step": 10490 + }, + { + "epoch": 1.9, + "grad_norm": 1.1120244264602661, + "learning_rate": 1.9438055029844227e-06, + "log_odds_chosen": 0.5836787223815918, + "log_odds_ratio": -0.6393161416053772, + "logits/chosen": -0.5042875409126282, + "logits/rejected": -0.4676700532436371, + "logps/chosen": -1.0195683240890503, + "logps/rejected": -1.4169611930847168, + "loss": 0.9633, + "nll_loss": 0.8993996381759644, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.10195682942867279, + "rewards/margins": 0.03973928466439247, + "rewards/rejected": -0.14169611036777496, + "step": 10500 + }, + { + "epoch": 1.9, + "grad_norm": 3.777588367462158, + "learning_rate": 1.9379822390449846e-06, + "log_odds_chosen": 0.7282012701034546, + "log_odds_ratio": -0.5987340807914734, + "logits/chosen": -0.4834275245666504, + "logits/rejected": -0.49766093492507935, + "logps/chosen": -0.9485651254653931, + "logps/rejected": -1.4880679845809937, + "loss": 1.0014, + "nll_loss": 0.9415055513381958, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.09485651552677155, + "rewards/margins": 0.053950272500514984, + "rewards/rejected": -0.14880679547786713, + "step": 10510 + }, + { + "epoch": 1.9, + "grad_norm": 1.464909315109253, + "learning_rate": 1.9321589751055465e-06, + "log_odds_chosen": 0.9100486636161804, + "log_odds_ratio": -0.5332632064819336, + "logits/chosen": -0.4278945326805115, + "logits/rejected": -0.4454631209373474, + "logps/chosen": -0.8939846754074097, + "logps/rejected": -1.5239169597625732, + "loss": 0.9558, + "nll_loss": 0.9025092124938965, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08939848095178604, + "rewards/margins": 0.06299323588609695, + "rewards/rejected": -0.1523916870355606, + "step": 10520 + }, + { + "epoch": 1.9, + "grad_norm": 1.9130334854125977, + "learning_rate": 1.9263357111661084e-06, + "log_odds_chosen": 0.9289069175720215, + "log_odds_ratio": -0.48663145303726196, + "logits/chosen": -0.46611160039901733, + "logits/rejected": -0.470900297164917, + "logps/chosen": -0.9118865728378296, + "logps/rejected": -1.5583826303482056, + "loss": 0.9263, + "nll_loss": 0.8776055574417114, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09118865430355072, + "rewards/margins": 0.06464960426092148, + "rewards/rejected": -0.155838280916214, + "step": 10530 + }, + { + "epoch": 1.9, + "grad_norm": 2.1003193855285645, + "learning_rate": 1.9205124472266707e-06, + "log_odds_chosen": 0.6223214864730835, + "log_odds_ratio": -0.5882120132446289, + "logits/chosen": -0.5001975297927856, + "logits/rejected": -0.4912651479244232, + "logps/chosen": -0.9899564981460571, + "logps/rejected": -1.4137847423553467, + "loss": 0.9585, + "nll_loss": 0.8996666073799133, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09899566322565079, + "rewards/margins": 0.042382821440696716, + "rewards/rejected": -0.1413784921169281, + "step": 10540 + }, + { + "epoch": 1.91, + "grad_norm": 0.984575092792511, + "learning_rate": 1.9146891832872326e-06, + "log_odds_chosen": 0.868719220161438, + "log_odds_ratio": -0.570231556892395, + "logits/chosen": -0.46261295676231384, + "logits/rejected": -0.4425339102745056, + "logps/chosen": -1.009018898010254, + "logps/rejected": -1.5990309715270996, + "loss": 0.9591, + "nll_loss": 0.9021209478378296, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10090188682079315, + "rewards/margins": 0.05900119990110397, + "rewards/rejected": -0.15990306437015533, + "step": 10550 + }, + { + "epoch": 1.91, + "grad_norm": 1.5289955139160156, + "learning_rate": 1.9088659193477945e-06, + "log_odds_chosen": 0.7999789714813232, + "log_odds_ratio": -0.5057905316352844, + "logits/chosen": -0.49575895071029663, + "logits/rejected": -0.5013529062271118, + "logps/chosen": -0.8297192454338074, + "logps/rejected": -1.3387572765350342, + "loss": 0.9545, + "nll_loss": 0.9039432406425476, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08297193050384521, + "rewards/margins": 0.050903789699077606, + "rewards/rejected": -0.13387572765350342, + "step": 10560 + }, + { + "epoch": 1.91, + "grad_norm": 2.148205041885376, + "learning_rate": 1.9030426554083564e-06, + "log_odds_chosen": 1.1249816417694092, + "log_odds_ratio": -0.4598170816898346, + "logits/chosen": -0.4110940098762512, + "logits/rejected": -0.4696727693080902, + "logps/chosen": -0.8722484707832336, + "logps/rejected": -1.6248779296875, + "loss": 0.9154, + "nll_loss": 0.869467556476593, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08722484111785889, + "rewards/margins": 0.07526294887065887, + "rewards/rejected": -0.16248780488967896, + "step": 10570 + }, + { + "epoch": 1.91, + "grad_norm": 2.001585006713867, + "learning_rate": 1.8972193914689182e-06, + "log_odds_chosen": 0.7757295370101929, + "log_odds_ratio": -0.5288358926773071, + "logits/chosen": -0.43838778138160706, + "logits/rejected": -0.4571276605129242, + "logps/chosen": -0.8766340017318726, + "logps/rejected": -1.429776668548584, + "loss": 0.8802, + "nll_loss": 0.8273234367370605, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.08766339719295502, + "rewards/margins": 0.05531426519155502, + "rewards/rejected": -0.14297766983509064, + "step": 10580 + }, + { + "epoch": 1.91, + "grad_norm": 1.1974306106567383, + "learning_rate": 1.8913961275294801e-06, + "log_odds_chosen": 1.1978665590286255, + "log_odds_ratio": -0.46727222204208374, + "logits/chosen": -0.4318181574344635, + "logits/rejected": -0.43885666131973267, + "logps/chosen": -0.9730944633483887, + "logps/rejected": -1.8150783777236938, + "loss": 0.9856, + "nll_loss": 0.9389021992683411, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09730944782495499, + "rewards/margins": 0.08419839292764664, + "rewards/rejected": -0.18150784075260162, + "step": 10590 + }, + { + "epoch": 1.91, + "grad_norm": 1.1426621675491333, + "learning_rate": 1.885572863590042e-06, + "log_odds_chosen": 0.7413898706436157, + "log_odds_ratio": -0.5726367235183716, + "logits/chosen": -0.4949052929878235, + "logits/rejected": -0.4821873605251312, + "logps/chosen": -0.9837077260017395, + "logps/rejected": -1.532716989517212, + "loss": 1.0599, + "nll_loss": 1.0026252269744873, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09837077558040619, + "rewards/margins": 0.05490092560648918, + "rewards/rejected": -0.15327170491218567, + "step": 10600 + }, + { + "epoch": 1.92, + "grad_norm": 0.9339642524719238, + "learning_rate": 1.8797495996506041e-06, + "log_odds_chosen": 1.1743080615997314, + "log_odds_ratio": -0.4658436179161072, + "logits/chosen": -0.4181239604949951, + "logits/rejected": -0.3980239927768707, + "logps/chosen": -0.8593562841415405, + "logps/rejected": -1.6901576519012451, + "loss": 0.8921, + "nll_loss": 0.8454761505126953, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08593563735485077, + "rewards/margins": 0.08308015763759613, + "rewards/rejected": -0.1690157949924469, + "step": 10610 + }, + { + "epoch": 1.92, + "grad_norm": 1.617745041847229, + "learning_rate": 1.873926335711166e-06, + "log_odds_chosen": 1.598487377166748, + "log_odds_ratio": -0.43908342719078064, + "logits/chosen": -0.3870371878147125, + "logits/rejected": -0.44044798612594604, + "logps/chosen": -0.788866400718689, + "logps/rejected": -1.896519660949707, + "loss": 0.9262, + "nll_loss": 0.8823148608207703, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.07888665050268173, + "rewards/margins": 0.11076532304286957, + "rewards/rejected": -0.1896519660949707, + "step": 10620 + }, + { + "epoch": 1.92, + "grad_norm": 2.108999729156494, + "learning_rate": 1.8681030717717279e-06, + "log_odds_chosen": 0.9158283472061157, + "log_odds_ratio": -0.5347623229026794, + "logits/chosen": -0.47179970145225525, + "logits/rejected": -0.4699479937553406, + "logps/chosen": -0.9932149052619934, + "logps/rejected": -1.637498140335083, + "loss": 0.968, + "nll_loss": 0.91449373960495, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09932147711515427, + "rewards/margins": 0.06442831456661224, + "rewards/rejected": -0.1637497991323471, + "step": 10630 + }, + { + "epoch": 1.92, + "grad_norm": 1.5383033752441406, + "learning_rate": 1.86227980783229e-06, + "log_odds_chosen": 0.801388144493103, + "log_odds_ratio": -0.5564557909965515, + "logits/chosen": -0.4179549217224121, + "logits/rejected": -0.4258841574192047, + "logps/chosen": -0.8891839981079102, + "logps/rejected": -1.4303535223007202, + "loss": 0.9048, + "nll_loss": 0.8491758108139038, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08891840279102325, + "rewards/margins": 0.05411697179079056, + "rewards/rejected": -0.1430353820323944, + "step": 10640 + }, + { + "epoch": 1.92, + "grad_norm": 1.2563393115997314, + "learning_rate": 1.8564565438928519e-06, + "log_odds_chosen": 1.0633662939071655, + "log_odds_ratio": -0.4974342882633209, + "logits/chosen": -0.4390109181404114, + "logits/rejected": -0.45791512727737427, + "logps/chosen": -0.9185087084770203, + "logps/rejected": -1.6706024408340454, + "loss": 0.9623, + "nll_loss": 0.9125840067863464, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.0918508842587471, + "rewards/margins": 0.0752093642950058, + "rewards/rejected": -0.1670602411031723, + "step": 10650 + }, + { + "epoch": 1.93, + "grad_norm": 1.8719691038131714, + "learning_rate": 1.850633279953414e-06, + "log_odds_chosen": 1.075117826461792, + "log_odds_ratio": -0.5143997669219971, + "logits/chosen": -0.42022705078125, + "logits/rejected": -0.43766602873802185, + "logps/chosen": -0.8672161102294922, + "logps/rejected": -1.615086317062378, + "loss": 0.9092, + "nll_loss": 0.8577867746353149, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.08672161400318146, + "rewards/margins": 0.07478703558444977, + "rewards/rejected": -0.16150864958763123, + "step": 10660 + }, + { + "epoch": 1.93, + "grad_norm": 1.6300572156906128, + "learning_rate": 1.8448100160139756e-06, + "log_odds_chosen": 0.7079966068267822, + "log_odds_ratio": -0.5975538492202759, + "logits/chosen": -0.4701474606990814, + "logits/rejected": -0.442574679851532, + "logps/chosen": -0.8829503059387207, + "logps/rejected": -1.3568874597549438, + "loss": 0.9297, + "nll_loss": 0.8699787855148315, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08829504251480103, + "rewards/margins": 0.04739370197057724, + "rewards/rejected": -0.13568873703479767, + "step": 10670 + }, + { + "epoch": 1.93, + "grad_norm": 1.6964452266693115, + "learning_rate": 1.8389867520745377e-06, + "log_odds_chosen": 0.7021001577377319, + "log_odds_ratio": -0.5906314849853516, + "logits/chosen": -0.4793156683444977, + "logits/rejected": -0.458412230014801, + "logps/chosen": -1.0250890254974365, + "logps/rejected": -1.5190303325653076, + "loss": 1.0251, + "nll_loss": 0.9660388231277466, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10250891745090485, + "rewards/margins": 0.04939410835504532, + "rewards/rejected": -0.15190303325653076, + "step": 10680 + }, + { + "epoch": 1.93, + "grad_norm": 1.7809092998504639, + "learning_rate": 1.8331634881350996e-06, + "log_odds_chosen": 0.7496052980422974, + "log_odds_ratio": -0.5474902987480164, + "logits/chosen": -0.46370047330856323, + "logits/rejected": -0.47559723258018494, + "logps/chosen": -0.945774257183075, + "logps/rejected": -1.4582557678222656, + "loss": 0.9561, + "nll_loss": 0.9013026356697083, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09457743167877197, + "rewards/margins": 0.05124815180897713, + "rewards/rejected": -0.1458255797624588, + "step": 10690 + }, + { + "epoch": 1.93, + "grad_norm": 0.7367926836013794, + "learning_rate": 1.8273402241956617e-06, + "log_odds_chosen": 0.6301822066307068, + "log_odds_ratio": -0.6171268224716187, + "logits/chosen": -0.4975239634513855, + "logits/rejected": -0.47095784544944763, + "logps/chosen": -0.9024847149848938, + "logps/rejected": -1.346397876739502, + "loss": 0.9553, + "nll_loss": 0.8935791254043579, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0902484729886055, + "rewards/margins": 0.04439132288098335, + "rewards/rejected": -0.13463978469371796, + "step": 10700 + }, + { + "epoch": 1.93, + "grad_norm": 1.028935432434082, + "learning_rate": 1.8215169602562234e-06, + "log_odds_chosen": 0.8751462697982788, + "log_odds_ratio": -0.5305973887443542, + "logits/chosen": -0.46825432777404785, + "logits/rejected": -0.48088812828063965, + "logps/chosen": -0.8856697082519531, + "logps/rejected": -1.4735255241394043, + "loss": 0.9443, + "nll_loss": 0.8912407755851746, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08856697380542755, + "rewards/margins": 0.058785580098629, + "rewards/rejected": -0.14735254645347595, + "step": 10710 + }, + { + "epoch": 1.94, + "grad_norm": 0.9565833210945129, + "learning_rate": 1.8156936963167855e-06, + "log_odds_chosen": 0.7781075239181519, + "log_odds_ratio": -0.611039400100708, + "logits/chosen": -0.436478853225708, + "logits/rejected": -0.42543378472328186, + "logps/chosen": -1.1170709133148193, + "logps/rejected": -1.660090684890747, + "loss": 1.0555, + "nll_loss": 0.9943556785583496, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.11170710623264313, + "rewards/margins": 0.05430195480585098, + "rewards/rejected": -0.1660090684890747, + "step": 10720 + }, + { + "epoch": 1.94, + "grad_norm": 1.5010349750518799, + "learning_rate": 1.8098704323773476e-06, + "log_odds_chosen": 1.050125002861023, + "log_odds_ratio": -0.5018699169158936, + "logits/chosen": -0.4801279902458191, + "logits/rejected": -0.4455450177192688, + "logps/chosen": -0.8765707015991211, + "logps/rejected": -1.631679892539978, + "loss": 0.942, + "nll_loss": 0.8918358087539673, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.08765707910060883, + "rewards/margins": 0.07551092654466629, + "rewards/rejected": -0.16316799819469452, + "step": 10730 + }, + { + "epoch": 1.94, + "grad_norm": 2.181539535522461, + "learning_rate": 1.8040471684379093e-06, + "log_odds_chosen": 0.9742463827133179, + "log_odds_ratio": -0.5732168555259705, + "logits/chosen": -0.45570212602615356, + "logits/rejected": -0.4601594805717468, + "logps/chosen": -0.9455745816230774, + "logps/rejected": -1.6646206378936768, + "loss": 0.9501, + "nll_loss": 0.8927611112594604, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09455744922161102, + "rewards/margins": 0.07190461456775665, + "rewards/rejected": -0.16646204888820648, + "step": 10740 + }, + { + "epoch": 1.94, + "grad_norm": 2.1117494106292725, + "learning_rate": 1.7982239044984714e-06, + "log_odds_chosen": 0.8903090357780457, + "log_odds_ratio": -0.5152918696403503, + "logits/chosen": -0.43422913551330566, + "logits/rejected": -0.46289482712745667, + "logps/chosen": -0.8911846876144409, + "logps/rejected": -1.4724773168563843, + "loss": 0.9887, + "nll_loss": 0.9372140169143677, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.08911846578121185, + "rewards/margins": 0.05812928080558777, + "rewards/rejected": -0.14724776148796082, + "step": 10750 + }, + { + "epoch": 1.94, + "grad_norm": 1.099900245666504, + "learning_rate": 1.7924006405590332e-06, + "log_odds_chosen": 1.094017505645752, + "log_odds_ratio": -0.5704022645950317, + "logits/chosen": -0.434635728597641, + "logits/rejected": -0.43005552887916565, + "logps/chosen": -0.8817561268806458, + "logps/rejected": -1.7000316381454468, + "loss": 0.9863, + "nll_loss": 0.9292108416557312, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08817560225725174, + "rewards/margins": 0.0818275660276413, + "rewards/rejected": -0.17000316083431244, + "step": 10760 + }, + { + "epoch": 1.95, + "grad_norm": 1.4841543436050415, + "learning_rate": 1.7865773766195953e-06, + "log_odds_chosen": 0.9462421536445618, + "log_odds_ratio": -0.5376307368278503, + "logits/chosen": -0.45013323426246643, + "logits/rejected": -0.4466262757778168, + "logps/chosen": -0.9163403511047363, + "logps/rejected": -1.6084949970245361, + "loss": 0.9662, + "nll_loss": 0.9124782681465149, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09163403511047363, + "rewards/margins": 0.06921547651290894, + "rewards/rejected": -0.16084951162338257, + "step": 10770 + }, + { + "epoch": 1.95, + "grad_norm": 0.9597242474555969, + "learning_rate": 1.780754112680157e-06, + "log_odds_chosen": 0.9573777914047241, + "log_odds_ratio": -0.5530497431755066, + "logits/chosen": -0.4183259606361389, + "logits/rejected": -0.4384457468986511, + "logps/chosen": -0.9744545817375183, + "logps/rejected": -1.6454483270645142, + "loss": 1.0157, + "nll_loss": 0.9604204297065735, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.09744546562433243, + "rewards/margins": 0.06709937006235123, + "rewards/rejected": -0.16454483568668365, + "step": 10780 + }, + { + "epoch": 1.95, + "grad_norm": 1.0203475952148438, + "learning_rate": 1.7749308487407191e-06, + "log_odds_chosen": 0.9382126927375793, + "log_odds_ratio": -0.5153268575668335, + "logits/chosen": -0.45768433809280396, + "logits/rejected": -0.4407344460487366, + "logps/chosen": -0.9273079633712769, + "logps/rejected": -1.5609608888626099, + "loss": 0.9625, + "nll_loss": 0.9110045433044434, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09273079037666321, + "rewards/margins": 0.06336529552936554, + "rewards/rejected": -0.15609610080718994, + "step": 10790 + }, + { + "epoch": 1.95, + "grad_norm": 1.6416630744934082, + "learning_rate": 1.769107584801281e-06, + "log_odds_chosen": 1.2006902694702148, + "log_odds_ratio": -0.4718552529811859, + "logits/chosen": -0.4566555619239807, + "logits/rejected": -0.4525316655635834, + "logps/chosen": -0.9319143295288086, + "logps/rejected": -1.8008266687393188, + "loss": 0.8939, + "nll_loss": 0.8467473983764648, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09319143742322922, + "rewards/margins": 0.08689123392105103, + "rewards/rejected": -0.18008264899253845, + "step": 10800 + }, + { + "epoch": 1.95, + "grad_norm": 2.494734764099121, + "learning_rate": 1.763284320861843e-06, + "log_odds_chosen": 0.9643856883049011, + "log_odds_ratio": -0.5331692695617676, + "logits/chosen": -0.47048425674438477, + "logits/rejected": -0.46407657861709595, + "logps/chosen": -0.9381911158561707, + "logps/rejected": -1.6207780838012695, + "loss": 0.9624, + "nll_loss": 0.9090880155563354, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09381911903619766, + "rewards/margins": 0.06825868785381317, + "rewards/rejected": -0.16207781434059143, + "step": 10810 + }, + { + "epoch": 1.95, + "grad_norm": 1.1414620876312256, + "learning_rate": 1.7574610569224048e-06, + "log_odds_chosen": 1.0904492139816284, + "log_odds_ratio": -0.5350916385650635, + "logits/chosen": -0.4038774371147156, + "logits/rejected": -0.45020437240600586, + "logps/chosen": -0.9018272161483765, + "logps/rejected": -1.7020618915557861, + "loss": 0.9503, + "nll_loss": 0.8968209028244019, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09018273651599884, + "rewards/margins": 0.08002346754074097, + "rewards/rejected": -0.1702061891555786, + "step": 10820 + }, + { + "epoch": 1.96, + "grad_norm": 1.8628263473510742, + "learning_rate": 1.7516377929829669e-06, + "log_odds_chosen": 1.0281225442886353, + "log_odds_ratio": -0.4999011158943176, + "logits/chosen": -0.4683711528778076, + "logits/rejected": -0.475578635931015, + "logps/chosen": -0.916462779045105, + "logps/rejected": -1.6054890155792236, + "loss": 0.9858, + "nll_loss": 0.9357932806015015, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.09164627641439438, + "rewards/margins": 0.06890259683132172, + "rewards/rejected": -0.1605488806962967, + "step": 10830 + }, + { + "epoch": 1.96, + "grad_norm": 1.0807766914367676, + "learning_rate": 1.745814529043529e-06, + "log_odds_chosen": 1.076616644859314, + "log_odds_ratio": -0.482137531042099, + "logits/chosen": -0.4159305989742279, + "logits/rejected": -0.39468756318092346, + "logps/chosen": -0.8382644653320312, + "logps/rejected": -1.6001307964324951, + "loss": 0.8777, + "nll_loss": 0.8294404149055481, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.083826445043087, + "rewards/margins": 0.0761866495013237, + "rewards/rejected": -0.1600130945444107, + "step": 10840 + }, + { + "epoch": 1.96, + "grad_norm": 1.4051988124847412, + "learning_rate": 1.7399912651040906e-06, + "log_odds_chosen": 0.8923505544662476, + "log_odds_ratio": -0.5629561543464661, + "logits/chosen": -0.4815370440483093, + "logits/rejected": -0.4566279351711273, + "logps/chosen": -0.8964195251464844, + "logps/rejected": -1.5521047115325928, + "loss": 1.0146, + "nll_loss": 0.958348274230957, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08964196592569351, + "rewards/margins": 0.06556852906942368, + "rewards/rejected": -0.155210480093956, + "step": 10850 + }, + { + "epoch": 1.96, + "grad_norm": 0.9403336048126221, + "learning_rate": 1.7341680011646527e-06, + "log_odds_chosen": 1.1863648891448975, + "log_odds_ratio": -0.5220173001289368, + "logits/chosen": -0.4481693208217621, + "logits/rejected": -0.4287276268005371, + "logps/chosen": -0.9415832757949829, + "logps/rejected": -1.7910654544830322, + "loss": 0.9736, + "nll_loss": 0.9214186668395996, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09415832906961441, + "rewards/margins": 0.08494821935892105, + "rewards/rejected": -0.17910653352737427, + "step": 10860 + }, + { + "epoch": 1.96, + "grad_norm": 1.2430269718170166, + "learning_rate": 1.7283447372252146e-06, + "log_odds_chosen": 1.073395013809204, + "log_odds_ratio": -0.5384390950202942, + "logits/chosen": -0.48438698053359985, + "logits/rejected": -0.4872768521308899, + "logps/chosen": -0.9116014242172241, + "logps/rejected": -1.6653658151626587, + "loss": 0.9836, + "nll_loss": 0.9297466278076172, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.09116014838218689, + "rewards/margins": 0.07537643611431122, + "rewards/rejected": -0.16653656959533691, + "step": 10870 + }, + { + "epoch": 1.97, + "grad_norm": 1.2624175548553467, + "learning_rate": 1.7225214732857767e-06, + "log_odds_chosen": 1.233604073524475, + "log_odds_ratio": -0.4971106946468353, + "logits/chosen": -0.44337910413742065, + "logits/rejected": -0.4556514620780945, + "logps/chosen": -0.8173624277114868, + "logps/rejected": -1.6833994388580322, + "loss": 0.9029, + "nll_loss": 0.8531962633132935, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.0817362368106842, + "rewards/margins": 0.08660370111465454, + "rewards/rejected": -0.16833993792533875, + "step": 10880 + }, + { + "epoch": 1.97, + "grad_norm": 1.0927963256835938, + "learning_rate": 1.7166982093463384e-06, + "log_odds_chosen": 0.8071399927139282, + "log_odds_ratio": -0.5108460187911987, + "logits/chosen": -0.4830823540687561, + "logits/rejected": -0.4780608117580414, + "logps/chosen": -0.9677250981330872, + "logps/rejected": -1.5685020685195923, + "loss": 0.9669, + "nll_loss": 0.9157981872558594, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09677250683307648, + "rewards/margins": 0.060077689588069916, + "rewards/rejected": -0.15685021877288818, + "step": 10890 + }, + { + "epoch": 1.97, + "grad_norm": 1.9250324964523315, + "learning_rate": 1.7108749454069005e-06, + "log_odds_chosen": 0.9409559369087219, + "log_odds_ratio": -0.5403879880905151, + "logits/chosen": -0.4660972058773041, + "logits/rejected": -0.47896018624305725, + "logps/chosen": -1.005990982055664, + "logps/rejected": -1.6514829397201538, + "loss": 0.9834, + "nll_loss": 0.9293805360794067, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.10059911012649536, + "rewards/margins": 0.06454919278621674, + "rewards/rejected": -0.1651482880115509, + "step": 10900 + }, + { + "epoch": 1.97, + "grad_norm": 1.1880030632019043, + "learning_rate": 1.7050516814674624e-06, + "log_odds_chosen": 0.9781869053840637, + "log_odds_ratio": -0.5084264874458313, + "logits/chosen": -0.4466976225376129, + "logits/rejected": -0.453413188457489, + "logps/chosen": -1.0177123546600342, + "logps/rejected": -1.7334365844726562, + "loss": 0.9245, + "nll_loss": 0.8736462593078613, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.10177123546600342, + "rewards/margins": 0.07157242298126221, + "rewards/rejected": -0.17334364354610443, + "step": 10910 + }, + { + "epoch": 1.97, + "grad_norm": 1.5584683418273926, + "learning_rate": 1.6992284175280245e-06, + "log_odds_chosen": 0.9679558873176575, + "log_odds_ratio": -0.5330004692077637, + "logits/chosen": -0.42195218801498413, + "logits/rejected": -0.41038408875465393, + "logps/chosen": -0.9459524154663086, + "logps/rejected": -1.6363149881362915, + "loss": 0.8991, + "nll_loss": 0.8457754850387573, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09459523856639862, + "rewards/margins": 0.06903626024723053, + "rewards/rejected": -0.16363149881362915, + "step": 10920 + }, + { + "epoch": 1.97, + "grad_norm": 0.8798287510871887, + "learning_rate": 1.6934051535885862e-06, + "log_odds_chosen": 0.7307524085044861, + "log_odds_ratio": -0.5784265398979187, + "logits/chosen": -0.49423471093177795, + "logits/rejected": -0.4821283221244812, + "logps/chosen": -0.9477846026420593, + "logps/rejected": -1.4808070659637451, + "loss": 0.96, + "nll_loss": 0.9021958112716675, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09477847069501877, + "rewards/margins": 0.05330223590135574, + "rewards/rejected": -0.1480807065963745, + "step": 10930 + }, + { + "epoch": 1.98, + "grad_norm": 1.3429219722747803, + "learning_rate": 1.6875818896491483e-06, + "log_odds_chosen": 1.1456918716430664, + "log_odds_ratio": -0.4871048927307129, + "logits/chosen": -0.41457852721214294, + "logits/rejected": -0.42475467920303345, + "logps/chosen": -0.8636356592178345, + "logps/rejected": -1.6684157848358154, + "loss": 0.856, + "nll_loss": 0.8073083162307739, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08636356890201569, + "rewards/margins": 0.08047802746295929, + "rewards/rejected": -0.16684159636497498, + "step": 10940 + }, + { + "epoch": 1.98, + "grad_norm": 2.187709331512451, + "learning_rate": 1.6817586257097104e-06, + "log_odds_chosen": 1.4078530073165894, + "log_odds_ratio": -0.47329598665237427, + "logits/chosen": -0.41357675194740295, + "logits/rejected": -0.4157022535800934, + "logps/chosen": -0.9161952137947083, + "logps/rejected": -1.9065545797348022, + "loss": 0.92, + "nll_loss": 0.8726755380630493, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.09161952137947083, + "rewards/margins": 0.09903593361377716, + "rewards/rejected": -0.19065546989440918, + "step": 10950 + }, + { + "epoch": 1.98, + "grad_norm": 2.4422430992126465, + "learning_rate": 1.675935361770272e-06, + "log_odds_chosen": 0.7092650532722473, + "log_odds_ratio": -0.5866946578025818, + "logits/chosen": -0.44020456075668335, + "logits/rejected": -0.4163680672645569, + "logps/chosen": -0.9696685671806335, + "logps/rejected": -1.4656903743743896, + "loss": 0.9728, + "nll_loss": 0.9141584634780884, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09696687757968903, + "rewards/margins": 0.049602169543504715, + "rewards/rejected": -0.14656902849674225, + "step": 10960 + }, + { + "epoch": 1.98, + "grad_norm": 1.0880329608917236, + "learning_rate": 1.6701120978308341e-06, + "log_odds_chosen": 0.8620030283927917, + "log_odds_ratio": -0.5120142698287964, + "logits/chosen": -0.42742785811424255, + "logits/rejected": -0.44241565465927124, + "logps/chosen": -0.9114185571670532, + "logps/rejected": -1.5257482528686523, + "loss": 0.9209, + "nll_loss": 0.8696750402450562, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09114186465740204, + "rewards/margins": 0.06143295019865036, + "rewards/rejected": -0.1525748074054718, + "step": 10970 + }, + { + "epoch": 1.98, + "grad_norm": 1.5797709226608276, + "learning_rate": 1.664288833891396e-06, + "log_odds_chosen": 1.3317102193832397, + "log_odds_ratio": -0.4480084776878357, + "logits/chosen": -0.4008447527885437, + "logits/rejected": -0.39526838064193726, + "logps/chosen": -0.9441932439804077, + "logps/rejected": -1.8612372875213623, + "loss": 0.9713, + "nll_loss": 0.9264762997627258, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09441931545734406, + "rewards/margins": 0.09170440584421158, + "rewards/rejected": -0.18612372875213623, + "step": 10980 + }, + { + "epoch": 1.99, + "grad_norm": 1.1879231929779053, + "learning_rate": 1.6584655699519581e-06, + "log_odds_chosen": 1.3444769382476807, + "log_odds_ratio": -0.44155794382095337, + "logits/chosen": -0.4258067011833191, + "logits/rejected": -0.4344039559364319, + "logps/chosen": -0.8423998951911926, + "logps/rejected": -1.838492751121521, + "loss": 1.0015, + "nll_loss": 0.9573596715927124, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08423998206853867, + "rewards/margins": 0.09960927069187164, + "rewards/rejected": -0.1838492602109909, + "step": 10990 + }, + { + "epoch": 1.99, + "grad_norm": 1.3635789155960083, + "learning_rate": 1.6526423060125198e-06, + "log_odds_chosen": 0.9028606414794922, + "log_odds_ratio": -0.5877591967582703, + "logits/chosen": -0.4743015170097351, + "logits/rejected": -0.4625968337059021, + "logps/chosen": -0.950115978717804, + "logps/rejected": -1.6341667175292969, + "loss": 1.0627, + "nll_loss": 1.0039708614349365, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.09501160681247711, + "rewards/margins": 0.06840505450963974, + "rewards/rejected": -0.16341665387153625, + "step": 11000 + } + ], + "logging_steps": 10, + "max_steps": 13838, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}