{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994756161510225, "eval_steps": 500, "global_step": 953, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01048767697954903, "grad_norm": 12.504458138350461, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.1660214066505432, "log_odds_ratio": -0.6960338354110718, "logits/chosen": -2.542905330657959, "logits/rejected": -2.5316882133483887, "logps/chosen": -0.9998037219047546, "logps/rejected": -1.0999689102172852, "loss": 2.7433, "nll_loss": 2.6550583839416504, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04999018833041191, "rewards/margins": 0.005008256994187832, "rewards/rejected": -0.05499844625592232, "step": 10 }, { "epoch": 0.02097535395909806, "grad_norm": 3.296398746092505, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.1942831575870514, "log_odds_ratio": -0.6660380959510803, "logits/chosen": -3.148456335067749, "logits/rejected": -3.171660900115967, "logps/chosen": -0.7626909613609314, "logps/rejected": -0.8731427192687988, "loss": 0.563, "nll_loss": 0.5225270986557007, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03813454881310463, "rewards/margins": 0.00552258500829339, "rewards/rejected": -0.04365713149309158, "step": 20 }, { "epoch": 0.03146303093864709, "grad_norm": 2.4400188978085695, "learning_rate": 6e-06, "log_odds_chosen": 0.2339784801006317, "log_odds_ratio": -0.6537522673606873, "logits/chosen": -2.9630327224731445, "logits/rejected": -2.9368481636047363, "logps/chosen": -0.8345462679862976, "logps/rejected": -0.9655241966247559, "loss": 0.5355, "nll_loss": 0.4940575659275055, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04172731190919876, "rewards/margins": 0.0065488978289067745, "rewards/rejected": -0.04827621206641197, "step": 30 }, { "epoch": 0.04195070791819612, "grad_norm": 2.765802378357493, "learning_rate": 8.000000000000001e-06, "log_odds_chosen": 0.15870003402233124, "log_odds_ratio": -0.6969180107116699, "logits/chosen": -2.8065195083618164, "logits/rejected": -2.7910008430480957, "logps/chosen": -0.8027766346931458, "logps/rejected": -0.9165509343147278, "loss": 0.5199, "nll_loss": 0.48035889863967896, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04013883322477341, "rewards/margins": 0.005688714794814587, "rewards/rejected": -0.04582754150032997, "step": 40 }, { "epoch": 0.05243838489774515, "grad_norm": 2.7404814506796704, "learning_rate": 1e-05, "log_odds_chosen": 0.24872338771820068, "log_odds_ratio": -0.680080771446228, "logits/chosen": -2.7704856395721436, "logits/rejected": -2.77298641204834, "logps/chosen": -0.7987793684005737, "logps/rejected": -0.9668463468551636, "loss": 0.5424, "nll_loss": 0.48421746492385864, "rewards/accuracies": 0.59375, "rewards/chosen": -0.03993896767497063, "rewards/margins": 0.00840335339307785, "rewards/rejected": -0.048342324793338776, "step": 50 }, { "epoch": 0.06292606187729417, "grad_norm": 2.7601739927853473, "learning_rate": 1.2e-05, "log_odds_chosen": 0.21160352230072021, "log_odds_ratio": -0.6764382123947144, "logits/chosen": -3.0032615661621094, "logits/rejected": -2.9960169792175293, "logps/chosen": -0.7965995669364929, "logps/rejected": -0.917363166809082, "loss": 0.5463, "nll_loss": 0.516124427318573, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.039829984307289124, "rewards/margins": 0.006038171239197254, "rewards/rejected": -0.045868150889873505, "step": 60 }, { "epoch": 0.07341373885684321, "grad_norm": 3.2123267767300128, "learning_rate": 1.4e-05, "log_odds_chosen": 0.19886036217212677, "log_odds_ratio": -0.690485417842865, "logits/chosen": -2.978163719177246, "logits/rejected": -3.0078656673431396, "logps/chosen": -0.8206535577774048, "logps/rejected": -0.9310994148254395, "loss": 0.5403, "nll_loss": 0.530234694480896, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04103267565369606, "rewards/margins": 0.0055222949013113976, "rewards/rejected": -0.046554967761039734, "step": 70 }, { "epoch": 0.08390141583639224, "grad_norm": 3.267750524500123, "learning_rate": 1.6000000000000003e-05, "log_odds_chosen": 0.1725669652223587, "log_odds_ratio": -0.689757764339447, "logits/chosen": -2.963442087173462, "logits/rejected": -2.953914165496826, "logps/chosen": -0.8903671503067017, "logps/rejected": -1.0184500217437744, "loss": 0.5632, "nll_loss": 0.48384732007980347, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.0445183590054512, "rewards/margins": 0.006404136773198843, "rewards/rejected": -0.050922494381666183, "step": 80 }, { "epoch": 0.09438909281594127, "grad_norm": 6.338896835312273, "learning_rate": 1.8e-05, "log_odds_chosen": 0.2590278387069702, "log_odds_ratio": -0.6696828603744507, "logits/chosen": -2.7556283473968506, "logits/rejected": -2.759223461151123, "logps/chosen": -0.8806008100509644, "logps/rejected": -1.0427037477493286, "loss": 0.5599, "nll_loss": 0.49117976427078247, "rewards/accuracies": 0.59375, "rewards/chosen": -0.044030044227838516, "rewards/margins": 0.008105142042040825, "rewards/rejected": -0.05213518068194389, "step": 90 }, { "epoch": 0.1048767697954903, "grad_norm": 2.844482964932932, "learning_rate": 2e-05, "log_odds_chosen": 0.20001336932182312, "log_odds_ratio": -0.6672823429107666, "logits/chosen": -2.836613178253174, "logits/rejected": -2.826347827911377, "logps/chosen": -0.8816211819648743, "logps/rejected": -1.0050264596939087, "loss": 0.5675, "nll_loss": 0.5239149332046509, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.044081058353185654, "rewards/margins": 0.006170268170535564, "rewards/rejected": -0.05025132745504379, "step": 100 }, { "epoch": 0.11536444677503933, "grad_norm": 2.717573270122186, "learning_rate": 1.9069251784911845e-05, "log_odds_chosen": 0.26770642399787903, "log_odds_ratio": -0.6399692296981812, "logits/chosen": -2.8041529655456543, "logits/rejected": -2.828374147415161, "logps/chosen": -0.8482567071914673, "logps/rejected": -1.021328330039978, "loss": 0.568, "nll_loss": 0.5094035863876343, "rewards/accuracies": 0.625, "rewards/chosen": -0.042412832379341125, "rewards/margins": 0.008653589524328709, "rewards/rejected": -0.05106641724705696, "step": 110 }, { "epoch": 0.12585212375458835, "grad_norm": 2.3522582585650906, "learning_rate": 1.825741858350554e-05, "log_odds_chosen": 0.2770318388938904, "log_odds_ratio": -0.6538770198822021, "logits/chosen": -2.9046432971954346, "logits/rejected": -2.921250343322754, "logps/chosen": -0.8698671460151672, "logps/rejected": -1.0593181848526, "loss": 0.6048, "nll_loss": 0.5620476007461548, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.0434933602809906, "rewards/margins": 0.009472550824284554, "rewards/rejected": -0.05296590179204941, "step": 120 }, { "epoch": 0.1363398007341374, "grad_norm": 2.3512564845307704, "learning_rate": 1.7541160386140587e-05, "log_odds_chosen": 0.213302880525589, "log_odds_ratio": -0.6861675977706909, "logits/chosen": -2.926781177520752, "logits/rejected": -2.930361747741699, "logps/chosen": -0.9192083477973938, "logps/rejected": -1.06519615650177, "loss": 0.5923, "nll_loss": 0.5574383735656738, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04596042260527611, "rewards/margins": 0.007299385964870453, "rewards/rejected": -0.05325980857014656, "step": 130 }, { "epoch": 0.14682747771368643, "grad_norm": 2.2489368047485705, "learning_rate": 1.6903085094570334e-05, "log_odds_chosen": 0.24789170920848846, "log_odds_ratio": -0.655090868473053, "logits/chosen": -2.9084389209747314, "logits/rejected": -2.9173099994659424, "logps/chosen": -0.9441210031509399, "logps/rejected": -1.1045926809310913, "loss": 0.5882, "nll_loss": 0.5544429421424866, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.047206051647663116, "rewards/margins": 0.008023588918149471, "rewards/rejected": -0.05522964149713516, "step": 140 }, { "epoch": 0.15731515469323545, "grad_norm": 2.6715309670512903, "learning_rate": 1.6329931618554523e-05, "log_odds_chosen": 0.14654028415679932, "log_odds_ratio": -0.7416929006576538, "logits/chosen": -2.8286139965057373, "logits/rejected": -2.842860698699951, "logps/chosen": -0.9699670672416687, "logps/rejected": -1.0669214725494385, "loss": 0.5441, "nll_loss": 0.5359360575675964, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.048498354852199554, "rewards/margins": 0.004847715608775616, "rewards/rejected": -0.053346067667007446, "step": 150 }, { "epoch": 0.16780283167278448, "grad_norm": 2.4917874181934616, "learning_rate": 1.5811388300841898e-05, "log_odds_chosen": 0.19475655257701874, "log_odds_ratio": -0.664051353931427, "logits/chosen": -2.8252522945404053, "logits/rejected": -2.839994192123413, "logps/chosen": -0.9179447889328003, "logps/rejected": -1.0352815389633179, "loss": 0.6078, "nll_loss": 0.5540346503257751, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.045897237956523895, "rewards/margins": 0.005866840481758118, "rewards/rejected": -0.05176408216357231, "step": 160 }, { "epoch": 0.1782905086523335, "grad_norm": 2.493896039254152, "learning_rate": 1.533929977694741e-05, "log_odds_chosen": 0.25445470213890076, "log_odds_ratio": -0.6574397087097168, "logits/chosen": -2.895998477935791, "logits/rejected": -2.9125123023986816, "logps/chosen": -0.8917832374572754, "logps/rejected": -1.0586717128753662, "loss": 0.5884, "nll_loss": 0.5544494986534119, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04458915814757347, "rewards/margins": 0.008344428613781929, "rewards/rejected": -0.05293358489871025, "step": 170 }, { "epoch": 0.18877818563188253, "grad_norm": 2.368451448201635, "learning_rate": 1.49071198499986e-05, "log_odds_chosen": 0.2552924156188965, "log_odds_ratio": -0.6543556451797485, "logits/chosen": -2.8886399269104004, "logits/rejected": -2.905686378479004, "logps/chosen": -0.9206914901733398, "logps/rejected": -1.091048240661621, "loss": 0.5686, "nll_loss": 0.551173985004425, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04603457450866699, "rewards/margins": 0.008517834357917309, "rewards/rejected": -0.054552413523197174, "step": 180 }, { "epoch": 0.19926586261143156, "grad_norm": 4.734046585912702, "learning_rate": 1.4509525002200235e-05, "log_odds_chosen": 0.21173310279846191, "log_odds_ratio": -0.6579927206039429, "logits/chosen": -2.9355111122131348, "logits/rejected": -2.952430009841919, "logps/chosen": -0.9388859868049622, "logps/rejected": -1.0733187198638916, "loss": 0.5936, "nll_loss": 0.6142745018005371, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04694430157542229, "rewards/margins": 0.006721635349094868, "rewards/rejected": -0.05366594344377518, "step": 190 }, { "epoch": 0.2097535395909806, "grad_norm": 2.2391424397427073, "learning_rate": 1.4142135623730951e-05, "log_odds_chosen": 0.28418153524398804, "log_odds_ratio": -0.6668760180473328, "logits/chosen": -2.873599052429199, "logits/rejected": -2.9066414833068848, "logps/chosen": -0.9204713702201843, "logps/rejected": -1.128112554550171, "loss": 0.5689, "nll_loss": 0.5723541975021362, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.046023570001125336, "rewards/margins": 0.010382059030234814, "rewards/rejected": -0.056405626237392426, "step": 200 }, { "epoch": 0.22024121657052964, "grad_norm": 2.1684330770876152, "learning_rate": 1.3801311186847084e-05, "log_odds_chosen": 0.11919783055782318, "log_odds_ratio": -0.7173447012901306, "logits/chosen": -2.884079933166504, "logits/rejected": -2.8981668949127197, "logps/chosen": -0.8726099729537964, "logps/rejected": -0.9488958120346069, "loss": 0.5693, "nll_loss": 0.5325449109077454, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04363049939274788, "rewards/margins": 0.0038142912089824677, "rewards/rejected": -0.04744479060173035, "step": 210 }, { "epoch": 0.23072889355007867, "grad_norm": 2.510753834710904, "learning_rate": 1.3483997249264842e-05, "log_odds_chosen": 0.18100012838840485, "log_odds_ratio": -0.7047401666641235, "logits/chosen": -2.8885810375213623, "logits/rejected": -2.8980116844177246, "logps/chosen": -0.8880792856216431, "logps/rejected": -1.0071966648101807, "loss": 0.5589, "nll_loss": 0.5211626291275024, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.044403962790966034, "rewards/margins": 0.005955878179520369, "rewards/rejected": -0.05035984516143799, "step": 220 }, { "epoch": 0.2412165705296277, "grad_norm": 2.0148191421861705, "learning_rate": 1.3187609467915744e-05, "log_odds_chosen": 0.2717307209968567, "log_odds_ratio": -0.6763201951980591, "logits/chosen": -2.829516887664795, "logits/rejected": -2.842909574508667, "logps/chosen": -0.9367680549621582, "logps/rejected": -1.1125657558441162, "loss": 0.5701, "nll_loss": 0.5263533592224121, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04683841019868851, "rewards/margins": 0.008789879269897938, "rewards/rejected": -0.05562828853726387, "step": 230 }, { "epoch": 0.2517042475091767, "grad_norm": 2.286828850039024, "learning_rate": 1.2909944487358057e-05, "log_odds_chosen": 0.2564060091972351, "log_odds_ratio": -0.651031494140625, "logits/chosen": -2.979280471801758, "logits/rejected": -3.0063037872314453, "logps/chosen": -0.9010913968086243, "logps/rejected": -1.065353512763977, "loss": 0.5799, "nll_loss": 0.5546143054962158, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04505457356572151, "rewards/margins": 0.008213100023567677, "rewards/rejected": -0.053267668932676315, "step": 240 }, { "epoch": 0.26219192448872575, "grad_norm": 3.959216899336302, "learning_rate": 1.2649110640673518e-05, "log_odds_chosen": 0.2661912143230438, "log_odds_ratio": -0.6746715307235718, "logits/chosen": -2.9726908206939697, "logits/rejected": -2.974113941192627, "logps/chosen": -0.8829942941665649, "logps/rejected": -1.0264866352081299, "loss": 0.5502, "nll_loss": 0.5201153755187988, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04414971172809601, "rewards/margins": 0.007174622267484665, "rewards/rejected": -0.05132433772087097, "step": 250 }, { "epoch": 0.2726796014682748, "grad_norm": 2.2699181039817, "learning_rate": 1.2403473458920845e-05, "log_odds_chosen": 0.2342940866947174, "log_odds_ratio": -0.6783974766731262, "logits/chosen": -2.9759726524353027, "logits/rejected": -2.9923360347747803, "logps/chosen": -0.9042210578918457, "logps/rejected": -1.0481539964675903, "loss": 0.5304, "nll_loss": 0.45657747983932495, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.0452110581099987, "rewards/margins": 0.007196647580713034, "rewards/rejected": -0.052407700568437576, "step": 260 }, { "epoch": 0.2831672784478238, "grad_norm": 2.380998150273162, "learning_rate": 1.2171612389003691e-05, "log_odds_chosen": 0.17961958050727844, "log_odds_ratio": -0.6983593702316284, "logits/chosen": -2.938765525817871, "logits/rejected": -2.965757369995117, "logps/chosen": -0.9548166990280151, "logps/rejected": -1.0895111560821533, "loss": 0.5673, "nll_loss": 0.5430372357368469, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.04774082824587822, "rewards/margins": 0.006734730210155249, "rewards/rejected": -0.0544755645096302, "step": 270 }, { "epoch": 0.29365495542737285, "grad_norm": 2.0870887262121323, "learning_rate": 1.1952286093343936e-05, "log_odds_chosen": 0.2291949987411499, "log_odds_ratio": -0.6750219464302063, "logits/chosen": -2.928527355194092, "logits/rejected": -2.9543163776397705, "logps/chosen": -0.9355181455612183, "logps/rejected": -1.0729036331176758, "loss": 0.5434, "nll_loss": 0.47713321447372437, "rewards/accuracies": 0.65625, "rewards/chosen": -0.04677591472864151, "rewards/margins": 0.006869266740977764, "rewards/rejected": -0.05364518240094185, "step": 280 }, { "epoch": 0.30414263240692185, "grad_norm": 2.661552133228645, "learning_rate": 1.1744404390294071e-05, "log_odds_chosen": 0.36491650342941284, "log_odds_ratio": -0.620793879032135, "logits/chosen": -2.880122661590576, "logits/rejected": -2.8935391902923584, "logps/chosen": -0.836012065410614, "logps/rejected": -1.05286705493927, "loss": 0.5596, "nll_loss": 0.4885989725589752, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0418006032705307, "rewards/margins": 0.010842744261026382, "rewards/rejected": -0.05264334753155708, "step": 290 }, { "epoch": 0.3146303093864709, "grad_norm": 3.127285518362044, "learning_rate": 1.1547005383792517e-05, "log_odds_chosen": 0.255328893661499, "log_odds_ratio": -0.6939107179641724, "logits/chosen": -2.9603378772735596, "logits/rejected": -2.992128372192383, "logps/chosen": -0.8731514811515808, "logps/rejected": -1.0526010990142822, "loss": 0.5835, "nll_loss": 0.5112031102180481, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0436575748026371, "rewards/margins": 0.008972481824457645, "rewards/rejected": -0.052630048245191574, "step": 300 }, { "epoch": 0.3251179863660199, "grad_norm": 2.013637214040506, "learning_rate": 1.1359236684941297e-05, "log_odds_chosen": 0.21040907502174377, "log_odds_ratio": -0.688109278678894, "logits/chosen": -2.9860305786132812, "logits/rejected": -2.9820261001586914, "logps/chosen": -0.9089478254318237, "logps/rejected": -1.0382112264633179, "loss": 0.585, "nll_loss": 0.5399721264839172, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.045447397977113724, "rewards/margins": 0.006463165394961834, "rewards/rejected": -0.051910560578107834, "step": 310 }, { "epoch": 0.33560566334556896, "grad_norm": 2.1577553752792995, "learning_rate": 1.118033988749895e-05, "log_odds_chosen": 0.27985960245132446, "log_odds_ratio": -0.6601210832595825, "logits/chosen": -3.0387003421783447, "logits/rejected": -3.0464096069335938, "logps/chosen": -0.9086373448371887, "logps/rejected": -1.0836986303329468, "loss": 0.5243, "nll_loss": 0.4922841191291809, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.045431867241859436, "rewards/margins": 0.008753069676458836, "rewards/rejected": -0.0541849359869957, "step": 320 }, { "epoch": 0.34609334032511796, "grad_norm": 2.422690319169778, "learning_rate": 1.1009637651263608e-05, "log_odds_chosen": 0.28255337476730347, "log_odds_ratio": -0.6909259557723999, "logits/chosen": -2.950887441635132, "logits/rejected": -2.9948947429656982, "logps/chosen": -0.9054603576660156, "logps/rejected": -1.0888211727142334, "loss": 0.5544, "nll_loss": 0.5376341342926025, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04527302458882332, "rewards/margins": 0.009168041869997978, "rewards/rejected": -0.05444106459617615, "step": 330 }, { "epoch": 0.356581017304667, "grad_norm": 2.2975046406882798, "learning_rate": 1.0846522890932809e-05, "log_odds_chosen": 0.2153971642255783, "log_odds_ratio": -0.6926898956298828, "logits/chosen": -2.9686572551727295, "logits/rejected": -3.0199432373046875, "logps/chosen": -0.8590608835220337, "logps/rejected": -1.00636887550354, "loss": 0.5708, "nll_loss": 0.5127817392349243, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.042953044176101685, "rewards/margins": 0.007365405559539795, "rewards/rejected": -0.05031844973564148, "step": 340 }, { "epoch": 0.36706869428421607, "grad_norm": 2.135727653321979, "learning_rate": 1.0690449676496977e-05, "log_odds_chosen": 0.2665565609931946, "log_odds_ratio": -0.6829238533973694, "logits/chosen": -3.044860363006592, "logits/rejected": -3.0616378784179688, "logps/chosen": -0.8791500329971313, "logps/rejected": -1.0402672290802002, "loss": 0.5495, "nll_loss": 0.5228344202041626, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04395749792456627, "rewards/margins": 0.00805586390197277, "rewards/rejected": -0.05201335996389389, "step": 350 }, { "epoch": 0.37755637126376507, "grad_norm": 3.150177435714442, "learning_rate": 1.0540925533894598e-05, "log_odds_chosen": 0.4033277928829193, "log_odds_ratio": -0.602225124835968, "logits/chosen": -2.9472672939300537, "logits/rejected": -2.975858211517334, "logps/chosen": -0.8669608235359192, "logps/rejected": -1.110353708267212, "loss": 0.5494, "nll_loss": 0.5087054371833801, "rewards/accuracies": 0.625, "rewards/chosen": -0.0433480478823185, "rewards/margins": 0.01216964516788721, "rewards/rejected": -0.05551769211888313, "step": 360 }, { "epoch": 0.3880440482433141, "grad_norm": 2.130197231019511, "learning_rate": 1.0397504898200728e-05, "log_odds_chosen": 0.3966829478740692, "log_odds_ratio": -0.6142522096633911, "logits/chosen": -3.0528526306152344, "logits/rejected": -3.0623490810394287, "logps/chosen": -0.8640265464782715, "logps/rejected": -1.1243717670440674, "loss": 0.5232, "nll_loss": 0.5101068615913391, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.043201327323913574, "rewards/margins": 0.013017257675528526, "rewards/rejected": -0.05621858313679695, "step": 370 }, { "epoch": 0.3985317252228631, "grad_norm": 2.415549044992692, "learning_rate": 1.0259783520851543e-05, "log_odds_chosen": 0.46208301186561584, "log_odds_ratio": -0.5873923301696777, "logits/chosen": -3.055903196334839, "logits/rejected": -3.089763879776001, "logps/chosen": -0.8685981035232544, "logps/rejected": -1.1247217655181885, "loss": 0.5376, "nll_loss": 0.5167646408081055, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.0434299036860466, "rewards/margins": 0.01280617993324995, "rewards/rejected": -0.056236088275909424, "step": 380 }, { "epoch": 0.4090194022024122, "grad_norm": 2.4197618087673036, "learning_rate": 1.0127393670836667e-05, "log_odds_chosen": 0.08936772495508194, "log_odds_ratio": -0.7186132073402405, "logits/chosen": -2.998857021331787, "logits/rejected": -3.021352529525757, "logps/chosen": -0.9128287434577942, "logps/rejected": -0.9754525423049927, "loss": 0.5571, "nll_loss": 0.5319759845733643, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04564143717288971, "rewards/margins": 0.0031311833299696445, "rewards/rejected": -0.048772621899843216, "step": 390 }, { "epoch": 0.4195070791819612, "grad_norm": 2.0748995530757424, "learning_rate": 1e-05, "log_odds_chosen": 0.23965713381767273, "log_odds_ratio": -0.6899853348731995, "logits/chosen": -2.883575201034546, "logits/rejected": -2.908125400543213, "logps/chosen": -0.9490350484848022, "logps/rejected": -1.1106139421463013, "loss": 0.5725, "nll_loss": 0.5262094736099243, "rewards/accuracies": 0.53125, "rewards/chosen": -0.04745175316929817, "rewards/margins": 0.00807894580066204, "rewards/rejected": -0.05553068965673447, "step": 400 }, { "epoch": 0.4299947561615102, "grad_norm": 2.0498490112152026, "learning_rate": 9.877295966495898e-06, "log_odds_chosen": 0.14244404435157776, "log_odds_ratio": -0.7278560996055603, "logits/chosen": -2.988100051879883, "logits/rejected": -2.9914164543151855, "logps/chosen": -0.8709594011306763, "logps/rejected": -0.9773006439208984, "loss": 0.5455, "nll_loss": 0.4832683503627777, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.04354798048734665, "rewards/margins": 0.0053170593455433846, "rewards/rejected": -0.04886503517627716, "step": 410 }, { "epoch": 0.4404824331410593, "grad_norm": 1.9311064341389872, "learning_rate": 9.759000729485331e-06, "log_odds_chosen": 0.30063071846961975, "log_odds_ratio": -0.643203854560852, "logits/chosen": -2.9488558769226074, "logits/rejected": -2.9841551780700684, "logps/chosen": -0.8707404136657715, "logps/rejected": -1.0532442331314087, "loss": 0.5355, "nll_loss": 0.474843829870224, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04353701323270798, "rewards/margins": 0.009125196374952793, "rewards/rejected": -0.05266221612691879, "step": 420 }, { "epoch": 0.4509701101206083, "grad_norm": 2.119895291758326, "learning_rate": 9.644856443408244e-06, "log_odds_chosen": 0.2837393879890442, "log_odds_ratio": -0.6551750898361206, "logits/chosen": -2.9840757846832275, "logits/rejected": -2.9921929836273193, "logps/chosen": -0.8468173146247864, "logps/rejected": -1.0135347843170166, "loss": 0.5557, "nll_loss": 0.5443450212478638, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.04234086349606514, "rewards/margins": 0.00833587534725666, "rewards/rejected": -0.05067674070596695, "step": 430 }, { "epoch": 0.46145778710015734, "grad_norm": 2.095435518308805, "learning_rate": 9.534625892455923e-06, "log_odds_chosen": 0.2355252504348755, "log_odds_ratio": -0.6598283648490906, "logits/chosen": -3.0252740383148193, "logits/rejected": -3.045849323272705, "logps/chosen": -0.8709392547607422, "logps/rejected": -1.0179613828659058, "loss": 0.5508, "nll_loss": 0.5189236998558044, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04354696720838547, "rewards/margins": 0.0073511130176484585, "rewards/rejected": -0.050898075103759766, "step": 440 }, { "epoch": 0.47194546407970633, "grad_norm": 1.9017756846669818, "learning_rate": 9.428090415820635e-06, "log_odds_chosen": 0.34075412154197693, "log_odds_ratio": -0.6583858728408813, "logits/chosen": -3.0218703746795654, "logits/rejected": -3.0481696128845215, "logps/chosen": -0.8293315768241882, "logps/rejected": -1.047191858291626, "loss": 0.5286, "nll_loss": 0.4964592456817627, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.04146658256649971, "rewards/margins": 0.010893006809055805, "rewards/rejected": -0.05235959216952324, "step": 450 }, { "epoch": 0.4824331410592554, "grad_norm": 2.079766146123277, "learning_rate": 9.325048082403139e-06, "log_odds_chosen": 0.16855968534946442, "log_odds_ratio": -0.711928129196167, "logits/chosen": -3.0086510181427, "logits/rejected": -3.0489156246185303, "logps/chosen": -0.9442957043647766, "logps/rejected": -1.072997808456421, "loss": 0.5326, "nll_loss": 0.5338221788406372, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04721478372812271, "rewards/margins": 0.00643510278314352, "rewards/rejected": -0.05364988371729851, "step": 460 }, { "epoch": 0.4929208180388044, "grad_norm": 2.4868491558153085, "learning_rate": 9.225312080288851e-06, "log_odds_chosen": 0.23586861789226532, "log_odds_ratio": -0.6902174949645996, "logits/chosen": -2.986264705657959, "logits/rejected": -3.0127644538879395, "logps/chosen": -0.8882457613945007, "logps/rejected": -1.034985899925232, "loss": 0.5413, "nll_loss": 0.5090312361717224, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.044412292540073395, "rewards/margins": 0.007337009999901056, "rewards/rejected": -0.051749296486377716, "step": 470 }, { "epoch": 0.5034084950183534, "grad_norm": 2.0043501739666882, "learning_rate": 9.12870929175277e-06, "log_odds_chosen": 0.17604230344295502, "log_odds_ratio": -0.707550048828125, "logits/chosen": -3.088604211807251, "logits/rejected": -3.12184476852417, "logps/chosen": -0.8456010818481445, "logps/rejected": -0.9586717486381531, "loss": 0.5178, "nll_loss": 0.5126105546951294, "rewards/accuracies": 0.5625, "rewards/chosen": -0.042280055582523346, "rewards/margins": 0.005653535481542349, "rewards/rejected": -0.047933585941791534, "step": 480 }, { "epoch": 0.5138961719979025, "grad_norm": 1.9415978406566505, "learning_rate": 9.035079029052514e-06, "log_odds_chosen": 0.22476902604103088, "log_odds_ratio": -0.6716736555099487, "logits/chosen": -3.003417491912842, "logits/rejected": -3.0048608779907227, "logps/chosen": -0.9196673631668091, "logps/rejected": -1.0358223915100098, "loss": 0.5397, "nll_loss": 0.5024985671043396, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.045983362942934036, "rewards/margins": 0.005807754583656788, "rewards/rejected": -0.051791124045848846, "step": 490 }, { "epoch": 0.5243838489774515, "grad_norm": 2.2353701695425423, "learning_rate": 8.94427190999916e-06, "log_odds_chosen": 0.20684054493904114, "log_odds_ratio": -0.698712944984436, "logits/chosen": -3.0111751556396484, "logits/rejected": -3.0036330223083496, "logps/chosen": -0.8826943635940552, "logps/rejected": -1.0074814558029175, "loss": 0.548, "nll_loss": 0.5235316157341003, "rewards/accuracies": 0.625, "rewards/chosen": -0.0441347137093544, "rewards/margins": 0.006239361595362425, "rewards/rejected": -0.050374072045087814, "step": 500 }, { "epoch": 0.5348715259570005, "grad_norm": 1.742537477144132, "learning_rate": 8.856148855400955e-06, "log_odds_chosen": 0.3066679835319519, "log_odds_ratio": -0.6453306674957275, "logits/chosen": -2.9636032581329346, "logits/rejected": -2.97407865524292, "logps/chosen": -0.8404191136360168, "logps/rejected": -1.0267155170440674, "loss": 0.5264, "nll_loss": 0.5354185104370117, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04202095791697502, "rewards/margins": 0.009314822033047676, "rewards/rejected": -0.05133577436208725, "step": 510 }, { "epoch": 0.5453592029365496, "grad_norm": 1.6799388590726438, "learning_rate": 8.770580193070294e-06, "log_odds_chosen": 0.24468369781970978, "log_odds_ratio": -0.6710330247879028, "logits/chosen": -2.959213972091675, "logits/rejected": -2.966728687286377, "logps/chosen": -0.9035038948059082, "logps/rejected": -1.0690029859542847, "loss": 0.5366, "nll_loss": 0.47406935691833496, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04517520219087601, "rewards/margins": 0.008274954743683338, "rewards/rejected": -0.053450148552656174, "step": 520 }, { "epoch": 0.5558468799160986, "grad_norm": 1.8707354612150964, "learning_rate": 8.687444855261389e-06, "log_odds_chosen": 0.4215427339076996, "log_odds_ratio": -0.6489927172660828, "logits/chosen": -3.0756938457489014, "logits/rejected": -3.0923542976379395, "logps/chosen": -0.8253329992294312, "logps/rejected": -1.1108949184417725, "loss": 0.5365, "nll_loss": 0.45042163133621216, "rewards/accuracies": 0.53125, "rewards/chosen": -0.04126664996147156, "rewards/margins": 0.014278100803494453, "rewards/rejected": -0.05554475262761116, "step": 530 }, { "epoch": 0.5663345568956476, "grad_norm": 1.922705947748225, "learning_rate": 8.606629658238705e-06, "log_odds_chosen": 0.1879667341709137, "log_odds_ratio": -0.6903280019760132, "logits/chosen": -2.975130796432495, "logits/rejected": -3.0028696060180664, "logps/chosen": -0.8695458173751831, "logps/rejected": -0.9805169105529785, "loss": 0.5535, "nll_loss": 0.5275255441665649, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04347729682922363, "rewards/margins": 0.005548550747334957, "rewards/rejected": -0.049025844782590866, "step": 540 }, { "epoch": 0.5768222338751966, "grad_norm": 1.9089385183272836, "learning_rate": 8.528028654224417e-06, "log_odds_chosen": 0.42722567915916443, "log_odds_ratio": -0.6043616533279419, "logits/chosen": -2.9973807334899902, "logits/rejected": -3.0049965381622314, "logps/chosen": -0.8592002987861633, "logps/rejected": -1.1192405223846436, "loss": 0.537, "nll_loss": 0.5372708439826965, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.042960021644830704, "rewards/margins": 0.013002010062336922, "rewards/rejected": -0.05596202611923218, "step": 550 }, { "epoch": 0.5873099108547457, "grad_norm": 1.9519454661958895, "learning_rate": 8.451542547285167e-06, "log_odds_chosen": 0.23686861991882324, "log_odds_ratio": -0.679013192653656, "logits/chosen": -3.0309016704559326, "logits/rejected": -3.0620574951171875, "logps/chosen": -0.8845365643501282, "logps/rejected": -1.0314432382583618, "loss": 0.5215, "nll_loss": 0.5018130540847778, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04422682151198387, "rewards/margins": 0.0073453388176858425, "rewards/rejected": -0.05157216265797615, "step": 560 }, { "epoch": 0.5977975878342947, "grad_norm": 1.902474576616517, "learning_rate": 8.37707816583391e-06, "log_odds_chosen": 0.157462477684021, "log_odds_ratio": -0.7165660858154297, "logits/chosen": -2.971592903137207, "logits/rejected": -2.9932913780212402, "logps/chosen": -0.8898121118545532, "logps/rejected": -0.9948716163635254, "loss": 0.5041, "nll_loss": 0.5276492834091187, "rewards/accuracies": 0.5, "rewards/chosen": -0.044490598142147064, "rewards/margins": 0.005252980627119541, "rewards/rejected": -0.04974358528852463, "step": 570 }, { "epoch": 0.6082852648138437, "grad_norm": 1.9526588876095308, "learning_rate": 8.304547985373997e-06, "log_odds_chosen": 0.27767136693000793, "log_odds_ratio": -0.6578360199928284, "logits/chosen": -3.0485613346099854, "logits/rejected": -3.061281204223633, "logps/chosen": -0.8733240962028503, "logps/rejected": -1.0594861507415771, "loss": 0.5456, "nll_loss": 0.48286086320877075, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.043666206300258636, "rewards/margins": 0.009308096952736378, "rewards/rejected": -0.05297430604696274, "step": 580 }, { "epoch": 0.6187729417933928, "grad_norm": 1.963515177379308, "learning_rate": 8.233869695926184e-06, "log_odds_chosen": 0.32016056776046753, "log_odds_ratio": -0.6649240255355835, "logits/chosen": -3.0834898948669434, "logits/rejected": -3.123967409133911, "logps/chosen": -0.8281318545341492, "logps/rejected": -1.021436095237732, "loss": 0.5124, "nll_loss": 0.5498961210250854, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04140659421682358, "rewards/margins": 0.009665210731327534, "rewards/rejected": -0.05107180029153824, "step": 590 }, { "epoch": 0.6292606187729418, "grad_norm": 2.1416673571833584, "learning_rate": 8.164965809277262e-06, "log_odds_chosen": 0.3141978085041046, "log_odds_ratio": -0.6486893892288208, "logits/chosen": -3.1147074699401855, "logits/rejected": -3.11454176902771, "logps/chosen": -0.8215556144714355, "logps/rejected": -1.009476661682129, "loss": 0.5144, "nll_loss": 0.4836875796318054, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04107777774333954, "rewards/margins": 0.009396053850650787, "rewards/rejected": -0.05047383904457092, "step": 600 }, { "epoch": 0.6397482957524908, "grad_norm": 2.03894912155955, "learning_rate": 8.097763301789162e-06, "log_odds_chosen": 0.1958848237991333, "log_odds_ratio": -0.6933802366256714, "logits/chosen": -3.016098737716675, "logits/rejected": -3.046642780303955, "logps/chosen": -0.8733209371566772, "logps/rejected": -0.9883171916007996, "loss": 0.526, "nll_loss": 0.4880569875240326, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0436660535633564, "rewards/margins": 0.005749809555709362, "rewards/rejected": -0.049415864050388336, "step": 610 }, { "epoch": 0.6502359727320398, "grad_norm": 2.068974001178546, "learning_rate": 8.03219328902499e-06, "log_odds_chosen": 0.17991718649864197, "log_odds_ratio": -0.7055822610855103, "logits/chosen": -3.045403003692627, "logits/rejected": -3.0644798278808594, "logps/chosen": -0.8806620836257935, "logps/rejected": -1.0145095586776733, "loss": 0.5295, "nll_loss": 0.5151625275611877, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.04403311014175415, "rewards/margins": 0.006692370865494013, "rewards/rejected": -0.05072547867894173, "step": 620 }, { "epoch": 0.6607236497115889, "grad_norm": 1.9705491215328443, "learning_rate": 7.968190728895958e-06, "log_odds_chosen": 0.23948292434215546, "log_odds_ratio": -0.6947344541549683, "logits/chosen": -3.016519546508789, "logits/rejected": -3.042133331298828, "logps/chosen": -0.8557758331298828, "logps/rejected": -1.0029237270355225, "loss": 0.5331, "nll_loss": 0.5245988368988037, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0427887924015522, "rewards/margins": 0.007357400842010975, "rewards/rejected": -0.0501461923122406, "step": 630 }, { "epoch": 0.6712113266911379, "grad_norm": 2.664256522681278, "learning_rate": 7.905694150420949e-06, "log_odds_chosen": 0.3717094659805298, "log_odds_ratio": -0.6480633020401001, "logits/chosen": -3.0543761253356934, "logits/rejected": -3.0751733779907227, "logps/chosen": -0.8645519018173218, "logps/rejected": -1.102386713027954, "loss": 0.5149, "nll_loss": 0.46133953332901, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04322759807109833, "rewards/margins": 0.011891739442944527, "rewards/rejected": -0.0551193431019783, "step": 640 }, { "epoch": 0.6816990036706869, "grad_norm": 1.878621524799117, "learning_rate": 7.844645405527363e-06, "log_odds_chosen": 0.1861819326877594, "log_odds_ratio": -0.7022497057914734, "logits/chosen": -3.0863146781921387, "logits/rejected": -3.113098621368408, "logps/chosen": -0.8403372764587402, "logps/rejected": -0.9548438191413879, "loss": 0.5336, "nll_loss": 0.5122831463813782, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04201686754822731, "rewards/margins": 0.0057253288105130196, "rewards/rejected": -0.047742195427417755, "step": 650 }, { "epoch": 0.6921866806502359, "grad_norm": 1.8977100039056058, "learning_rate": 7.78498944161523e-06, "log_odds_chosen": 0.2854728400707245, "log_odds_ratio": -0.6552462577819824, "logits/chosen": -3.052263021469116, "logits/rejected": -3.0898962020874023, "logps/chosen": -0.8826674222946167, "logps/rejected": -1.0711818933486938, "loss": 0.5304, "nll_loss": 0.4874996542930603, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.044133372604846954, "rewards/margins": 0.009425725787878036, "rewards/rejected": -0.05355909466743469, "step": 660 }, { "epoch": 0.702674357629785, "grad_norm": 1.8195731091765575, "learning_rate": 7.726674092862559e-06, "log_odds_chosen": 0.4364054203033447, "log_odds_ratio": -0.6321254968643188, "logits/chosen": -2.9931445121765137, "logits/rejected": -3.025317907333374, "logps/chosen": -0.8416171073913574, "logps/rejected": -1.1292223930358887, "loss": 0.5237, "nll_loss": 0.46936100721359253, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.04208085685968399, "rewards/margins": 0.014380265958607197, "rewards/rejected": -0.05646112561225891, "step": 670 }, { "epoch": 0.713162034609334, "grad_norm": 2.0599075037830192, "learning_rate": 7.669649888473705e-06, "log_odds_chosen": 0.31395241618156433, "log_odds_ratio": -0.650139570236206, "logits/chosen": -2.9855525493621826, "logits/rejected": -2.9897267818450928, "logps/chosen": -0.8750125169754028, "logps/rejected": -1.0669299364089966, "loss": 0.5075, "nll_loss": 0.4943002760410309, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.04375062882900238, "rewards/margins": 0.009595867246389389, "rewards/rejected": -0.05334649235010147, "step": 680 }, { "epoch": 0.723649711588883, "grad_norm": 1.8347271674417223, "learning_rate": 7.61386987626881e-06, "log_odds_chosen": 0.18291696906089783, "log_odds_ratio": -0.7239105701446533, "logits/chosen": -2.97595477104187, "logits/rejected": -2.991725444793701, "logps/chosen": -0.8641953468322754, "logps/rejected": -0.9991108179092407, "loss": 0.5304, "nll_loss": 0.5499680638313293, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04320976510643959, "rewards/margins": 0.006745772901922464, "rewards/rejected": -0.04995553940534592, "step": 690 }, { "epoch": 0.7341373885684321, "grad_norm": 2.2852704943230915, "learning_rate": 7.559289460184545e-06, "log_odds_chosen": 0.3105728030204773, "log_odds_ratio": -0.6319602727890015, "logits/chosen": -2.985989809036255, "logits/rejected": -3.0209579467773438, "logps/chosen": -0.8320032358169556, "logps/rejected": -1.0303562879562378, "loss": 0.5296, "nll_loss": 0.5422422885894775, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04160016402602196, "rewards/margins": 0.009917653165757656, "rewards/rejected": -0.05151782184839249, "step": 700 }, { "epoch": 0.7446250655479811, "grad_norm": 1.9768197452256755, "learning_rate": 7.505866250408016e-06, "log_odds_chosen": 0.2948063015937805, "log_odds_ratio": -0.6451742649078369, "logits/chosen": -3.1170597076416016, "logits/rejected": -3.136089324951172, "logps/chosen": -0.8415013551712036, "logps/rejected": -1.0454984903335571, "loss": 0.5237, "nll_loss": 0.47949719429016113, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04207506403326988, "rewards/margins": 0.01019985694438219, "rewards/rejected": -0.0522749237716198, "step": 710 }, { "epoch": 0.7551127425275301, "grad_norm": 1.905599119477425, "learning_rate": 7.4535599249993e-06, "log_odds_chosen": 0.40306347608566284, "log_odds_ratio": -0.6352882385253906, "logits/chosen": -3.064483642578125, "logits/rejected": -3.087808847427368, "logps/chosen": -0.7972971200942993, "logps/rejected": -1.046507477760315, "loss": 0.5304, "nll_loss": 0.4636651873588562, "rewards/accuracies": 0.625, "rewards/chosen": -0.039864856749773026, "rewards/margins": 0.012460513040423393, "rewards/rejected": -0.05232536792755127, "step": 720 }, { "epoch": 0.7656004195070791, "grad_norm": 2.19124615484763, "learning_rate": 7.402332101976053e-06, "log_odds_chosen": 0.12367966026067734, "log_odds_ratio": -0.7226089239120483, "logits/chosen": -3.0835583209991455, "logits/rejected": -3.0826332569122314, "logps/chosen": -0.8365408778190613, "logps/rejected": -0.9029885530471802, "loss": 0.5374, "nll_loss": 0.5031268000602722, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.04182704538106918, "rewards/margins": 0.0033223754726350307, "rewards/rejected": -0.04514942690730095, "step": 730 }, { "epoch": 0.7760880964866282, "grad_norm": 2.0835998895674837, "learning_rate": 7.352146220938079e-06, "log_odds_chosen": 0.33691075444221497, "log_odds_ratio": -0.6264201402664185, "logits/chosen": -3.1278512477874756, "logits/rejected": -3.139995574951172, "logps/chosen": -0.8067742586135864, "logps/rejected": -1.0221493244171143, "loss": 0.5312, "nll_loss": 0.4790155291557312, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.040338706225156784, "rewards/margins": 0.010768752545118332, "rewards/rejected": -0.051107458770275116, "step": 740 }, { "epoch": 0.7865757734661772, "grad_norm": 1.9667031119071154, "learning_rate": 7.3029674334022146e-06, "log_odds_chosen": 0.23670358955860138, "log_odds_ratio": -0.6752098202705383, "logits/chosen": -3.1056113243103027, "logits/rejected": -3.1298460960388184, "logps/chosen": -0.8614869117736816, "logps/rejected": -0.9949930310249329, "loss": 0.5426, "nll_loss": 0.4975660443305969, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0430743470788002, "rewards/margins": 0.006675302051007748, "rewards/rejected": -0.049749650061130524, "step": 750 }, { "epoch": 0.7970634504457262, "grad_norm": 1.8638714551633075, "learning_rate": 7.254762501100117e-06, "log_odds_chosen": 0.2394195795059204, "log_odds_ratio": -0.6686865091323853, "logits/chosen": -3.092322826385498, "logits/rejected": -3.0998446941375732, "logps/chosen": -0.8189753293991089, "logps/rejected": -0.9735254049301147, "loss": 0.5115, "nll_loss": 0.4049908220767975, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.0409487709403038, "rewards/margins": 0.007727508433163166, "rewards/rejected": -0.048676274716854095, "step": 760 }, { "epoch": 0.8075511274252754, "grad_norm": 2.098087236150393, "learning_rate": 7.207499701564472e-06, "log_odds_chosen": 0.21572642028331757, "log_odds_ratio": -0.7029857635498047, "logits/chosen": -3.0059127807617188, "logits/rejected": -3.0258781909942627, "logps/chosen": -0.8941653370857239, "logps/rejected": -1.0438942909240723, "loss": 0.5343, "nll_loss": 0.5011810064315796, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.044708263128995895, "rewards/margins": 0.007486448623239994, "rewards/rejected": -0.05219471454620361, "step": 770 }, { "epoch": 0.8180388044048243, "grad_norm": 1.908201970451478, "learning_rate": 7.1611487403943295e-06, "log_odds_chosen": 0.22588184475898743, "log_odds_ratio": -0.6703106164932251, "logits/chosen": -3.0057101249694824, "logits/rejected": -3.0319108963012695, "logps/chosen": -0.8802768588066101, "logps/rejected": -0.997613787651062, "loss": 0.5466, "nll_loss": 0.5490036606788635, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0440138503909111, "rewards/margins": 0.005866837687790394, "rewards/rejected": -0.04988069087266922, "step": 780 }, { "epoch": 0.8285264813843733, "grad_norm": 1.8452821315553456, "learning_rate": 7.115680669648201e-06, "log_odds_chosen": 0.32251420617103577, "log_odds_ratio": -0.6489396691322327, "logits/chosen": -2.991415500640869, "logits/rejected": -3.0075478553771973, "logps/chosen": -0.8143788576126099, "logps/rejected": -1.0171436071395874, "loss": 0.5052, "nll_loss": 0.4423222541809082, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04071894288063049, "rewards/margins": 0.010138243436813354, "rewards/rejected": -0.05085718631744385, "step": 790 }, { "epoch": 0.8390141583639223, "grad_norm": 2.099723593564682, "learning_rate": 7.0710678118654756e-06, "log_odds_chosen": 0.4498319625854492, "log_odds_ratio": -0.5986544489860535, "logits/chosen": -2.9999208450317383, "logits/rejected": -2.9963490962982178, "logps/chosen": -0.782555341720581, "logps/rejected": -1.068285584449768, "loss": 0.5173, "nll_loss": 0.4201901853084564, "rewards/accuracies": 0.65625, "rewards/chosen": -0.03912776708602905, "rewards/margins": 0.014286505989730358, "rewards/rejected": -0.053414274007081985, "step": 800 }, { "epoch": 0.8495018353434715, "grad_norm": 1.9010573028789273, "learning_rate": 7.027283689263066e-06, "log_odds_chosen": 0.34422335028648376, "log_odds_ratio": -0.6322020292282104, "logits/chosen": -3.0011842250823975, "logits/rejected": -2.9966137409210205, "logps/chosen": -0.8086786270141602, "logps/rejected": -1.0155996084213257, "loss": 0.5132, "nll_loss": 0.4740920066833496, "rewards/accuracies": 0.59375, "rewards/chosen": -0.040433935821056366, "rewards/margins": 0.010346042923629284, "rewards/rejected": -0.05077998712658882, "step": 810 }, { "epoch": 0.8599895123230205, "grad_norm": 2.3144073315770353, "learning_rate": 6.984302957695783e-06, "log_odds_chosen": 0.29515784978866577, "log_odds_ratio": -0.6521409749984741, "logits/chosen": -2.943692445755005, "logits/rejected": -2.9414219856262207, "logps/chosen": -0.8414862751960754, "logps/rejected": -1.0143965482711792, "loss": 0.504, "nll_loss": 0.4271189570426941, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04207431524991989, "rewards/margins": 0.008645516820251942, "rewards/rejected": -0.05071982741355896, "step": 820 }, { "epoch": 0.8704771893025695, "grad_norm": 2.371001107698096, "learning_rate": 6.942101345006233e-06, "log_odds_chosen": 0.2455742061138153, "log_odds_ratio": -0.7013689279556274, "logits/chosen": -2.933568239212036, "logits/rejected": -2.977832794189453, "logps/chosen": -0.8553229570388794, "logps/rejected": -1.0332233905792236, "loss": 0.5251, "nll_loss": 0.46586036682128906, "rewards/accuracies": 0.53125, "rewards/chosen": -0.04276614636182785, "rewards/margins": 0.008895025588572025, "rewards/rejected": -0.0516611710190773, "step": 830 }, { "epoch": 0.8809648662821186, "grad_norm": 1.977587507180873, "learning_rate": 6.900655593423542e-06, "log_odds_chosen": 0.19387319684028625, "log_odds_ratio": -0.6939007639884949, "logits/chosen": -2.9483094215393066, "logits/rejected": -2.966421365737915, "logps/chosen": -0.8696029782295227, "logps/rejected": -1.0034617185592651, "loss": 0.5136, "nll_loss": 0.48451894521713257, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.043480150401592255, "rewards/margins": 0.006692938506603241, "rewards/rejected": -0.050173092633485794, "step": 840 }, { "epoch": 0.8914525432616676, "grad_norm": 2.0931872980265527, "learning_rate": 6.859943405700353e-06, "log_odds_chosen": 0.27469760179519653, "log_odds_ratio": -0.6496983170509338, "logits/chosen": -2.882544994354248, "logits/rejected": -2.907102584838867, "logps/chosen": -0.8309645652770996, "logps/rejected": -0.9983605146408081, "loss": 0.5054, "nll_loss": 0.4892002046108246, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0415482297539711, "rewards/margins": 0.008369805291295052, "rewards/rejected": -0.049918033182621, "step": 850 }, { "epoch": 0.9019402202412166, "grad_norm": 1.9059523373512675, "learning_rate": 6.819943394704736e-06, "log_odds_chosen": 0.2372780740261078, "log_odds_ratio": -0.6811105012893677, "logits/chosen": -2.9579243659973145, "logits/rejected": -2.9706907272338867, "logps/chosen": -0.8282278180122375, "logps/rejected": -0.982342541217804, "loss": 0.5277, "nll_loss": 0.4725598692893982, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.0414113886654377, "rewards/margins": 0.007705743424594402, "rewards/rejected": -0.049117133021354675, "step": 860 }, { "epoch": 0.9124278972207656, "grad_norm": 1.892543797666968, "learning_rate": 6.780635036208105e-06, "log_odds_chosen": 0.287548691034317, "log_odds_ratio": -0.6644268035888672, "logits/chosen": -3.0049710273742676, "logits/rejected": -3.0431902408599854, "logps/chosen": -0.8620280027389526, "logps/rejected": -1.0551369190216064, "loss": 0.4935, "nll_loss": 0.4828346371650696, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04310140386223793, "rewards/margins": 0.009655444882810116, "rewards/rejected": -0.05275684595108032, "step": 870 }, { "epoch": 0.9229155742003147, "grad_norm": 1.6128728864363475, "learning_rate": 6.741998624632421e-06, "log_odds_chosen": 0.2844703197479248, "log_odds_ratio": -0.6617631316184998, "logits/chosen": -3.044353723526001, "logits/rejected": -3.0480034351348877, "logps/chosen": -0.808245837688446, "logps/rejected": -0.990073561668396, "loss": 0.4881, "nll_loss": 0.43747878074645996, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.0404122956097126, "rewards/margins": 0.009091392159461975, "rewards/rejected": -0.04950368404388428, "step": 880 }, { "epoch": 0.9334032511798637, "grad_norm": 2.329046484142618, "learning_rate": 6.70401523153991e-06, "log_odds_chosen": 0.32051050662994385, "log_odds_ratio": -0.6461818218231201, "logits/chosen": -3.0071539878845215, "logits/rejected": -3.0232186317443848, "logps/chosen": -0.8105939030647278, "logps/rejected": -0.993729293346405, "loss": 0.4935, "nll_loss": 0.46434158086776733, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04052969440817833, "rewards/margins": 0.00915677472949028, "rewards/rejected": -0.04968646913766861, "step": 890 }, { "epoch": 0.9438909281594127, "grad_norm": 2.0086740635642073, "learning_rate": 6.666666666666667e-06, "log_odds_chosen": 0.2798821032047272, "log_odds_ratio": -0.664302408695221, "logits/chosen": -2.9259209632873535, "logits/rejected": -2.9381814002990723, "logps/chosen": -0.7818757891654968, "logps/rejected": -0.9571603536605835, "loss": 0.5239, "nll_loss": 0.4661863446235657, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03909378498792648, "rewards/margins": 0.008764232508838177, "rewards/rejected": -0.04785802215337753, "step": 900 }, { "epoch": 0.9543786051389617, "grad_norm": 2.068822950454407, "learning_rate": 6.629935441317959e-06, "log_odds_chosen": 0.479647159576416, "log_odds_ratio": -0.6314842700958252, "logits/chosen": -2.974902629852295, "logits/rejected": -2.9787256717681885, "logps/chosen": -0.8285977244377136, "logps/rejected": -1.1534996032714844, "loss": 0.5142, "nll_loss": 0.46572408080101013, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04142988473176956, "rewards/margins": 0.016245096921920776, "rewards/rejected": -0.05767498165369034, "step": 910 }, { "epoch": 0.9648662821185108, "grad_norm": 1.9606527520032064, "learning_rate": 6.593804733957872e-06, "log_odds_chosen": 0.3219223618507385, "log_odds_ratio": -0.649006187915802, "logits/chosen": -2.895038604736328, "logits/rejected": -2.9138269424438477, "logps/chosen": -0.7895429134368896, "logps/rejected": -0.9961126446723938, "loss": 0.4837, "nll_loss": 0.43109196424484253, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.0394771471619606, "rewards/margins": 0.010328484699130058, "rewards/rejected": -0.04980562627315521, "step": 920 }, { "epoch": 0.9753539590980598, "grad_norm": 2.2191050074705405, "learning_rate": 6.55825835783953e-06, "log_odds_chosen": 0.21952304244041443, "log_odds_ratio": -0.6805615425109863, "logits/chosen": -2.8973617553710938, "logits/rejected": -2.900251865386963, "logps/chosen": -0.8730388879776001, "logps/rejected": -1.0255097150802612, "loss": 0.5135, "nll_loss": 0.5237925052642822, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04365193843841553, "rewards/margins": 0.007623549550771713, "rewards/rejected": -0.05127548426389694, "step": 930 }, { "epoch": 0.9858416360776088, "grad_norm": 1.9816052115352747, "learning_rate": 6.523280730534423e-06, "log_odds_chosen": 0.2554723024368286, "log_odds_ratio": -0.6887288689613342, "logits/chosen": -2.93623685836792, "logits/rejected": -2.9283607006073, "logps/chosen": -0.7786284685134888, "logps/rejected": -0.9273189306259155, "loss": 0.5095, "nll_loss": 0.4773116111755371, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03893141821026802, "rewards/margins": 0.007434530649334192, "rewards/rejected": -0.046365950256586075, "step": 940 }, { "epoch": 0.9963293130571579, "grad_norm": 2.074452011854083, "learning_rate": 6.488856845230502e-06, "log_odds_chosen": 0.2605803310871124, "log_odds_ratio": -0.6914502382278442, "logits/chosen": -2.9090209007263184, "logits/rejected": -2.9163012504577637, "logps/chosen": -0.8585780262947083, "logps/rejected": -1.0175925493240356, "loss": 0.5383, "nll_loss": 0.503527045249939, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04292890429496765, "rewards/margins": 0.007950720377266407, "rewards/rejected": -0.050879620015621185, "step": 950 }, { "epoch": 0.9994756161510225, "step": 953, "total_flos": 0.0, "train_loss": 0.56347276506494, "train_runtime": 19079.6454, "train_samples_per_second": 3.197, "train_steps_per_second": 0.05 } ], "logging_steps": 10, "max_steps": 953, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }