{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994756161510225, "eval_steps": 100, "global_step": 953, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01048767697954903, "grad_norm": 281.5632535171625, "learning_rate": 7.000000000000001e-07, "log_odds_chosen": 0.14837229251861572, "log_odds_ratio": -0.7063122987747192, "logits/chosen": -2.4233744144439697, "logits/rejected": -2.3922557830810547, "logps/chosen": -1.0665283203125, "logps/rejected": -1.164435625076294, "loss": 3.7384, "nll_loss": 3.6487019062042236, "rewards/accuracies": 0.53125, "rewards/chosen": -0.05332641676068306, "rewards/margins": 0.004895367659628391, "rewards/rejected": -0.058221787214279175, "step": 10 }, { "epoch": 0.02097535395909806, "grad_norm": 3.6095114671977337, "learning_rate": 1.4000000000000001e-06, "log_odds_chosen": 0.18771903216838837, "log_odds_ratio": -0.6616674661636353, "logits/chosen": -2.669743061065674, "logits/rejected": -2.6637511253356934, "logps/chosen": -0.8115625381469727, "logps/rejected": -0.9194537401199341, "loss": 0.598, "nll_loss": 0.5553613901138306, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04057813063263893, "rewards/margins": 0.005394552834331989, "rewards/rejected": -0.045972686260938644, "step": 20 }, { "epoch": 0.03146303093864709, "grad_norm": 2.6104338509446743, "learning_rate": 2.1e-06, "log_odds_chosen": 0.24361269176006317, "log_odds_ratio": -0.6484603881835938, "logits/chosen": -2.8152480125427246, "logits/rejected": -2.770486831665039, "logps/chosen": -0.7975724339485168, "logps/rejected": -0.9327106475830078, "loss": 0.539, "nll_loss": 0.4975182116031647, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03987862169742584, "rewards/margins": 0.006756913848221302, "rewards/rejected": -0.04663553088903427, "step": 30 }, { "epoch": 0.04195070791819612, "grad_norm": 2.6082713320666966, "learning_rate": 2.8000000000000003e-06, "log_odds_chosen": 0.18453697860240936, "log_odds_ratio": -0.6863341331481934, "logits/chosen": -2.7431702613830566, "logits/rejected": -2.721076488494873, "logps/chosen": -0.7775384783744812, "logps/rejected": -0.8990561366081238, "loss": 0.5182, "nll_loss": 0.4802665710449219, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03887692838907242, "rewards/margins": 0.006075879093259573, "rewards/rejected": -0.04495280981063843, "step": 40 }, { "epoch": 0.05243838489774515, "grad_norm": 2.8319159240383356, "learning_rate": 3.5e-06, "log_odds_chosen": 0.2895735204219818, "log_odds_ratio": -0.6829751133918762, "logits/chosen": -2.6645712852478027, "logits/rejected": -2.6532058715820312, "logps/chosen": -0.7420316934585571, "logps/rejected": -0.92218017578125, "loss": 0.5346, "nll_loss": 0.4737791419029236, "rewards/accuracies": 0.625, "rewards/chosen": -0.03710158169269562, "rewards/margins": 0.009007426910102367, "rewards/rejected": -0.04610900953412056, "step": 50 }, { "epoch": 0.06292606187729417, "grad_norm": 2.702391106634465, "learning_rate": 4.2e-06, "log_odds_chosen": 0.23618292808532715, "log_odds_ratio": -0.6679760217666626, "logits/chosen": -2.7234179973602295, "logits/rejected": -2.701585292816162, "logps/chosen": -0.7408851385116577, "logps/rejected": -0.8674576878547668, "loss": 0.5296, "nll_loss": 0.5001371502876282, "rewards/accuracies": 0.59375, "rewards/chosen": -0.03704426437616348, "rewards/margins": 0.0063286214135587215, "rewards/rejected": -0.04337288811802864, "step": 60 }, { "epoch": 0.07341373885684321, "grad_norm": 2.7579557747488237, "learning_rate": 4.9e-06, "log_odds_chosen": 0.1982727348804474, "log_odds_ratio": -0.7039018869400024, "logits/chosen": -2.716829776763916, "logits/rejected": -2.7165746688842773, "logps/chosen": -0.7602167129516602, "logps/rejected": -0.8683260679244995, "loss": 0.5179, "nll_loss": 0.5095189213752747, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.03801083564758301, "rewards/margins": 0.005405469331890345, "rewards/rejected": -0.043416302651166916, "step": 70 }, { "epoch": 0.08390141583639224, "grad_norm": 2.7333788754363826, "learning_rate": 5.600000000000001e-06, "log_odds_chosen": 0.19610878825187683, "log_odds_ratio": -0.6825613379478455, "logits/chosen": -2.6934926509857178, "logits/rejected": -2.6538023948669434, "logps/chosen": -0.8004279136657715, "logps/rejected": -0.9359849095344543, "loss": 0.5198, "nll_loss": 0.44797396659851074, "rewards/accuracies": 0.53125, "rewards/chosen": -0.040021397173404694, "rewards/margins": 0.006777846720069647, "rewards/rejected": -0.04679924249649048, "step": 80 }, { "epoch": 0.09438909281594127, "grad_norm": 2.643892428655997, "learning_rate": 6.3e-06, "log_odds_chosen": 0.32694971561431885, "log_odds_ratio": -0.6449785828590393, "logits/chosen": -2.6064088344573975, "logits/rejected": -2.600590229034424, "logps/chosen": -0.7779799699783325, "logps/rejected": -0.970491886138916, "loss": 0.5108, "nll_loss": 0.4519652724266052, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.03889899700880051, "rewards/margins": 0.009625596925616264, "rewards/rejected": -0.04852459207177162, "step": 90 }, { "epoch": 0.1048767697954903, "grad_norm": 2.7386435335682178, "learning_rate": 7e-06, "log_odds_chosen": 0.24293240904808044, "log_odds_ratio": -0.65534907579422, "logits/chosen": -2.800649881362915, "logits/rejected": -2.783020257949829, "logps/chosen": -0.7912999391555786, "logps/rejected": -0.931311309337616, "loss": 0.5226, "nll_loss": 0.4863203167915344, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03956499695777893, "rewards/margins": 0.007000570185482502, "rewards/rejected": -0.04656556248664856, "step": 100 }, { "epoch": 0.1048767697954903, "eval_log_odds_chosen": 0.2873421609401703, "eval_log_odds_ratio": -0.632556140422821, "eval_logits/chosen": -2.7859702110290527, "eval_logits/rejected": -2.758275270462036, "eval_logps/chosen": -0.7728292942047119, "eval_logps/rejected": -0.9448140263557434, "eval_loss": 0.5279971957206726, "eval_nll_loss": 0.49532046914100647, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.03864146023988724, "eval_rewards/margins": 0.008599241264164448, "eval_rewards/rejected": -0.04724070429801941, "eval_runtime": 137.6903, "eval_samples_per_second": 14.482, "eval_steps_per_second": 0.458, "step": 100 }, { "epoch": 0.11536444677503933, "grad_norm": 3.1992530570673416, "learning_rate": 6.674238124719146e-06, "log_odds_chosen": 0.34574735164642334, "log_odds_ratio": -0.612960934638977, "logits/chosen": -2.770359516143799, "logits/rejected": -2.785818099975586, "logps/chosen": -0.7360346913337708, "logps/rejected": -0.9339498281478882, "loss": 0.516, "nll_loss": 0.46663737297058105, "rewards/accuracies": 0.65625, "rewards/chosen": -0.03680173680186272, "rewards/margins": 0.009895754046738148, "rewards/rejected": -0.04669748991727829, "step": 110 }, { "epoch": 0.12585212375458835, "grad_norm": 2.389888529611206, "learning_rate": 6.390096504226938e-06, "log_odds_chosen": 0.3332720696926117, "log_odds_ratio": -0.629552960395813, "logits/chosen": -2.765531063079834, "logits/rejected": -2.7438697814941406, "logps/chosen": -0.7498644590377808, "logps/rejected": -0.9586297273635864, "loss": 0.5424, "nll_loss": 0.5031455159187317, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.03749322146177292, "rewards/margins": 0.010438265278935432, "rewards/rejected": -0.0479314923286438, "step": 120 }, { "epoch": 0.1363398007341374, "grad_norm": 2.352563456984363, "learning_rate": 6.139406135149204e-06, "log_odds_chosen": 0.22595734894275665, "log_odds_ratio": -0.6784238219261169, "logits/chosen": -2.7593860626220703, "logits/rejected": -2.743048667907715, "logps/chosen": -0.7811408042907715, "logps/rejected": -0.9164878726005554, "loss": 0.5343, "nll_loss": 0.49365147948265076, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.039057038724422455, "rewards/margins": 0.006767353508621454, "rewards/rejected": -0.04582439363002777, "step": 130 }, { "epoch": 0.14682747771368643, "grad_norm": 2.436711404156596, "learning_rate": 5.916079783099616e-06, "log_odds_chosen": 0.2472628802061081, "log_odds_ratio": -0.6597720384597778, "logits/chosen": -2.6898269653320312, "logits/rejected": -2.669379711151123, "logps/chosen": -0.8302755355834961, "logps/rejected": -0.9775524139404297, "loss": 0.5262, "nll_loss": 0.49079251289367676, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.041513778269290924, "rewards/margins": 0.007363851182162762, "rewards/rejected": -0.04887763410806656, "step": 140 }, { "epoch": 0.15731515469323545, "grad_norm": 2.622232308829729, "learning_rate": 5.715476066494083e-06, "log_odds_chosen": 0.23396515846252441, "log_odds_ratio": -0.7018890976905823, "logits/chosen": -2.6906025409698486, "logits/rejected": -2.685272455215454, "logps/chosen": -0.8395276069641113, "logps/rejected": -0.9926843643188477, "loss": 0.4873, "nll_loss": 0.4751507639884949, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.041976384818553925, "rewards/margins": 0.007657832466065884, "rewards/rejected": -0.04963421821594238, "step": 150 }, { "epoch": 0.16780283167278448, "grad_norm": 2.5349291816098587, "learning_rate": 5.533985905294663e-06, "log_odds_chosen": 0.23518291115760803, "log_odds_ratio": -0.64958655834198, "logits/chosen": -2.7026143074035645, "logits/rejected": -2.690053701400757, "logps/chosen": -0.7785183191299438, "logps/rejected": -0.9093867540359497, "loss": 0.5435, "nll_loss": 0.4887324869632721, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.038925912231206894, "rewards/margins": 0.006543423049151897, "rewards/rejected": -0.045469339936971664, "step": 160 }, { "epoch": 0.1782905086523335, "grad_norm": 2.421225073724309, "learning_rate": 5.368754921931593e-06, "log_odds_chosen": 0.3210265636444092, "log_odds_ratio": -0.6400843262672424, "logits/chosen": -2.7624573707580566, "logits/rejected": -2.7493152618408203, "logps/chosen": -0.7663661241531372, "logps/rejected": -0.9589449763298035, "loss": 0.5263, "nll_loss": 0.4972688555717468, "rewards/accuracies": 0.625, "rewards/chosen": -0.03831830993294716, "rewards/margins": 0.009628941304981709, "rewards/rejected": -0.047947246581315994, "step": 170 }, { "epoch": 0.18877818563188253, "grad_norm": 2.413880479048562, "learning_rate": 5.217491947499509e-06, "log_odds_chosen": 0.29789280891418457, "log_odds_ratio": -0.6485607028007507, "logits/chosen": -2.750358819961548, "logits/rejected": -2.7341530323028564, "logps/chosen": -0.8058354258537292, "logps/rejected": -0.9941579699516296, "loss": 0.5125, "nll_loss": 0.4958602488040924, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04029177129268646, "rewards/margins": 0.009416128508746624, "rewards/rejected": -0.04970790073275566, "step": 180 }, { "epoch": 0.19926586261143156, "grad_norm": 2.6903547627560362, "learning_rate": 5.078333750770082e-06, "log_odds_chosen": 0.3165002167224884, "log_odds_ratio": -0.6190484762191772, "logits/chosen": -2.766507387161255, "logits/rejected": -2.747089385986328, "logps/chosen": -0.8013149499893188, "logps/rejected": -0.9806981086730957, "loss": 0.5316, "nll_loss": 0.5532199740409851, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04006574675440788, "rewards/margins": 0.008969161659479141, "rewards/rejected": -0.04903491213917732, "step": 190 }, { "epoch": 0.2097535395909806, "grad_norm": 2.1991852076726754, "learning_rate": 4.949747468305832e-06, "log_odds_chosen": 0.33575549721717834, "log_odds_ratio": -0.651211678981781, "logits/chosen": -2.7371087074279785, "logits/rejected": -2.7220566272735596, "logps/chosen": -0.7840306162834167, "logps/rejected": -1.0072247982025146, "loss": 0.5074, "nll_loss": 0.5064893960952759, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.039201535284519196, "rewards/margins": 0.011159711517393589, "rewards/rejected": -0.05036124587059021, "step": 200 }, { "epoch": 0.2097535395909806, "eval_log_odds_chosen": 0.31895044445991516, "eval_log_odds_ratio": -0.6356511116027832, "eval_logits/chosen": -2.700209140777588, "eval_logits/rejected": -2.673612594604492, "eval_logps/chosen": -0.7611523866653442, "eval_logps/rejected": -0.9565821290016174, "eval_loss": 0.5133659839630127, "eval_nll_loss": 0.47739487886428833, "eval_rewards/accuracies": 0.6408730149269104, "eval_rewards/chosen": -0.03805762156844139, "eval_rewards/margins": 0.009771487675607204, "eval_rewards/rejected": -0.04782910645008087, "eval_runtime": 136.4881, "eval_samples_per_second": 14.609, "eval_steps_per_second": 0.462, "step": 200 }, { "epoch": 0.22024121657052964, "grad_norm": 2.2979124053363367, "learning_rate": 4.830458915396479e-06, "log_odds_chosen": 0.14570581912994385, "log_odds_ratio": -0.7079066038131714, "logits/chosen": -2.6945998668670654, "logits/rejected": -2.693587064743042, "logps/chosen": -0.7664598226547241, "logps/rejected": -0.8435371518135071, "loss": 0.5092, "nll_loss": 0.47726479172706604, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03832298889756203, "rewards/margins": 0.003853868693113327, "rewards/rejected": -0.04217685014009476, "step": 210 }, { "epoch": 0.23072889355007867, "grad_norm": 2.7379211509120998, "learning_rate": 4.719399037242694e-06, "log_odds_chosen": 0.2301570177078247, "log_odds_ratio": -0.6864482164382935, "logits/chosen": -2.7330780029296875, "logits/rejected": -2.738948106765747, "logps/chosen": -0.7607365250587463, "logps/rejected": -0.902021050453186, "loss": 0.5025, "nll_loss": 0.4629960060119629, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.038036830723285675, "rewards/margins": 0.007064227946102619, "rewards/rejected": -0.04510105401277542, "step": 220 }, { "epoch": 0.2412165705296277, "grad_norm": 2.3286309701071986, "learning_rate": 4.615663313770509e-06, "log_odds_chosen": 0.30348774790763855, "log_odds_ratio": -0.6618221402168274, "logits/chosen": -2.681114673614502, "logits/rejected": -2.680468797683716, "logps/chosen": -0.8015350103378296, "logps/rejected": -0.9835436940193176, "loss": 0.5126, "nll_loss": 0.47201746702194214, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04007675126194954, "rewards/margins": 0.009100432507693768, "rewards/rejected": -0.04917718470096588, "step": 230 }, { "epoch": 0.2517042475091767, "grad_norm": 2.498755216094707, "learning_rate": 4.51848057057532e-06, "log_odds_chosen": 0.28177785873413086, "log_odds_ratio": -0.6470693945884705, "logits/chosen": -2.7920804023742676, "logits/rejected": -2.7859511375427246, "logps/chosen": -0.7856557965278625, "logps/rejected": -0.9694973826408386, "loss": 0.5227, "nll_loss": 0.49716347455978394, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03928279131650925, "rewards/margins": 0.009192083030939102, "rewards/rejected": -0.04847487062215805, "step": 240 }, { "epoch": 0.26219192448872575, "grad_norm": 2.5700569103186335, "learning_rate": 4.427188724235731e-06, "log_odds_chosen": 0.2942022681236267, "log_odds_ratio": -0.6677531003952026, "logits/chosen": -2.761166572570801, "logits/rejected": -2.763213634490967, "logps/chosen": -0.77226322889328, "logps/rejected": -0.9335973858833313, "loss": 0.4963, "nll_loss": 0.4665839672088623, "rewards/accuracies": 0.59375, "rewards/chosen": -0.03861316293478012, "rewards/margins": 0.008066706359386444, "rewards/rejected": -0.04667987301945686, "step": 250 }, { "epoch": 0.2726796014682748, "grad_norm": 2.5460185754878415, "learning_rate": 4.341215710622295e-06, "log_odds_chosen": 0.31073135137557983, "log_odds_ratio": -0.6524397134780884, "logits/chosen": -2.721327304840088, "logits/rejected": -2.711200475692749, "logps/chosen": -0.7779613137245178, "logps/rejected": -0.9653064608573914, "loss": 0.478, "nll_loss": 0.40727710723876953, "rewards/accuracies": 0.625, "rewards/chosen": -0.038898058235645294, "rewards/margins": 0.009367265738546848, "rewards/rejected": -0.048265330493450165, "step": 260 }, { "epoch": 0.2831672784478238, "grad_norm": 2.63045792619979, "learning_rate": 4.260064336151291e-06, "log_odds_chosen": 0.2511529326438904, "log_odds_ratio": -0.6676173806190491, "logits/chosen": -2.757246255874634, "logits/rejected": -2.7497289180755615, "logps/chosen": -0.8231350779533386, "logps/rejected": -0.9868103265762329, "loss": 0.5115, "nll_loss": 0.48606061935424805, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04115675389766693, "rewards/margins": 0.008183758705854416, "rewards/rejected": -0.04934050887823105, "step": 270 }, { "epoch": 0.29365495542737285, "grad_norm": 2.074128745122309, "learning_rate": 4.183300132670378e-06, "log_odds_chosen": 0.27424556016921997, "log_odds_ratio": -0.6629655361175537, "logits/chosen": -2.694702625274658, "logits/rejected": -2.695335626602173, "logps/chosen": -0.8050632476806641, "logps/rejected": -0.9577094912528992, "loss": 0.4891, "nll_loss": 0.4250563681125641, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.0402531661093235, "rewards/margins": 0.007632312830537558, "rewards/rejected": -0.0478854700922966, "step": 280 }, { "epoch": 0.30414263240692185, "grad_norm": 2.818316169672816, "learning_rate": 4.110541536602925e-06, "log_odds_chosen": 0.40846139192581177, "log_odds_ratio": -0.6159543991088867, "logits/chosen": -2.689415216445923, "logits/rejected": -2.6885359287261963, "logps/chosen": -0.729388952255249, "logps/rejected": -0.9667993783950806, "loss": 0.5032, "nll_loss": 0.43972086906433105, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.03646944463253021, "rewards/margins": 0.011870523914694786, "rewards/rejected": -0.04833997040987015, "step": 290 }, { "epoch": 0.3146303093864709, "grad_norm": 2.6319487345124495, "learning_rate": 4.0414518843273805e-06, "log_odds_chosen": 0.2938074767589569, "log_odds_ratio": -0.675439178943634, "logits/chosen": -2.746011257171631, "logits/rejected": -2.719851016998291, "logps/chosen": -0.7730266451835632, "logps/rejected": -0.9800483584403992, "loss": 0.5265, "nll_loss": 0.45733898878097534, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.03865132853388786, "rewards/margins": 0.01035108882933855, "rewards/rejected": -0.04900241643190384, "step": 300 }, { "epoch": 0.3146303093864709, "eval_log_odds_chosen": 0.32782861590385437, "eval_log_odds_ratio": -0.6374222040176392, "eval_logits/chosen": -2.75937819480896, "eval_logits/rejected": -2.731720209121704, "eval_logps/chosen": -0.7587753534317017, "eval_logps/rejected": -0.9572128653526306, "eval_loss": 0.5012248754501343, "eval_nll_loss": 0.4652516841888428, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.037938766181468964, "eval_rewards/margins": 0.009921879507601261, "eval_rewards/rejected": -0.04786064475774765, "eval_runtime": 143.3287, "eval_samples_per_second": 13.912, "eval_steps_per_second": 0.44, "step": 300 }, { "epoch": 0.3251179863660199, "grad_norm": 2.303425231373124, "learning_rate": 3.975732839729454e-06, "log_odds_chosen": 0.23192088305950165, "log_odds_ratio": -0.6818796396255493, "logits/chosen": -2.7074503898620605, "logits/rejected": -2.673837661743164, "logps/chosen": -0.7971353530883789, "logps/rejected": -0.9301053285598755, "loss": 0.5302, "nll_loss": 0.48708105087280273, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.039856769144535065, "rewards/margins": 0.006648494862020016, "rewards/rejected": -0.046505264937877655, "step": 310 }, { "epoch": 0.33560566334556896, "grad_norm": 2.5118343787899735, "learning_rate": 3.913118960624632e-06, "log_odds_chosen": 0.3314226567745209, "log_odds_ratio": -0.6417438387870789, "logits/chosen": -2.7188448905944824, "logits/rejected": -2.7005674839019775, "logps/chosen": -0.7902022004127502, "logps/rejected": -0.9723421335220337, "loss": 0.4738, "nll_loss": 0.44032588601112366, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03951011225581169, "rewards/margins": 0.009106996469199657, "rewards/rejected": -0.048617102205753326, "step": 320 }, { "epoch": 0.34609334032511796, "grad_norm": 2.490550595224948, "learning_rate": 3.853373177942262e-06, "log_odds_chosen": 0.29606467485427856, "log_odds_ratio": -0.6935312151908875, "logits/chosen": -2.6737678050994873, "logits/rejected": -2.6778550148010254, "logps/chosen": -0.7957532405853271, "logps/rejected": -0.9609133005142212, "loss": 0.5015, "nll_loss": 0.48406466841697693, "rewards/accuracies": 0.59375, "rewards/chosen": -0.039787657558918, "rewards/margins": 0.008258005604147911, "rewards/rejected": -0.04804566502571106, "step": 330 }, { "epoch": 0.356581017304667, "grad_norm": 2.455512863241718, "learning_rate": 3.796283011826483e-06, "log_odds_chosen": 0.2068498581647873, "log_odds_ratio": -0.6988531947135925, "logits/chosen": -2.656428575515747, "logits/rejected": -2.67673659324646, "logps/chosen": -0.7645977139472961, "logps/rejected": -0.9020528793334961, "loss": 0.5161, "nll_loss": 0.46574801206588745, "rewards/accuracies": 0.5625, "rewards/chosen": -0.038229890167713165, "rewards/margins": 0.006872760597616434, "rewards/rejected": -0.04510264843702316, "step": 340 }, { "epoch": 0.36706869428421607, "grad_norm": 2.3906859020418243, "learning_rate": 3.7416573867739415e-06, "log_odds_chosen": 0.32536062598228455, "log_odds_ratio": -0.6628221273422241, "logits/chosen": -2.7076945304870605, "logits/rejected": -2.6763672828674316, "logps/chosen": -0.7698060274124146, "logps/rejected": -0.9597750902175903, "loss": 0.4925, "nll_loss": 0.468719482421875, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03849030286073685, "rewards/margins": 0.009498453699052334, "rewards/rejected": -0.047988757491111755, "step": 350 }, { "epoch": 0.37755637126376507, "grad_norm": 2.1635991647413824, "learning_rate": 3.689323936863109e-06, "log_odds_chosen": 0.4051761031150818, "log_odds_ratio": -0.6067623496055603, "logits/chosen": -2.6350862979888916, "logits/rejected": -2.635108232498169, "logps/chosen": -0.768888533115387, "logps/rejected": -1.0009427070617676, "loss": 0.5009, "nll_loss": 0.45801717042922974, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.03844442963600159, "rewards/margins": 0.011602705344557762, "rewards/rejected": -0.0500471368432045, "step": 360 }, { "epoch": 0.3880440482433141, "grad_norm": 2.3887899088845037, "learning_rate": 3.6391267143702543e-06, "log_odds_chosen": 0.4100113809108734, "log_odds_ratio": -0.6096552014350891, "logits/chosen": -2.707559108734131, "logits/rejected": -2.6750998497009277, "logps/chosen": -0.7636415362358093, "logps/rejected": -1.0189807415008545, "loss": 0.4701, "nll_loss": 0.45124197006225586, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.038182083517313004, "rewards/margins": 0.012766959145665169, "rewards/rejected": -0.050949037075042725, "step": 370 }, { "epoch": 0.3985317252228631, "grad_norm": 2.5794228625801225, "learning_rate": 3.5909242322980396e-06, "log_odds_chosen": 0.4701065421104431, "log_odds_ratio": -0.5877975821495056, "logits/chosen": -2.7147293090820312, "logits/rejected": -2.700373888015747, "logps/chosen": -0.7640558481216431, "logps/rejected": -1.0210450887680054, "loss": 0.4866, "nll_loss": 0.4662235379219055, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03820279613137245, "rewards/margins": 0.012849463149905205, "rewards/rejected": -0.05105225369334221, "step": 380 }, { "epoch": 0.4090194022024122, "grad_norm": 2.2524505662506007, "learning_rate": 3.544587784792833e-06, "log_odds_chosen": 0.15358106791973114, "log_odds_ratio": -0.6960343718528748, "logits/chosen": -2.6469695568084717, "logits/rejected": -2.6523191928863525, "logps/chosen": -0.8073819875717163, "logps/rejected": -0.9069193005561829, "loss": 0.5052, "nll_loss": 0.48589834570884705, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.04036910459399223, "rewards/margins": 0.004976863972842693, "rewards/rejected": -0.0453459694981575, "step": 390 }, { "epoch": 0.4195070791819612, "grad_norm": 2.151733711875547, "learning_rate": 3.5e-06, "log_odds_chosen": 0.3257240355014801, "log_odds_ratio": -0.6618676781654358, "logits/chosen": -2.5556883811950684, "logits/rejected": -2.5709598064422607, "logps/chosen": -0.8370679616928101, "logps/rejected": -1.0387462377548218, "loss": 0.5194, "nll_loss": 0.471977561712265, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.0418534018099308, "rewards/margins": 0.010083912871778011, "rewards/rejected": -0.05193731188774109, "step": 400 }, { "epoch": 0.4195070791819612, "eval_log_odds_chosen": 0.3606604039669037, "eval_log_odds_ratio": -0.6283872127532959, "eval_logits/chosen": -2.6973965167999268, "eval_logits/rejected": -2.664045572280884, "eval_logps/chosen": -0.7416918277740479, "eval_logps/rejected": -0.9558579921722412, "eval_loss": 0.4911641776561737, "eval_nll_loss": 0.455983966588974, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.03708459436893463, "eval_rewards/margins": 0.010708308778703213, "eval_rewards/rejected": -0.04779290035367012, "eval_runtime": 137.3177, "eval_samples_per_second": 14.521, "eval_steps_per_second": 0.459, "step": 400 }, { "epoch": 0.4299947561615102, "grad_norm": 2.234889439349526, "learning_rate": 3.457053588273564e-06, "log_odds_chosen": 0.22749297320842743, "log_odds_ratio": -0.6977051496505737, "logits/chosen": -2.6853058338165283, "logits/rejected": -2.646806001663208, "logps/chosen": -0.7714927792549133, "logps/rejected": -0.9221086502075195, "loss": 0.4951, "nll_loss": 0.43608254194259644, "rewards/accuracies": 0.59375, "rewards/chosen": -0.038574643433094025, "rewards/margins": 0.00753078842535615, "rewards/rejected": -0.04610542953014374, "step": 410 }, { "epoch": 0.4404824331410593, "grad_norm": 2.0285171917411766, "learning_rate": 3.4156502553198657e-06, "log_odds_chosen": 0.3810080885887146, "log_odds_ratio": -0.6389856338500977, "logits/chosen": -2.6045069694519043, "logits/rejected": -2.621366024017334, "logps/chosen": -0.7517096996307373, "logps/rejected": -0.9603899121284485, "loss": 0.4852, "nll_loss": 0.42949992418289185, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.037585485726594925, "rewards/margins": 0.01043400727212429, "rewards/rejected": -0.048019491136074066, "step": 420 }, { "epoch": 0.4509701101206083, "grad_norm": 2.508500818711511, "learning_rate": 3.375699755192885e-06, "log_odds_chosen": 0.3060067594051361, "log_odds_ratio": -0.6428481936454773, "logits/chosen": -2.6315762996673584, "logits/rejected": -2.614450216293335, "logps/chosen": -0.7450464367866516, "logps/rejected": -0.9214862585067749, "loss": 0.5054, "nll_loss": 0.4888521730899811, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.03725232556462288, "rewards/margins": 0.008821990340948105, "rewards/rejected": -0.046074315905570984, "step": 430 }, { "epoch": 0.46145778710015734, "grad_norm": 2.209049048242546, "learning_rate": 3.337119062359573e-06, "log_odds_chosen": 0.2785058617591858, "log_odds_ratio": -0.6411095857620239, "logits/chosen": -2.6460564136505127, "logits/rejected": -2.6254661083221436, "logps/chosen": -0.7616952061653137, "logps/rejected": -0.9235254526138306, "loss": 0.5024, "nll_loss": 0.46845754981040955, "rewards/accuracies": 0.59375, "rewards/chosen": -0.038084764033555984, "rewards/margins": 0.008091514930129051, "rewards/rejected": -0.04617627337574959, "step": 440 }, { "epoch": 0.47194546407970633, "grad_norm": 2.0098987626040574, "learning_rate": 3.2998316455372222e-06, "log_odds_chosen": 0.37491756677627563, "log_odds_ratio": -0.648253321647644, "logits/chosen": -2.6618144512176514, "logits/rejected": -2.643500566482544, "logps/chosen": -0.7266156673431396, "logps/rejected": -0.9600238800048828, "loss": 0.4828, "nll_loss": 0.4462718069553375, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.03633078932762146, "rewards/margins": 0.01167040504515171, "rewards/rejected": -0.04800119251012802, "step": 450 }, { "epoch": 0.4824331410592554, "grad_norm": 2.3085421987869785, "learning_rate": 3.263766828841098e-06, "log_odds_chosen": 0.2140667885541916, "log_odds_ratio": -0.6971082091331482, "logits/chosen": -2.6545071601867676, "logits/rejected": -2.6458332538604736, "logps/chosen": -0.8354724049568176, "logps/rejected": -0.9942563772201538, "loss": 0.4871, "nll_loss": 0.48358869552612305, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.04177362099289894, "rewards/margins": 0.007939198985695839, "rewards/rejected": -0.04971281811594963, "step": 460 }, { "epoch": 0.4929208180388044, "grad_norm": 2.58413257051123, "learning_rate": 3.2288592281010976e-06, "log_odds_chosen": 0.30273735523223877, "log_odds_ratio": -0.6744717359542847, "logits/chosen": -2.6462035179138184, "logits/rejected": -2.6307010650634766, "logps/chosen": -0.7793454527854919, "logps/rejected": -0.9655405879020691, "loss": 0.4932, "nll_loss": 0.4597246050834656, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.038967277854681015, "rewards/margins": 0.009309760294854641, "rewards/rejected": -0.048277031630277634, "step": 470 }, { "epoch": 0.5034084950183534, "grad_norm": 2.275276830168767, "learning_rate": 3.195048252113469e-06, "log_odds_chosen": 0.25159093737602234, "log_odds_ratio": -0.6775428056716919, "logits/chosen": -2.6590356826782227, "logits/rejected": -2.649465560913086, "logps/chosen": -0.7499970197677612, "logps/rejected": -0.8869997262954712, "loss": 0.4713, "nll_loss": 0.4634857177734375, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03749985247850418, "rewards/margins": 0.006850133184343576, "rewards/rejected": -0.04434997960925102, "step": 480 }, { "epoch": 0.5138961719979025, "grad_norm": 2.134835184101472, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 0.2592507004737854, "log_odds_ratio": -0.6677337884902954, "logits/chosen": -2.638939619064331, "logits/rejected": -2.5990116596221924, "logps/chosen": -0.8319272994995117, "logps/rejected": -0.9564205408096313, "loss": 0.4941, "nll_loss": 0.4587552547454834, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.041596364229917526, "rewards/margins": 0.006224661134183407, "rewards/rejected": -0.04782102257013321, "step": 490 }, { "epoch": 0.5243838489774515, "grad_norm": 2.3707837495895494, "learning_rate": 3.1304951684997056e-06, "log_odds_chosen": 0.25932976603507996, "log_odds_ratio": -0.6785644292831421, "logits/chosen": -2.690480947494507, "logits/rejected": -2.6417829990386963, "logps/chosen": -0.7875474095344543, "logps/rejected": -0.9345542788505554, "loss": 0.5008, "nll_loss": 0.47637850046157837, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.0393773689866066, "rewards/margins": 0.007350355386734009, "rewards/rejected": -0.04672772437334061, "step": 500 }, { "epoch": 0.5243838489774515, "eval_log_odds_chosen": 0.3873175382614136, "eval_log_odds_ratio": -0.6208989024162292, "eval_logits/chosen": -2.62943434715271, "eval_logits/rejected": -2.5956878662109375, "eval_logps/chosen": -0.7454984188079834, "eval_logps/rejected": -0.9786220192909241, "eval_loss": 0.4847143888473511, "eval_nll_loss": 0.44987979531288147, "eval_rewards/accuracies": 0.6507936716079712, "eval_rewards/chosen": -0.03727491945028305, "eval_rewards/margins": 0.011656176298856735, "eval_rewards/rejected": -0.04893109202384949, "eval_runtime": 138.4279, "eval_samples_per_second": 14.405, "eval_steps_per_second": 0.455, "step": 500 }, { "epoch": 0.5348715259570005, "grad_norm": 1.9535668554599182, "learning_rate": 3.0996520993903337e-06, "log_odds_chosen": 0.32442158460617065, "log_odds_ratio": -0.6475775837898254, "logits/chosen": -2.6708967685699463, "logits/rejected": -2.649402141571045, "logps/chosen": -0.7484665513038635, "logps/rejected": -0.9413715600967407, "loss": 0.4786, "nll_loss": 0.48495978116989136, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.03742332383990288, "rewards/margins": 0.00964525155723095, "rewards/rejected": -0.047068577259778976, "step": 510 }, { "epoch": 0.5453592029365496, "grad_norm": 1.9645096615425393, "learning_rate": 3.069703067574602e-06, "log_odds_chosen": 0.2872227430343628, "log_odds_ratio": -0.6613379716873169, "logits/chosen": -2.6058475971221924, "logits/rejected": -2.577051877975464, "logps/chosen": -0.8017369508743286, "logps/rejected": -0.9904945492744446, "loss": 0.4897, "nll_loss": 0.4331512451171875, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04008684307336807, "rewards/margins": 0.009437882341444492, "rewards/rejected": -0.04952472820878029, "step": 520 }, { "epoch": 0.5558468799160986, "grad_norm": 1.9526548988230616, "learning_rate": 3.0406056993414858e-06, "log_odds_chosen": 0.42971426248550415, "log_odds_ratio": -0.641510009765625, "logits/chosen": -2.6119577884674072, "logits/rejected": -2.5998666286468506, "logps/chosen": -0.7399083375930786, "logps/rejected": -1.0167956352233887, "loss": 0.4914, "nll_loss": 0.41224998235702515, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03699541836977005, "rewards/margins": 0.013844366185367107, "rewards/rejected": -0.050839781761169434, "step": 530 }, { "epoch": 0.5663345568956476, "grad_norm": 1.9884035673972174, "learning_rate": 3.012320380383546e-06, "log_odds_chosen": 0.21374063193798065, "log_odds_ratio": -0.6833196878433228, "logits/chosen": -2.6167845726013184, "logits/rejected": -2.599025011062622, "logps/chosen": -0.7700163125991821, "logps/rejected": -0.890272319316864, "loss": 0.5043, "nll_loss": 0.47903138399124146, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03850081190466881, "rewards/margins": 0.006012803874909878, "rewards/rejected": -0.04451362043619156, "step": 540 }, { "epoch": 0.5768222338751966, "grad_norm": 2.186607185927277, "learning_rate": 2.9848100289785456e-06, "log_odds_chosen": 0.45103105902671814, "log_odds_ratio": -0.6082615852355957, "logits/chosen": -2.6567091941833496, "logits/rejected": -2.609574794769287, "logps/chosen": -0.7585142850875854, "logps/rejected": -1.0295699834823608, "loss": 0.4918, "nll_loss": 0.48958802223205566, "rewards/accuracies": 0.625, "rewards/chosen": -0.03792571276426315, "rewards/margins": 0.01355278305709362, "rewards/rejected": -0.05147849768400192, "step": 550 }, { "epoch": 0.5873099108547457, "grad_norm": 2.1145358879634872, "learning_rate": 2.958039891549808e-06, "log_odds_chosen": 0.2827582359313965, "log_odds_ratio": -0.6594165563583374, "logits/chosen": -2.6023669242858887, "logits/rejected": -2.574957847595215, "logps/chosen": -0.7867820858955383, "logps/rejected": -0.9555041193962097, "loss": 0.4774, "nll_loss": 0.45714274048805237, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0393391028046608, "rewards/margins": 0.008436103351414204, "rewards/rejected": -0.047775208950042725, "step": 560 }, { "epoch": 0.5977975878342947, "grad_norm": 2.3757421806444343, "learning_rate": 2.9319773580418683e-06, "log_odds_chosen": 0.2533697485923767, "log_odds_ratio": -0.6926103830337524, "logits/chosen": -2.662379264831543, "logits/rejected": -2.6397509574890137, "logps/chosen": -0.7862294316291809, "logps/rejected": -0.9584717750549316, "loss": 0.463, "nll_loss": 0.4819509983062744, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.039311472326517105, "rewards/margins": 0.00861212145537138, "rewards/rejected": -0.04792358726263046, "step": 570 }, { "epoch": 0.6082852648138437, "grad_norm": 2.172213103107974, "learning_rate": 2.906591794880899e-06, "log_odds_chosen": 0.3392280340194702, "log_odds_ratio": -0.6386864185333252, "logits/chosen": -2.6814630031585693, "logits/rejected": -2.6795036792755127, "logps/chosen": -0.7794855833053589, "logps/rejected": -1.0036094188690186, "loss": 0.4996, "nll_loss": 0.4401033818721771, "rewards/accuracies": 0.59375, "rewards/chosen": -0.038974277675151825, "rewards/margins": 0.011206192895770073, "rewards/rejected": -0.05018047243356705, "step": 580 }, { "epoch": 0.6187729417933928, "grad_norm": 2.0671922387658377, "learning_rate": 2.8818543935741638e-06, "log_odds_chosen": 0.3985132575035095, "log_odds_ratio": -0.6514524221420288, "logits/chosen": -2.6682472229003906, "logits/rejected": -2.679994821548462, "logps/chosen": -0.7318185567855835, "logps/rejected": -0.9744182825088501, "loss": 0.4678, "nll_loss": 0.49909916520118713, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.03659093379974365, "rewards/margins": 0.012129982002079487, "rewards/rejected": -0.048720914870500565, "step": 590 }, { "epoch": 0.6292606187729418, "grad_norm": 2.1967713493078604, "learning_rate": 2.8577380332470414e-06, "log_odds_chosen": 0.35757365822792053, "log_odds_ratio": -0.6395149230957031, "logits/chosen": -2.663159132003784, "logits/rejected": -2.649722099304199, "logps/chosen": -0.7385202646255493, "logps/rejected": -0.9542753100395203, "loss": 0.4725, "nll_loss": 0.4449065625667572, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.03692600876092911, "rewards/margins": 0.010787753388285637, "rewards/rejected": -0.04771377146244049, "step": 600 }, { "epoch": 0.6292606187729418, "eval_log_odds_chosen": 0.35674363374710083, "eval_log_odds_ratio": -0.631996214389801, "eval_logits/chosen": -2.647721767425537, "eval_logits/rejected": -2.6147334575653076, "eval_logps/chosen": -0.7248181104660034, "eval_logps/rejected": -0.9394434690475464, "eval_loss": 0.4794267416000366, "eval_nll_loss": 0.44346076250076294, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.03624090179800987, "eval_rewards/margins": 0.01073127705603838, "eval_rewards/rejected": -0.046972181648015976, "eval_runtime": 137.9534, "eval_samples_per_second": 14.454, "eval_steps_per_second": 0.457, "step": 600 }, { "epoch": 0.6397482957524908, "grad_norm": 2.2292431160793216, "learning_rate": 2.834217155626206e-06, "log_odds_chosen": 0.23770160973072052, "log_odds_ratio": -0.6840949654579163, "logits/chosen": -2.5699760913848877, "logits/rejected": -2.5653116703033447, "logps/chosen": -0.7841805219650269, "logps/rejected": -0.9241795539855957, "loss": 0.4832, "nll_loss": 0.4458464980125427, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.039209023118019104, "rewards/margins": 0.006999955512583256, "rewards/rejected": -0.046208981424570084, "step": 610 }, { "epoch": 0.6502359727320398, "grad_norm": 2.2910730765164247, "learning_rate": 2.811267651158746e-06, "log_odds_chosen": 0.21747846901416779, "log_odds_ratio": -0.6945130825042725, "logits/chosen": -2.724179744720459, "logits/rejected": -2.691539764404297, "logps/chosen": -0.7931413054466248, "logps/rejected": -0.943394660949707, "loss": 0.487, "nll_loss": 0.4727168679237366, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.03965706750750542, "rewards/margins": 0.007512666285037994, "rewards/rejected": -0.04716973379254341, "step": 620 }, { "epoch": 0.6607236497115889, "grad_norm": 2.2609308397995616, "learning_rate": 2.788866755113585e-06, "log_odds_chosen": 0.29844212532043457, "log_odds_ratio": -0.690433919429779, "logits/chosen": -2.718883991241455, "logits/rejected": -2.7198710441589355, "logps/chosen": -0.7700183391571045, "logps/rejected": -0.9475862383842468, "loss": 0.4893, "nll_loss": 0.48064035177230835, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.038500916212797165, "rewards/margins": 0.00887839961796999, "rewards/rejected": -0.04737931489944458, "step": 630 }, { "epoch": 0.6712113266911379, "grad_norm": 2.6649009571693107, "learning_rate": 2.7669929526473316e-06, "log_odds_chosen": 0.4156903326511383, "log_odds_ratio": -0.6158550977706909, "logits/chosen": -2.7182445526123047, "logits/rejected": -2.6942853927612305, "logps/chosen": -0.7768423557281494, "logps/rejected": -1.0251133441925049, "loss": 0.4711, "nll_loss": 0.41822823882102966, "rewards/accuracies": 0.625, "rewards/chosen": -0.03884211927652359, "rewards/margins": 0.012413550168275833, "rewards/rejected": -0.051255665719509125, "step": 640 }, { "epoch": 0.6816990036706869, "grad_norm": 2.0343884705834268, "learning_rate": 2.745625891934577e-06, "log_odds_chosen": 0.23737592995166779, "log_odds_ratio": -0.6948662996292114, "logits/chosen": -2.74450421333313, "logits/rejected": -2.7467565536499023, "logps/chosen": -0.7428392767906189, "logps/rejected": -0.8866605758666992, "loss": 0.4898, "nll_loss": 0.4688393175601959, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03714196756482124, "rewards/margins": 0.00719106663018465, "rewards/rejected": -0.04433303326368332, "step": 650 }, { "epoch": 0.6921866806502359, "grad_norm": 2.0637062426142556, "learning_rate": 2.7247463045653303e-06, "log_odds_chosen": 0.36518558859825134, "log_odds_ratio": -0.6426655650138855, "logits/chosen": -2.7563986778259277, "logits/rejected": -2.74312424659729, "logps/chosen": -0.7905346751213074, "logps/rejected": -1.0196200609207153, "loss": 0.4859, "nll_loss": 0.4443667531013489, "rewards/accuracies": 0.625, "rewards/chosen": -0.03952673822641373, "rewards/margins": 0.011454259976744652, "rewards/rejected": -0.05098099634051323, "step": 660 }, { "epoch": 0.702674357629785, "grad_norm": 1.992995386941069, "learning_rate": 2.704335932501895e-06, "log_odds_chosen": 0.490286260843277, "log_odds_ratio": -0.6087489724159241, "logits/chosen": -2.72459077835083, "logits/rejected": -2.7280569076538086, "logps/chosen": -0.7373065948486328, "logps/rejected": -1.0489108562469482, "loss": 0.4831, "nll_loss": 0.42895203828811646, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03686532750725746, "rewards/margins": 0.01558021642267704, "rewards/rejected": -0.05244554951786995, "step": 670 }, { "epoch": 0.713162034609334, "grad_norm": 2.8251895935339886, "learning_rate": 2.6843774609657963e-06, "log_odds_chosen": 0.3856969177722931, "log_odds_ratio": -0.6318041086196899, "logits/chosen": -2.7299182415008545, "logits/rejected": -2.699131488800049, "logps/chosen": -0.7913435697555542, "logps/rejected": -1.0201423168182373, "loss": 0.4669, "nll_loss": 0.45303601026535034, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.03956717997789383, "rewards/margins": 0.011439927853643894, "rewards/rejected": -0.05100711062550545, "step": 680 }, { "epoch": 0.723649711588883, "grad_norm": 2.3126283290431457, "learning_rate": 2.6648544566940834e-06, "log_odds_chosen": 0.21687667071819305, "log_odds_ratio": -0.7159269452095032, "logits/chosen": -2.7354016304016113, "logits/rejected": -2.722414493560791, "logps/chosen": -0.7863477468490601, "logps/rejected": -0.9429599046707153, "loss": 0.4903, "nll_loss": 0.5047397613525391, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03931739181280136, "rewards/margins": 0.007830603048205376, "rewards/rejected": -0.04714799299836159, "step": 690 }, { "epoch": 0.7341373885684321, "grad_norm": 2.323029961728673, "learning_rate": 2.6457513110645903e-06, "log_odds_chosen": 0.342260479927063, "log_odds_ratio": -0.6298097968101501, "logits/chosen": -2.679320812225342, "logits/rejected": -2.6582911014556885, "logps/chosen": -0.7469282746315002, "logps/rejected": -0.9541714787483215, "loss": 0.4875, "nll_loss": 0.4991229474544525, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.03734641522169113, "rewards/margins": 0.010362156666815281, "rewards/rejected": -0.04770857095718384, "step": 700 }, { "epoch": 0.7341373885684321, "eval_log_odds_chosen": 0.417955607175827, "eval_log_odds_ratio": -0.6158252358436584, "eval_logits/chosen": -2.7213134765625, "eval_logits/rejected": -2.691012144088745, "eval_logps/chosen": -0.7365118861198425, "eval_logps/rejected": -0.9954525232315063, "eval_loss": 0.47666841745376587, "eval_nll_loss": 0.441643089056015, "eval_rewards/accuracies": 0.6408730149269104, "eval_rewards/chosen": -0.036825601011514664, "eval_rewards/margins": 0.01294703409075737, "eval_rewards/rejected": -0.049772635102272034, "eval_runtime": 140.8809, "eval_samples_per_second": 14.154, "eval_steps_per_second": 0.447, "step": 700 }, { "epoch": 0.7446250655479811, "grad_norm": 2.2253143227977055, "learning_rate": 2.627053187642805e-06, "log_odds_chosen": 0.31003057956695557, "log_odds_ratio": -0.6495457887649536, "logits/chosen": -2.7463955879211426, "logits/rejected": -2.7364678382873535, "logps/chosen": -0.7539780139923096, "logps/rejected": -0.9565252065658569, "loss": 0.4819, "nll_loss": 0.4394974708557129, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0376988984644413, "rewards/margins": 0.010127360001206398, "rewards/rejected": -0.047826264053583145, "step": 710 }, { "epoch": 0.7551127425275301, "grad_norm": 1.9919741933282713, "learning_rate": 2.6087459737497545e-06, "log_odds_chosen": 0.40133896470069885, "log_odds_ratio": -0.6439169645309448, "logits/chosen": -2.7264726161956787, "logits/rejected": -2.7285008430480957, "logps/chosen": -0.7132266759872437, "logps/rejected": -0.9523170590400696, "loss": 0.4904, "nll_loss": 0.42442673444747925, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.03566133230924606, "rewards/margins": 0.011954517103731632, "rewards/rejected": -0.04761584475636482, "step": 720 }, { "epoch": 0.7656004195070791, "grad_norm": 2.5524316814232657, "learning_rate": 2.5908162356916185e-06, "log_odds_chosen": 0.1571163833141327, "log_odds_ratio": -0.7166911363601685, "logits/chosen": -2.805894613265991, "logits/rejected": -2.7996468544006348, "logps/chosen": -0.7540133595466614, "logps/rejected": -0.8382581472396851, "loss": 0.4937, "nll_loss": 0.4598192572593689, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.03770066425204277, "rewards/margins": 0.004212243482470512, "rewards/rejected": -0.041912905871868134, "step": 730 }, { "epoch": 0.7760880964866282, "grad_norm": 2.1353118528501684, "learning_rate": 2.5732511773283276e-06, "log_odds_chosen": 0.35292255878448486, "log_odds_ratio": -0.625573992729187, "logits/chosen": -2.8535656929016113, "logits/rejected": -2.8482494354248047, "logps/chosen": -0.7254922389984131, "logps/rejected": -0.9415895342826843, "loss": 0.4903, "nll_loss": 0.4391508996486664, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.036274611949920654, "rewards/margins": 0.010804859921336174, "rewards/rejected": -0.04707947373390198, "step": 740 }, { "epoch": 0.7865757734661772, "grad_norm": 2.076299852744321, "learning_rate": 2.556038601690775e-06, "log_odds_chosen": 0.27716293931007385, "log_odds_ratio": -0.6662799119949341, "logits/chosen": -2.8263370990753174, "logits/rejected": -2.8200631141662598, "logps/chosen": -0.7884274125099182, "logps/rejected": -0.9425498843193054, "loss": 0.5033, "nll_loss": 0.460857093334198, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.03942137211561203, "rewards/margins": 0.00770611921325326, "rewards/rejected": -0.04712748900055885, "step": 750 }, { "epoch": 0.7970634504457262, "grad_norm": 2.144911846283459, "learning_rate": 2.539166875385041e-06, "log_odds_chosen": 0.28878992795944214, "log_odds_ratio": -0.6523956060409546, "logits/chosen": -2.827876567840576, "logits/rejected": -2.818580389022827, "logps/chosen": -0.7346550226211548, "logps/rejected": -0.9111967086791992, "loss": 0.4719, "nll_loss": 0.3698672354221344, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.0367327556014061, "rewards/margins": 0.008827080950140953, "rewards/rejected": -0.0455598309636116, "step": 760 }, { "epoch": 0.8075511274252754, "grad_norm": 2.457074288822972, "learning_rate": 2.522624895547565e-06, "log_odds_chosen": 0.2632114589214325, "log_odds_ratio": -0.6844597458839417, "logits/chosen": -2.785381317138672, "logits/rejected": -2.7871222496032715, "logps/chosen": -0.796169102191925, "logps/rejected": -0.9764283895492554, "loss": 0.4935, "nll_loss": 0.4608798921108246, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.039808452129364014, "rewards/margins": 0.009012967348098755, "rewards/rejected": -0.048821426928043365, "step": 770 }, { "epoch": 0.8180388044048243, "grad_norm": 2.1250851855347417, "learning_rate": 2.506402059138015e-06, "log_odds_chosen": 0.2769099771976471, "log_odds_ratio": -0.6522020101547241, "logits/chosen": -2.8049657344818115, "logits/rejected": -2.8198862075805664, "logps/chosen": -0.7881239056587219, "logps/rejected": -0.9357802271842957, "loss": 0.5049, "nll_loss": 0.5033601522445679, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.039406199008226395, "rewards/margins": 0.0073828138411045074, "rewards/rejected": -0.0467890128493309, "step": 780 }, { "epoch": 0.8285264813843733, "grad_norm": 2.1157883450641966, "learning_rate": 2.49048823437687e-06, "log_odds_chosen": 0.4010138511657715, "log_odds_ratio": -0.6229840517044067, "logits/chosen": -2.8338706493377686, "logits/rejected": -2.8394291400909424, "logps/chosen": -0.7245864272117615, "logps/rejected": -0.9661226272583008, "loss": 0.4661, "nll_loss": 0.4065842032432556, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.03622932359576225, "rewards/margins": 0.01207680907100439, "rewards/rejected": -0.04830613359808922, "step": 790 }, { "epoch": 0.8390141583639223, "grad_norm": 2.3895076758034515, "learning_rate": 2.474873734152916e-06, "log_odds_chosen": 0.48685508966445923, "log_odds_ratio": -0.5867618918418884, "logits/chosen": -2.813389301300049, "logits/rejected": -2.7975525856018066, "logps/chosen": -0.6979315876960754, "logps/rejected": -1.0023411512374878, "loss": 0.4796, "nll_loss": 0.3860110640525818, "rewards/accuracies": 0.6875, "rewards/chosen": -0.03489658236503601, "rewards/margins": 0.015220480971038342, "rewards/rejected": -0.050117067992687225, "step": 800 }, { "epoch": 0.8390141583639223, "eval_log_odds_chosen": 0.4362943768501282, "eval_log_odds_ratio": -0.6168639063835144, "eval_logits/chosen": -2.8114309310913086, "eval_logits/rejected": -2.791295289993286, "eval_logps/chosen": -0.7415919303894043, "eval_logps/rejected": -1.016213297843933, "eval_loss": 0.4739992916584015, "eval_nll_loss": 0.4396199584007263, "eval_rewards/accuracies": 0.6507936716079712, "eval_rewards/chosen": -0.037079595029354095, "eval_rewards/margins": 0.013731070794165134, "eval_rewards/rejected": -0.050810668617486954, "eval_runtime": 137.8725, "eval_samples_per_second": 14.463, "eval_steps_per_second": 0.457, "step": 800 }, { "epoch": 0.8495018353434715, "grad_norm": 2.2171962411607398, "learning_rate": 2.459549291242073e-06, "log_odds_chosen": 0.4064277708530426, "log_odds_ratio": -0.6227105259895325, "logits/chosen": -2.8798890113830566, "logits/rejected": -2.8490796089172363, "logps/chosen": -0.729169487953186, "logps/rejected": -0.9680086970329285, "loss": 0.4744, "nll_loss": 0.4338308870792389, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.03645847737789154, "rewards/margins": 0.011941960081458092, "rewards/rejected": -0.04840043932199478, "step": 810 }, { "epoch": 0.8599895123230205, "grad_norm": 2.607409368726623, "learning_rate": 2.4445060351935238e-06, "log_odds_chosen": 0.3091586232185364, "log_odds_ratio": -0.6474903225898743, "logits/chosen": -2.820725679397583, "logits/rejected": -2.804964303970337, "logps/chosen": -0.7581018805503845, "logps/rejected": -0.9343080520629883, "loss": 0.4661, "nll_loss": 0.3911210894584656, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.03790510073304176, "rewards/margins": 0.00881030224263668, "rewards/rejected": -0.046715401113033295, "step": 820 }, { "epoch": 0.8704771893025695, "grad_norm": 2.6267861444652034, "learning_rate": 2.4297354707521817e-06, "log_odds_chosen": 0.21734324097633362, "log_odds_ratio": -0.7081775069236755, "logits/chosen": -2.805722236633301, "logits/rejected": -2.8377511501312256, "logps/chosen": -0.777400553226471, "logps/rejected": -0.915818989276886, "loss": 0.4873, "nll_loss": 0.4305228292942047, "rewards/accuracies": 0.53125, "rewards/chosen": -0.03887002915143967, "rewards/margins": 0.0069209253415465355, "rewards/rejected": -0.04579095169901848, "step": 830 }, { "epoch": 0.8809648662821186, "grad_norm": 2.1614161917289363, "learning_rate": 2.4152294576982395e-06, "log_odds_chosen": 0.21988508105278015, "log_odds_ratio": -0.6872502565383911, "logits/chosen": -2.8258466720581055, "logits/rejected": -2.8268680572509766, "logps/chosen": -0.7874829769134521, "logps/rejected": -0.9251054525375366, "loss": 0.4733, "nll_loss": 0.4440709054470062, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.03937415033578873, "rewards/margins": 0.006881123874336481, "rewards/rejected": -0.04625527560710907, "step": 840 }, { "epoch": 0.8914525432616676, "grad_norm": 2.2102319814571074, "learning_rate": 2.4009801919951233e-06, "log_odds_chosen": 0.3129335641860962, "log_odds_ratio": -0.6348214149475098, "logits/chosen": -2.8568568229675293, "logits/rejected": -2.865201473236084, "logps/chosen": -0.749543309211731, "logps/rejected": -0.9329560399055481, "loss": 0.466, "nll_loss": 0.4490523934364319, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03747716546058655, "rewards/margins": 0.009170634672045708, "rewards/rejected": -0.046647801995277405, "step": 850 }, { "epoch": 0.9019402202412166, "grad_norm": 2.082847476776939, "learning_rate": 2.3869801881466573e-06, "log_odds_chosen": 0.2860751152038574, "log_odds_ratio": -0.6700129508972168, "logits/chosen": -2.825407028198242, "logits/rejected": -2.8392233848571777, "logps/chosen": -0.7431017756462097, "logps/rejected": -0.9103603363037109, "loss": 0.4884, "nll_loss": 0.4357692301273346, "rewards/accuracies": 0.5625, "rewards/chosen": -0.037155088037252426, "rewards/margins": 0.008362922817468643, "rewards/rejected": -0.045518018305301666, "step": 860 }, { "epoch": 0.9124278972207656, "grad_norm": 2.188429034443825, "learning_rate": 2.3732222626728365e-06, "log_odds_chosen": 0.3270949423313141, "log_odds_ratio": -0.6543049812316895, "logits/chosen": -2.8709769248962402, "logits/rejected": -2.888324022293091, "logps/chosen": -0.7763268947601318, "logps/rejected": -0.9964207410812378, "loss": 0.454, "nll_loss": 0.4407920837402344, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03881634771823883, "rewards/margins": 0.011004697531461716, "rewards/rejected": -0.04982104152441025, "step": 870 }, { "epoch": 0.9229155742003147, "grad_norm": 1.8451620085670009, "learning_rate": 2.359699518621347e-06, "log_odds_chosen": 0.3485734164714813, "log_odds_ratio": -0.6351412534713745, "logits/chosen": -2.9025185108184814, "logits/rejected": -2.8809902667999268, "logps/chosen": -0.7233132719993591, "logps/rejected": -0.9310896992683411, "loss": 0.4524, "nll_loss": 0.4024543762207031, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.03616566210985184, "rewards/margins": 0.010388821363449097, "rewards/rejected": -0.046554479748010635, "step": 880 }, { "epoch": 0.9334032511798637, "grad_norm": 1.9306573871485972, "learning_rate": 2.3464053310389682e-06, "log_odds_chosen": 0.3904303014278412, "log_odds_ratio": -0.623832106590271, "logits/chosen": -2.84079909324646, "logits/rejected": -2.8426525592803955, "logps/chosen": -0.7186557650566101, "logps/rejected": -0.9262601137161255, "loss": 0.4565, "nll_loss": 0.42616167664527893, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.035932786762714386, "rewards/margins": 0.010380217805504799, "rewards/rejected": -0.046313002705574036, "step": 890 }, { "epoch": 0.9438909281594127, "grad_norm": 2.157911532280212, "learning_rate": 2.333333333333333e-06, "log_odds_chosen": 0.3039458692073822, "log_odds_ratio": -0.6423442959785461, "logits/chosen": -2.896359920501709, "logits/rejected": -2.9049692153930664, "logps/chosen": -0.6981052756309509, "logps/rejected": -0.8672422170639038, "loss": 0.4851, "nll_loss": 0.428159236907959, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03490526229143143, "rewards/margins": 0.008456850424408913, "rewards/rejected": -0.04336211457848549, "step": 900 }, { "epoch": 0.9438909281594127, "eval_log_odds_chosen": 0.36685651540756226, "eval_log_odds_ratio": -0.6244728565216064, "eval_logits/chosen": -2.969223976135254, "eval_logits/rejected": -2.9542508125305176, "eval_logps/chosen": -0.7142534852027893, "eval_logps/rejected": -0.9323597550392151, "eval_loss": 0.47141149640083313, "eval_nll_loss": 0.4360823631286621, "eval_rewards/accuracies": 0.6527777910232544, "eval_rewards/chosen": -0.035712677985429764, "eval_rewards/margins": 0.01090531051158905, "eval_rewards/rejected": -0.046617984771728516, "eval_runtime": 138.0948, "eval_samples_per_second": 14.439, "eval_steps_per_second": 0.456, "step": 900 }, { "epoch": 0.9543786051389617, "grad_norm": 2.4004822961845957, "learning_rate": 2.3204774044612855e-06, "log_odds_chosen": 0.4948676526546478, "log_odds_ratio": -0.626745343208313, "logits/chosen": -2.963355302810669, "logits/rejected": -2.9515814781188965, "logps/chosen": -0.7483548521995544, "logps/rejected": -1.0602718591690063, "loss": 0.4776, "nll_loss": 0.42798590660095215, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.03741774708032608, "rewards/margins": 0.015595847740769386, "rewards/rejected": -0.05301359295845032, "step": 910 }, { "epoch": 0.9648662821185108, "grad_norm": 2.154391749062073, "learning_rate": 2.3078316568852547e-06, "log_odds_chosen": 0.3418871760368347, "log_odds_ratio": -0.6459903717041016, "logits/chosen": -2.8877079486846924, "logits/rejected": -2.9023048877716064, "logps/chosen": -0.7208271622657776, "logps/rejected": -0.9329261779785156, "loss": 0.4496, "nll_loss": 0.39838844537734985, "rewards/accuracies": 0.59375, "rewards/chosen": -0.03604135662317276, "rewards/margins": 0.010604949668049812, "rewards/rejected": -0.04664631187915802, "step": 920 }, { "epoch": 0.9753539590980598, "grad_norm": 2.4150467379552776, "learning_rate": 2.2953904252438353e-06, "log_odds_chosen": 0.31212860345840454, "log_odds_ratio": -0.6628017425537109, "logits/chosen": -2.9404473304748535, "logits/rejected": -2.935260772705078, "logps/chosen": -0.7885305285453796, "logps/rejected": -1.0043061971664429, "loss": 0.4752, "nll_loss": 0.48344022035598755, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0394265279173851, "rewards/margins": 0.010788780637085438, "rewards/rejected": -0.05021531134843826, "step": 930 }, { "epoch": 0.9858416360776088, "grad_norm": 2.2491855597526786, "learning_rate": 2.2831482556870475e-06, "log_odds_chosen": 0.2697109580039978, "log_odds_ratio": -0.6924097537994385, "logits/chosen": -2.9477505683898926, "logits/rejected": -2.9367494583129883, "logps/chosen": -0.7188832759857178, "logps/rejected": -0.8695234060287476, "loss": 0.4739, "nll_loss": 0.44516521692276, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03594416007399559, "rewards/margins": 0.007532012648880482, "rewards/rejected": -0.0434761717915535, "step": 940 }, { "epoch": 0.9963293130571579, "grad_norm": 2.438616188075854, "learning_rate": 2.2710998958306758e-06, "log_odds_chosen": 0.26511335372924805, "log_odds_ratio": -0.6899660229682922, "logits/chosen": -2.9427490234375, "logits/rejected": -2.945517063140869, "logps/chosen": -0.7803043127059937, "logps/rejected": -0.9409860372543335, "loss": 0.4993, "nll_loss": 0.4652082026004791, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03901521861553192, "rewards/margins": 0.008034082129597664, "rewards/rejected": -0.047049302607774734, "step": 950 }, { "epoch": 0.9994756161510225, "step": 953, "total_flos": 0.0, "train_loss": 0.5301580581685054, "train_runtime": 20737.8205, "train_samples_per_second": 2.942, "train_steps_per_second": 0.046 } ], "logging_steps": 10, "max_steps": 953, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }