{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.996784565916399, "eval_steps": 500, "global_step": 699, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 26.875, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.06212496757507324, "log_odds_ratio": -0.7014996409416199, "logits/chosen": -2.1857399940490723, "logits/rejected": -2.1817708015441895, "logps/chosen": -0.9498230814933777, "logps/rejected": -0.9784062504768372, "loss": 1.3118, "nll_loss": 1.3238964080810547, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 10 }, { "epoch": 0.09, "grad_norm": 25.375, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.16793885827064514, "log_odds_ratio": -0.6694984436035156, "logits/chosen": -2.263418674468994, "logits/rejected": -2.186417818069458, "logps/chosen": -0.8150558471679688, "logps/rejected": -0.9238722920417786, "loss": 1.205, "nll_loss": 1.1971018314361572, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 20 }, { "epoch": 0.13, "grad_norm": 7.125, "learning_rate": 1.5e-06, "log_odds_chosen": 0.00997834000736475, "log_odds_ratio": -0.7417184114456177, "logits/chosen": -2.3210864067077637, "logits/rejected": -2.2824535369873047, "logps/chosen": -0.7758495807647705, "logps/rejected": -0.7887269258499146, "loss": 1.0752, "nll_loss": 1.1008635759353638, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 30 }, { "epoch": 0.17, "grad_norm": 6.46875, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.12381462007761002, "log_odds_ratio": -0.6990920305252075, "logits/chosen": -2.387354612350464, "logits/rejected": -2.278778553009033, "logps/chosen": -0.6582767963409424, "logps/rejected": -0.7136567831039429, "loss": 0.9839, "nll_loss": 0.9438871145248413, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 40 }, { "epoch": 0.21, "grad_norm": 5.8125, "learning_rate": 2.5e-06, "log_odds_chosen": 0.10633399337530136, "log_odds_ratio": -0.702051043510437, "logits/chosen": -2.3278417587280273, "logits/rejected": -2.264376163482666, "logps/chosen": -0.6687366366386414, "logps/rejected": -0.7335516214370728, "loss": 0.9634, "nll_loss": 0.9985629320144653, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 50 }, { "epoch": 0.26, "grad_norm": 6.09375, "learning_rate": 3e-06, "log_odds_chosen": 0.03238314390182495, "log_odds_ratio": -0.7480851411819458, "logits/chosen": -2.3533706665039062, "logits/rejected": -2.2710163593292236, "logps/chosen": -0.7110857963562012, "logps/rejected": -0.7359805107116699, "loss": 0.9691, "nll_loss": 0.9716413617134094, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 60 }, { "epoch": 0.3, "grad_norm": 5.96875, "learning_rate": 3.5e-06, "log_odds_chosen": 0.03486456722021103, "log_odds_ratio": -0.7344475984573364, "logits/chosen": -2.3200392723083496, "logits/rejected": -2.2653918266296387, "logps/chosen": -0.6566824316978455, "logps/rejected": -0.6836172938346863, "loss": 0.9322, "nll_loss": 0.9105528593063354, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 70 }, { "epoch": 0.34, "grad_norm": 6.375, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.06068553403019905, "log_odds_ratio": -0.7511457204818726, "logits/chosen": -2.2898621559143066, "logits/rejected": -2.282437324523926, "logps/chosen": -0.6991879343986511, "logps/rejected": -0.7374504208564758, "loss": 0.9628, "nll_loss": 0.9841943979263306, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 80 }, { "epoch": 0.39, "grad_norm": 5.71875, "learning_rate": 4.5e-06, "log_odds_chosen": 0.18408647179603577, "log_odds_ratio": -0.6827269792556763, "logits/chosen": -2.3683393001556396, "logits/rejected": -2.2830727100372314, "logps/chosen": -0.6308820843696594, "logps/rejected": -0.7424929141998291, "loss": 0.8944, "nll_loss": 0.9193886518478394, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 90 }, { "epoch": 0.43, "grad_norm": 5.9375, "learning_rate": 5e-06, "log_odds_chosen": 0.035386841744184494, "log_odds_ratio": -0.7439508438110352, "logits/chosen": -2.35438871383667, "logits/rejected": -2.2569315433502197, "logps/chosen": -0.6244274973869324, "logps/rejected": -0.6488394141197205, "loss": 0.8782, "nll_loss": 0.8835960626602173, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 100 }, { "epoch": 0.47, "grad_norm": 5.875, "learning_rate": 4.996562390352354e-06, "log_odds_chosen": 0.13479042053222656, "log_odds_ratio": -0.7032457590103149, "logits/chosen": -2.338320255279541, "logits/rejected": -2.259917974472046, "logps/chosen": -0.6392095685005188, "logps/rejected": -0.7337976098060608, "loss": 0.904, "nll_loss": 0.9141031503677368, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 110 }, { "epoch": 0.51, "grad_norm": 5.125, "learning_rate": 4.986259015137485e-06, "log_odds_chosen": 0.1813308447599411, "log_odds_ratio": -0.6845273971557617, "logits/chosen": -2.3334765434265137, "logits/rejected": -2.208571434020996, "logps/chosen": -0.6667813062667847, "logps/rejected": -0.7594255208969116, "loss": 0.8882, "nll_loss": 0.9459765553474426, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 120 }, { "epoch": 0.56, "grad_norm": 5.15625, "learning_rate": 4.96911820954103e-06, "log_odds_chosen": 0.11342030763626099, "log_odds_ratio": -0.7043333649635315, "logits/chosen": -2.245288372039795, "logits/rejected": -2.161513328552246, "logps/chosen": -0.6394560933113098, "logps/rejected": -0.7009039521217346, "loss": 0.8905, "nll_loss": 0.8281471133232117, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 130 }, { "epoch": 0.6, "grad_norm": 5.75, "learning_rate": 4.945187112281936e-06, "log_odds_chosen": 0.0401572659611702, "log_odds_ratio": -0.7479963302612305, "logits/chosen": -2.3580965995788574, "logits/rejected": -2.3082127571105957, "logps/chosen": -0.655213475227356, "logps/rejected": -0.6817822456359863, "loss": 0.9124, "nll_loss": 0.9263037443161011, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 140 }, { "epoch": 0.64, "grad_norm": 5.59375, "learning_rate": 4.9145315359768575e-06, "log_odds_chosen": 0.08960182219743729, "log_odds_ratio": -0.7190832495689392, "logits/chosen": -2.3376193046569824, "logits/rejected": -2.2543139457702637, "logps/chosen": -0.6171378493309021, "logps/rejected": -0.6685695648193359, "loss": 0.8818, "nll_loss": 0.8352767825126648, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 150 }, { "epoch": 0.69, "grad_norm": 5.71875, "learning_rate": 4.877235786149681e-06, "log_odds_chosen": 0.001317462301813066, "log_odds_ratio": -0.7520455121994019, "logits/chosen": -2.294182300567627, "logits/rejected": -2.249718189239502, "logps/chosen": -0.5758674144744873, "logps/rejected": -0.5995661020278931, "loss": 0.8668, "nll_loss": 0.8348628282546997, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 160 }, { "epoch": 0.73, "grad_norm": 5.71875, "learning_rate": 4.833402429383947e-06, "log_odds_chosen": 0.007824589498341084, "log_odds_ratio": -0.7456755638122559, "logits/chosen": -2.394925832748413, "logits/rejected": -2.3120040893554688, "logps/chosen": -0.6642740368843079, "logps/rejected": -0.6849616169929504, "loss": 0.8936, "nll_loss": 0.8833368420600891, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 170 }, { "epoch": 0.77, "grad_norm": 5.4375, "learning_rate": 4.783152011255739e-06, "log_odds_chosen": 0.06460268795490265, "log_odds_ratio": -0.7266359925270081, "logits/chosen": -2.2752573490142822, "logits/rejected": -2.2238731384277344, "logps/chosen": -0.6234402656555176, "logps/rejected": -0.6678077578544617, "loss": 0.8939, "nll_loss": 0.8869989514350891, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 180 }, { "epoch": 0.81, "grad_norm": 5.46875, "learning_rate": 4.726622724822781e-06, "log_odds_chosen": 0.07030437141656876, "log_odds_ratio": -0.7147783041000366, "logits/chosen": -2.2865307331085205, "logits/rejected": -2.2341747283935547, "logps/chosen": -0.6607599854469299, "logps/rejected": -0.7076841592788696, "loss": 0.8779, "nll_loss": 0.8933170437812805, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 190 }, { "epoch": 0.86, "grad_norm": 5.125, "learning_rate": 4.663970030581408e-06, "log_odds_chosen": 0.11411430686712265, "log_odds_ratio": -0.7219884395599365, "logits/chosen": -2.235715389251709, "logits/rejected": -2.195624828338623, "logps/chosen": -0.5864480137825012, "logps/rejected": -0.6477882266044617, "loss": 0.8489, "nll_loss": 0.8421682119369507, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 200 }, { "epoch": 0.9, "grad_norm": 5.28125, "learning_rate": 4.59536622893656e-06, "log_odds_chosen": -0.019357014447450638, "log_odds_ratio": -0.7726019024848938, "logits/chosen": -2.271784782409668, "logits/rejected": -2.188135862350464, "logps/chosen": -0.6931017637252808, "logps/rejected": -0.7073479890823364, "loss": 0.8628, "nll_loss": 0.8622463345527649, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 210 }, { "epoch": 0.94, "grad_norm": 5.4375, "learning_rate": 4.520999986360555e-06, "log_odds_chosen": 0.07649385929107666, "log_odds_ratio": -0.7158172726631165, "logits/chosen": -2.250640392303467, "logits/rejected": -2.2138304710388184, "logps/chosen": -0.6532458662986755, "logps/rejected": -0.6902228593826294, "loss": 0.8572, "nll_loss": 0.868952751159668, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 220 }, { "epoch": 0.99, "grad_norm": 5.65625, "learning_rate": 4.441075816543745e-06, "log_odds_chosen": 0.030604243278503418, "log_odds_ratio": -0.7345600128173828, "logits/chosen": -2.3321053981781006, "logits/rejected": -2.2295913696289062, "logps/chosen": -0.6273137331008911, "logps/rejected": -0.6507210731506348, "loss": 0.8789, "nll_loss": 0.8177094459533691, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 230 }, { "epoch": 1.03, "grad_norm": 5.84375, "learning_rate": 4.355813517963924e-06, "log_odds_chosen": 0.40961089730262756, "log_odds_ratio": -0.5910171270370483, "logits/chosen": -2.3318493366241455, "logits/rejected": -2.2643091678619385, "logps/chosen": -0.4916546940803528, "logps/rejected": -0.6644417643547058, "loss": 0.758, "nll_loss": 0.740388810634613, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 240 }, { "epoch": 1.07, "grad_norm": 6.25, "learning_rate": 4.265447569421234e-06, "log_odds_chosen": 0.3989773392677307, "log_odds_ratio": -0.590238630771637, "logits/chosen": -2.3867685794830322, "logits/rejected": -2.3056893348693848, "logps/chosen": -0.5096999406814575, "logps/rejected": -0.6727738976478577, "loss": 0.6881, "nll_loss": 0.7256556153297424, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 250 }, { "epoch": 1.11, "grad_norm": 6.0625, "learning_rate": 4.170226485200899e-06, "log_odds_chosen": 0.48682594299316406, "log_odds_ratio": -0.5600502490997314, "logits/chosen": -2.3787410259246826, "logits/rejected": -2.30033016204834, "logps/chosen": -0.47443705797195435, "logps/rejected": -0.6933677792549133, "loss": 0.725, "nll_loss": 0.6927866339683533, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 260 }, { "epoch": 1.16, "grad_norm": 5.875, "learning_rate": 4.070412131637139e-06, "log_odds_chosen": 0.38443347811698914, "log_odds_ratio": -0.6165519952774048, "logits/chosen": -2.3166797161102295, "logits/rejected": -2.23835825920105, "logps/chosen": -0.4729565978050232, "logps/rejected": -0.623404324054718, "loss": 0.719, "nll_loss": 0.689362645149231, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 270 }, { "epoch": 1.2, "grad_norm": 6.09375, "learning_rate": 3.966279006957781e-06, "log_odds_chosen": 0.4762851297855377, "log_odds_ratio": -0.5657437443733215, "logits/chosen": -2.3141160011291504, "logits/rejected": -2.2335238456726074, "logps/chosen": -0.5018330812454224, "logps/rejected": -0.7071572542190552, "loss": 0.7292, "nll_loss": 0.7584232687950134, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 280 }, { "epoch": 1.24, "grad_norm": 5.625, "learning_rate": 3.858113486390056e-06, "log_odds_chosen": 0.461935818195343, "log_odds_ratio": -0.5941855907440186, "logits/chosen": -2.339001178741455, "logits/rejected": -2.2689056396484375, "logps/chosen": -0.48000845313072205, "logps/rejected": -0.7079610228538513, "loss": 0.7325, "nll_loss": 0.7081630229949951, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 290 }, { "epoch": 1.29, "grad_norm": 6.375, "learning_rate": 3.7462130346036e-06, "log_odds_chosen": 0.4929097592830658, "log_odds_ratio": -0.563489556312561, "logits/chosen": -2.3332343101501465, "logits/rejected": -2.2668535709381104, "logps/chosen": -0.4893871247768402, "logps/rejected": -0.7047632336616516, "loss": 0.6818, "nll_loss": 0.7156692147254944, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 300 }, { "epoch": 1.33, "grad_norm": 5.625, "learning_rate": 3.6308853876565232e-06, "log_odds_chosen": 0.4562360346317291, "log_odds_ratio": -0.5689755082130432, "logits/chosen": -2.3754940032958984, "logits/rejected": -2.27487850189209, "logps/chosen": -0.4727197289466858, "logps/rejected": -0.6664949655532837, "loss": 0.7119, "nll_loss": 0.6855541467666626, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 310 }, { "epoch": 1.37, "grad_norm": 5.375, "learning_rate": 3.512447706694254e-06, "log_odds_chosen": 0.3677065372467041, "log_odds_ratio": -0.6210616230964661, "logits/chosen": -2.3326685428619385, "logits/rejected": -2.284008502960205, "logps/chosen": -0.5006684064865112, "logps/rejected": -0.6665970087051392, "loss": 0.7148, "nll_loss": 0.7629369497299194, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 320 }, { "epoch": 1.41, "grad_norm": 5.34375, "learning_rate": 3.3912257057285684e-06, "log_odds_chosen": 0.5201979279518127, "log_odds_ratio": -0.5558315515518188, "logits/chosen": -2.35974383354187, "logits/rejected": -2.2589011192321777, "logps/chosen": -0.44807687401771545, "logps/rejected": -0.6775830984115601, "loss": 0.7127, "nll_loss": 0.6746495962142944, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 330 }, { "epoch": 1.46, "grad_norm": 5.46875, "learning_rate": 3.2675527558954897e-06, "log_odds_chosen": 0.4437088966369629, "log_odds_ratio": -0.5744566321372986, "logits/chosen": -2.3658008575439453, "logits/rejected": -2.302891969680786, "logps/chosen": -0.5020125508308411, "logps/rejected": -0.703687310218811, "loss": 0.7293, "nll_loss": 0.7448792457580566, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 340 }, { "epoch": 1.5, "grad_norm": 6.21875, "learning_rate": 3.1417689686554144e-06, "log_odds_chosen": 0.4869805872440338, "log_odds_ratio": -0.5718838572502136, "logits/chosen": -2.3735198974609375, "logits/rejected": -2.3357536792755127, "logps/chosen": -0.4905489385128021, "logps/rejected": -0.7079612612724304, "loss": 0.7228, "nll_loss": 0.7263522148132324, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 350 }, { "epoch": 1.54, "grad_norm": 5.25, "learning_rate": 3.0142202604567724e-06, "log_odds_chosen": 0.4622114300727844, "log_odds_ratio": -0.580187201499939, "logits/chosen": -2.4162421226501465, "logits/rejected": -2.32672119140625, "logps/chosen": -0.4755636155605316, "logps/rejected": -0.6753752827644348, "loss": 0.6993, "nll_loss": 0.7006024122238159, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 360 }, { "epoch": 1.59, "grad_norm": 5.90625, "learning_rate": 2.8852574014354394e-06, "log_odds_chosen": 0.4685123562812805, "log_odds_ratio": -0.5639849901199341, "logits/chosen": -2.3573315143585205, "logits/rejected": -2.2810652256011963, "logps/chosen": -0.45586276054382324, "logps/rejected": -0.650667667388916, "loss": 0.6937, "nll_loss": 0.6792919039726257, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 370 }, { "epoch": 1.63, "grad_norm": 5.46875, "learning_rate": 2.7552350507661063e-06, "log_odds_chosen": 0.32628968358039856, "log_odds_ratio": -0.6266478300094604, "logits/chosen": -2.3865950107574463, "logits/rejected": -2.2999796867370605, "logps/chosen": -0.5150719285011292, "logps/rejected": -0.6419684290885925, "loss": 0.7454, "nll_loss": 0.7746638059616089, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 380 }, { "epoch": 1.67, "grad_norm": 5.65625, "learning_rate": 2.6245107813184286e-06, "log_odds_chosen": 0.4124643802642822, "log_odds_ratio": -0.6084017157554626, "logits/chosen": -2.300006151199341, "logits/rejected": -2.2486844062805176, "logps/chosen": -0.500686526298523, "logps/rejected": -0.7045504450798035, "loss": 0.7104, "nll_loss": 0.7621539235115051, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 390 }, { "epoch": 1.71, "grad_norm": 6.3125, "learning_rate": 2.493444096300273e-06, "log_odds_chosen": 0.3271673321723938, "log_odds_ratio": -0.6363809704780579, "logits/chosen": -2.332576274871826, "logits/rejected": -2.2507784366607666, "logps/chosen": -0.5043616890907288, "logps/rejected": -0.6402055621147156, "loss": 0.7316, "nll_loss": 0.7292400598526001, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 400 }, { "epoch": 1.76, "grad_norm": 5.40625, "learning_rate": 2.3623954405923636e-06, "log_odds_chosen": 0.35860806703567505, "log_odds_ratio": -0.6040534973144531, "logits/chosen": -2.3241639137268066, "logits/rejected": -2.2687346935272217, "logps/chosen": -0.4667055010795593, "logps/rejected": -0.6222271919250488, "loss": 0.7055, "nll_loss": 0.6745852828025818, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 410 }, { "epoch": 1.8, "grad_norm": 5.4375, "learning_rate": 2.2317252094932383e-06, "log_odds_chosen": 0.4658966064453125, "log_odds_ratio": -0.5793284177780151, "logits/chosen": -2.3523142337799072, "logits/rejected": -2.2792649269104004, "logps/chosen": -0.4811071753501892, "logps/rejected": -0.6847606897354126, "loss": 0.6927, "nll_loss": 0.7262173295021057, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 420 }, { "epoch": 1.84, "grad_norm": 5.75, "learning_rate": 2.1017927576005657e-06, "log_odds_chosen": 0.4586619436740875, "log_odds_ratio": -0.5749837756156921, "logits/chosen": -2.309678554534912, "logits/rejected": -2.2428524494171143, "logps/chosen": -0.4688965380191803, "logps/rejected": -0.6654346585273743, "loss": 0.6911, "nll_loss": 0.7090088129043579, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 430 }, { "epoch": 1.89, "grad_norm": 5.875, "learning_rate": 1.9729554105544816e-06, "log_odds_chosen": 0.39126816391944885, "log_odds_ratio": -0.5965334177017212, "logits/chosen": -2.246513605117798, "logits/rejected": -2.1855998039245605, "logps/chosen": -0.5077515840530396, "logps/rejected": -0.685882031917572, "loss": 0.73, "nll_loss": 0.696818470954895, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 440 }, { "epoch": 1.93, "grad_norm": 5.78125, "learning_rate": 1.8455674823607312e-06, "log_odds_chosen": 0.2512452006340027, "log_odds_ratio": -0.6640199422836304, "logits/chosen": -2.342719078063965, "logits/rejected": -2.274512529373169, "logps/chosen": -0.521196722984314, "logps/rejected": -0.6386003494262695, "loss": 0.7091, "nll_loss": 0.7769466638565063, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 450 }, { "epoch": 1.97, "grad_norm": 5.46875, "learning_rate": 1.7199793009960766e-06, "log_odds_chosen": 0.4366021156311035, "log_odds_ratio": -0.5971043705940247, "logits/chosen": -2.2552707195281982, "logits/rejected": -2.2170627117156982, "logps/chosen": -0.5236691832542419, "logps/rejected": -0.7121491432189941, "loss": 0.7011, "nll_loss": 0.7103704214096069, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 460 }, { "epoch": 2.02, "grad_norm": 5.9375, "learning_rate": 1.5965362449756317e-06, "log_odds_chosen": 0.5843161344528198, "log_odds_ratio": -0.5287169218063354, "logits/chosen": -2.3433125019073486, "logits/rejected": -2.2500529289245605, "logps/chosen": -0.43613916635513306, "logps/rejected": -0.6702120304107666, "loss": 0.6759, "nll_loss": 0.6880279183387756, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 470 }, { "epoch": 2.06, "grad_norm": 6.5, "learning_rate": 1.4755777935316412e-06, "log_odds_chosen": 0.8127249479293823, "log_odds_ratio": -0.46383658051490784, "logits/chosen": -2.3992159366607666, "logits/rejected": -2.2700352668762207, "logps/chosen": -0.4094364047050476, "logps/rejected": -0.7208075523376465, "loss": 0.6001, "nll_loss": 0.5964864492416382, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 480 }, { "epoch": 2.1, "grad_norm": 6.03125, "learning_rate": 1.3574365930158272e-06, "log_odds_chosen": 0.7765734791755676, "log_odds_ratio": -0.4649999141693115, "logits/chosen": -2.294707775115967, "logits/rejected": -2.244535446166992, "logps/chosen": -0.3731532692909241, "logps/rejected": -0.6663237810134888, "loss": 0.5958, "nll_loss": 0.5779340863227844, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 490 }, { "epoch": 2.14, "grad_norm": 5.34375, "learning_rate": 1.242437542092731e-06, "log_odds_chosen": 0.8575220108032227, "log_odds_ratio": -0.42505908012390137, "logits/chosen": -2.2913150787353516, "logits/rejected": -2.2416951656341553, "logps/chosen": -0.37527984380722046, "logps/rejected": -0.6905524730682373, "loss": 0.6326, "nll_loss": 0.623663604259491, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 500 }, { "epoch": 2.19, "grad_norm": 6.09375, "learning_rate": 1.1308968982398893e-06, "log_odds_chosen": 0.912135899066925, "log_odds_ratio": -0.41568368673324585, "logits/chosen": -2.313683271408081, "logits/rejected": -2.226602554321289, "logps/chosen": -0.3589690625667572, "logps/rejected": -0.7188132405281067, "loss": 0.5746, "nll_loss": 0.5657839179039001, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 510 }, { "epoch": 2.23, "grad_norm": 6.4375, "learning_rate": 1.0231214080120354e-06, "log_odds_chosen": 0.8424990773200989, "log_odds_ratio": -0.4574614465236664, "logits/chosen": -2.2810280323028564, "logits/rejected": -2.2405149936676025, "logps/chosen": -0.39062148332595825, "logps/rejected": -0.705338716506958, "loss": 0.616, "nll_loss": 0.6053126454353333, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 520 }, { "epoch": 2.27, "grad_norm": 5.46875, "learning_rate": 9.194074634611577e-07, "log_odds_chosen": 0.8043780326843262, "log_odds_ratio": -0.4463415741920471, "logits/chosen": -2.3421316146850586, "logits/rejected": -2.27047061920166, "logps/chosen": -0.3879227340221405, "logps/rejected": -0.681617259979248, "loss": 0.5906, "nll_loss": 0.6051921844482422, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 530 }, { "epoch": 2.32, "grad_norm": 5.71875, "learning_rate": 8.200402870323634e-07, "log_odds_chosen": 0.8168398141860962, "log_odds_ratio": -0.4651332497596741, "logits/chosen": -2.2850687503814697, "logits/rejected": -2.203339099884033, "logps/chosen": -0.4002053141593933, "logps/rejected": -0.7145139575004578, "loss": 0.6095, "nll_loss": 0.5440846681594849, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 540 }, { "epoch": 2.36, "grad_norm": 5.53125, "learning_rate": 7.252931471771322e-07, "log_odds_chosen": 0.957280158996582, "log_odds_ratio": -0.4218501150608063, "logits/chosen": -2.3870935440063477, "logits/rejected": -2.3037381172180176, "logps/chosen": -0.3641536235809326, "logps/rejected": -0.7135647535324097, "loss": 0.5756, "nll_loss": 0.5520345568656921, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 550 }, { "epoch": 2.4, "grad_norm": 6.0, "learning_rate": 6.354266068411078e-07, "log_odds_chosen": 0.8184865713119507, "log_odds_ratio": -0.4773890972137451, "logits/chosen": -2.2534213066101074, "logits/rejected": -2.22419810295105, "logps/chosen": -0.41363048553466797, "logps/rejected": -0.7267066240310669, "loss": 0.6069, "nll_loss": 0.6007322072982788, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 560 }, { "epoch": 2.44, "grad_norm": 5.71875, "learning_rate": 5.50687806893139e-07, "log_odds_chosen": 0.8616418838500977, "log_odds_ratio": -0.44923096895217896, "logits/chosen": -2.276895761489868, "logits/rejected": -2.2783918380737305, "logps/chosen": -0.37030458450317383, "logps/rejected": -0.666776180267334, "loss": 0.614, "nll_loss": 0.6462346911430359, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 570 }, { "epoch": 2.49, "grad_norm": 6.1875, "learning_rate": 4.7130978646620807e-07, "log_odds_chosen": 0.9556438326835632, "log_odds_ratio": -0.4390975534915924, "logits/chosen": -2.3128373622894287, "logits/rejected": -2.2027499675750732, "logps/chosen": -0.3874739706516266, "logps/rejected": -0.7368007898330688, "loss": 0.5845, "nll_loss": 0.6006309986114502, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 580 }, { "epoch": 2.53, "grad_norm": 5.5, "learning_rate": 3.975108420793819e-07, "log_odds_chosen": 0.8426518440246582, "log_odds_ratio": -0.44841188192367554, "logits/chosen": -2.2656946182250977, "logits/rejected": -2.2316298484802246, "logps/chosen": -0.3601939082145691, "logps/rejected": -0.6829022765159607, "loss": 0.6035, "nll_loss": 0.5817385911941528, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 590 }, { "epoch": 2.57, "grad_norm": 6.09375, "learning_rate": 3.294939273032272e-07, "log_odds_chosen": 0.9172550439834595, "log_odds_ratio": -0.41558733582496643, "logits/chosen": -2.251704692840576, "logits/rejected": -2.228672742843628, "logps/chosen": -0.3531130254268646, "logps/rejected": -0.703464925289154, "loss": 0.5694, "nll_loss": 0.554794430732727, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 600 }, { "epoch": 2.62, "grad_norm": 5.96875, "learning_rate": 2.6744609461969523e-07, "log_odds_chosen": 0.877754807472229, "log_odds_ratio": -0.4350369870662689, "logits/chosen": -2.3534913063049316, "logits/rejected": -2.2918014526367188, "logps/chosen": -0.37197771668434143, "logps/rejected": -0.7188286781311035, "loss": 0.594, "nll_loss": 0.5847772359848022, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 610 }, { "epoch": 2.66, "grad_norm": 5.84375, "learning_rate": 2.1153798101138405e-07, "log_odds_chosen": 0.9580931663513184, "log_odds_ratio": -0.4062252938747406, "logits/chosen": -2.2987186908721924, "logits/rejected": -2.2561445236206055, "logps/chosen": -0.37130284309387207, "logps/rejected": -0.7186514735221863, "loss": 0.6113, "nll_loss": 0.6211617588996887, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 620 }, { "epoch": 2.7, "grad_norm": 11.5625, "learning_rate": 1.61923338694871e-07, "log_odds_chosen": 0.8980989456176758, "log_odds_ratio": -0.44537553191185, "logits/chosen": -2.3041529655456543, "logits/rejected": -2.246042490005493, "logps/chosen": -0.3840414583683014, "logps/rejected": -0.7412872910499573, "loss": 0.6259, "nll_loss": 0.6146137714385986, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 630 }, { "epoch": 2.74, "grad_norm": 5.65625, "learning_rate": 1.1873861228862998e-07, "log_odds_chosen": 0.9331363439559937, "log_odds_ratio": -0.4274202883243561, "logits/chosen": -2.2929160594940186, "logits/rejected": -2.2195255756378174, "logps/chosen": -0.3945469558238983, "logps/rejected": -0.7453508377075195, "loss": 0.6133, "nll_loss": 0.621927797794342, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 640 }, { "epoch": 2.79, "grad_norm": 6.21875, "learning_rate": 8.210256357836065e-08, "log_odds_chosen": 1.0095536708831787, "log_odds_ratio": -0.3956521451473236, "logits/chosen": -2.263673782348633, "logits/rejected": -2.20683217048645, "logps/chosen": -0.3427670896053314, "logps/rejected": -0.7156583070755005, "loss": 0.574, "nll_loss": 0.5451396703720093, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 650 }, { "epoch": 2.83, "grad_norm": 5.8125, "learning_rate": 5.21159449116615e-08, "log_odds_chosen": 0.770971417427063, "log_odds_ratio": -0.4887896180152893, "logits/chosen": -2.284029006958008, "logits/rejected": -2.214066982269287, "logps/chosen": -0.4013218879699707, "logps/rejected": -0.6818682551383972, "loss": 0.5932, "nll_loss": 0.5923025012016296, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 660 }, { "epoch": 2.87, "grad_norm": 5.4375, "learning_rate": 2.8861222120235845e-08, "log_odds_chosen": 0.8828635215759277, "log_odds_ratio": -0.4312991201877594, "logits/chosen": -2.24355411529541, "logits/rejected": -2.219613552093506, "logps/chosen": -0.34176358580589294, "logps/rejected": -0.6538265943527222, "loss": 0.5864, "nll_loss": 0.58399498462677, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 670 }, { "epoch": 2.92, "grad_norm": 5.53125, "learning_rate": 1.2402347731620412e-08, "log_odds_chosen": 0.7922724485397339, "log_odds_ratio": -0.4506749212741852, "logits/chosen": -2.275207996368408, "logits/rejected": -2.178473949432373, "logps/chosen": -0.40824776887893677, "logps/rejected": -0.7051072120666504, "loss": 0.5958, "nll_loss": 0.5631005167961121, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 680 }, { "epoch": 2.96, "grad_norm": 6.15625, "learning_rate": 2.7845850941254914e-09, "log_odds_chosen": 0.8703590631484985, "log_odds_ratio": -0.4304262101650238, "logits/chosen": -2.343256711959839, "logits/rejected": -2.2737724781036377, "logps/chosen": -0.366679310798645, "logps/rejected": -0.6560879349708557, "loss": 0.6025, "nll_loss": 0.5677012205123901, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 690 }, { "epoch": 3.0, "step": 699, "total_flos": 0.0, "train_loss": 0.7502862380468454, "train_runtime": 21621.4849, "train_samples_per_second": 2.071, "train_steps_per_second": 0.032 } ], "logging_steps": 10, "max_steps": 699, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }