{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9963459196102313, "eval_steps": 500, "global_step": 615, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.048721071863580996, "grad_norm": 556.0, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.11725157499313354, "log_odds_ratio": -0.7116702795028687, "logits/chosen": -2.241136312484741, "logits/rejected": -2.2574338912963867, "logps/chosen": -1.0354639291763306, "logps/rejected": -1.1154835224151611, "loss": 4.3799, "nll_loss": 4.344295501708984, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.05177319049835205, "rewards/margins": 0.004000981338322163, "rewards/rejected": -0.05577417463064194, "step": 10 }, { "epoch": 0.09744214372716199, "grad_norm": 55.25, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.11950838565826416, "log_odds_ratio": -0.7092320322990417, "logits/chosen": -2.8477842807769775, "logits/rejected": -2.8416748046875, "logps/chosen": -0.9889892339706421, "logps/rejected": -1.076500654220581, "loss": 1.8135, "nll_loss": 1.7780277729034424, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.049449462443590164, "rewards/margins": 0.004375572316348553, "rewards/rejected": -0.053825028240680695, "step": 20 }, { "epoch": 0.146163215590743, "grad_norm": 5.40625, "learning_rate": 1.5e-06, "log_odds_chosen": 0.15654945373535156, "log_odds_ratio": -0.690517783164978, "logits/chosen": -2.6702427864074707, "logits/rejected": -2.6656832695007324, "logps/chosen": -0.8676761388778687, "logps/rejected": -0.9682536125183105, "loss": 0.6655, "nll_loss": 0.6310166716575623, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.04338381439447403, "rewards/margins": 0.005028870422393084, "rewards/rejected": -0.04841268062591553, "step": 30 }, { "epoch": 0.19488428745432398, "grad_norm": 3.796875, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.17244641482830048, "log_odds_ratio": -0.6753009557723999, "logits/chosen": -2.296032190322876, "logits/rejected": -2.29901123046875, "logps/chosen": -0.8293353319168091, "logps/rejected": -0.9353267550468445, "loss": 0.5876, "nll_loss": 0.5538541078567505, "rewards/accuracies": 0.578125, "rewards/chosen": -0.041466765105724335, "rewards/margins": 0.005299570504575968, "rewards/rejected": -0.04676634073257446, "step": 40 }, { "epoch": 0.243605359317905, "grad_norm": 3.96875, "learning_rate": 2.5e-06, "log_odds_chosen": 0.21714143455028534, "log_odds_ratio": -0.6588594913482666, "logits/chosen": -2.160338878631592, "logits/rejected": -2.1593730449676514, "logps/chosen": -0.7916803359985352, "logps/rejected": -0.9191333651542664, "loss": 0.5514, "nll_loss": 0.5184302926063538, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03958401829004288, "rewards/margins": 0.0063726468943059444, "rewards/rejected": -0.04595666378736496, "step": 50 }, { "epoch": 0.292326431181486, "grad_norm": 4.71875, "learning_rate": 3e-06, "log_odds_chosen": 0.25414514541625977, "log_odds_ratio": -0.6507801413536072, "logits/chosen": -2.1048073768615723, "logits/rejected": -2.1529033184051514, "logps/chosen": -0.8171817660331726, "logps/rejected": -0.9584506750106812, "loss": 0.591, "nll_loss": 0.5584203004837036, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.04085909202694893, "rewards/margins": 0.007063449826091528, "rewards/rejected": -0.04792254418134689, "step": 60 }, { "epoch": 0.341047503045067, "grad_norm": 4.03125, "learning_rate": 3.5e-06, "log_odds_chosen": 0.2314440906047821, "log_odds_ratio": -0.6606293320655823, "logits/chosen": -2.1196396350860596, "logits/rejected": -2.0706403255462646, "logps/chosen": -0.8082675933837891, "logps/rejected": -0.9356250762939453, "loss": 0.5785, "nll_loss": 0.5454204082489014, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.04041338339447975, "rewards/margins": 0.00636786874383688, "rewards/rejected": -0.046781253069639206, "step": 70 }, { "epoch": 0.38976857490864797, "grad_norm": 4.0625, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.2176472246646881, "log_odds_ratio": -0.6596580743789673, "logits/chosen": -2.1406569480895996, "logits/rejected": -2.1560683250427246, "logps/chosen": -0.7807229161262512, "logps/rejected": -0.8939612507820129, "loss": 0.5547, "nll_loss": 0.5217040181159973, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.03903614357113838, "rewards/margins": 0.005661919247359037, "rewards/rejected": -0.044698067009449005, "step": 80 }, { "epoch": 0.438489646772229, "grad_norm": 4.5, "learning_rate": 4.5e-06, "log_odds_chosen": 0.1901816874742508, "log_odds_ratio": -0.6751978993415833, "logits/chosen": -2.1606240272521973, "logits/rejected": -2.130833387374878, "logps/chosen": -0.8014599084854126, "logps/rejected": -0.9177049398422241, "loss": 0.5793, "nll_loss": 0.5454980134963989, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.04007299616932869, "rewards/margins": 0.005812253803014755, "rewards/rejected": -0.045885246247053146, "step": 90 }, { "epoch": 0.48721071863581, "grad_norm": 4.28125, "learning_rate": 5e-06, "log_odds_chosen": 0.24776022136211395, "log_odds_ratio": -0.6568383574485779, "logits/chosen": -2.105705499649048, "logits/rejected": -2.1442131996154785, "logps/chosen": -0.7829509973526001, "logps/rejected": -0.9177125692367554, "loss": 0.5837, "nll_loss": 0.5508524179458618, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.039147552102804184, "rewards/margins": 0.006738076917827129, "rewards/rejected": -0.04588562995195389, "step": 100 }, { "epoch": 0.535931790499391, "grad_norm": 4.5, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.16261616349220276, "log_odds_ratio": -0.6998116970062256, "logits/chosen": -2.0691206455230713, "logits/rejected": -2.097303867340088, "logps/chosen": -0.8038315773010254, "logps/rejected": -0.890066921710968, "loss": 0.5773, "nll_loss": 0.5423145890235901, "rewards/accuracies": 0.53125, "rewards/chosen": -0.04019157961010933, "rewards/margins": 0.004311761818826199, "rewards/rejected": -0.0445033423602581, "step": 110 }, { "epoch": 0.584652862362972, "grad_norm": 3.8125, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.3128460645675659, "log_odds_ratio": -0.6486107110977173, "logits/chosen": -2.045989513397217, "logits/rejected": -2.06305193901062, "logps/chosen": -0.7878965139389038, "logps/rejected": -0.9589905738830566, "loss": 0.5752, "nll_loss": 0.5428130030632019, "rewards/accuracies": 0.625, "rewards/chosen": -0.03939482569694519, "rewards/margins": 0.00855470634996891, "rewards/rejected": -0.04794953018426895, "step": 120 }, { "epoch": 0.633373934226553, "grad_norm": 4.1875, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 0.26915115118026733, "log_odds_ratio": -0.6629001498222351, "logits/chosen": -2.160104751586914, "logits/rejected": -2.165109157562256, "logps/chosen": -0.7626894116401672, "logps/rejected": -0.8972024917602539, "loss": 0.5541, "nll_loss": 0.5209673643112183, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.03813446685671806, "rewards/margins": 0.006725652609020472, "rewards/rejected": -0.044860124588012695, "step": 130 }, { "epoch": 0.682095006090134, "grad_norm": 4.71875, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 0.2980436682701111, "log_odds_ratio": -0.644582211971283, "logits/chosen": -2.170442581176758, "logits/rejected": -2.1650166511535645, "logps/chosen": -0.7542210817337036, "logps/rejected": -0.9323924779891968, "loss": 0.58, "nll_loss": 0.547797679901123, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -0.03771105408668518, "rewards/margins": 0.008908570744097233, "rewards/rejected": -0.04661962762475014, "step": 140 }, { "epoch": 0.730816077953715, "grad_norm": 3.375, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.30688202381134033, "log_odds_ratio": -0.6490530371665955, "logits/chosen": -2.1752607822418213, "logits/rejected": -2.20690655708313, "logps/chosen": -0.7646127939224243, "logps/rejected": -0.9283273816108704, "loss": 0.5539, "nll_loss": 0.5214100480079651, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.038230642676353455, "rewards/margins": 0.008185726590454578, "rewards/rejected": -0.04641636833548546, "step": 150 }, { "epoch": 0.7795371498172959, "grad_norm": 4.25, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.29972273111343384, "log_odds_ratio": -0.6498032212257385, "logits/chosen": -2.222160816192627, "logits/rejected": -2.2057909965515137, "logps/chosen": -0.7570406198501587, "logps/rejected": -0.9201406240463257, "loss": 0.586, "nll_loss": 0.5535241365432739, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.03785203397274017, "rewards/margins": 0.008154998533427715, "rewards/rejected": -0.046007029712200165, "step": 160 }, { "epoch": 0.8282582216808769, "grad_norm": 3.75, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 0.25258108973503113, "log_odds_ratio": -0.6628280878067017, "logits/chosen": -2.217949628829956, "logits/rejected": -2.2262425422668457, "logps/chosen": -0.7875797748565674, "logps/rejected": -0.9305909276008606, "loss": 0.5898, "nll_loss": 0.5566290616989136, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.03937898576259613, "rewards/margins": 0.007150554563850164, "rewards/rejected": -0.04652954638004303, "step": 170 }, { "epoch": 0.876979293544458, "grad_norm": 4.25, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.29803532361984253, "log_odds_ratio": -0.6474822759628296, "logits/chosen": -2.164008617401123, "logits/rejected": -2.1809074878692627, "logps/chosen": -0.7780044674873352, "logps/rejected": -0.935949981212616, "loss": 0.5573, "nll_loss": 0.5249563455581665, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.0389002226293087, "rewards/margins": 0.007897274568676949, "rewards/rejected": -0.0467974953353405, "step": 180 }, { "epoch": 0.925700365408039, "grad_norm": 3.640625, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 0.3355644941329956, "log_odds_ratio": -0.6257966756820679, "logits/chosen": -2.1745223999023438, "logits/rejected": -2.168818712234497, "logps/chosen": -0.7714961767196655, "logps/rejected": -0.9696172475814819, "loss": 0.5722, "nll_loss": 0.5409447550773621, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.038574814796447754, "rewards/margins": 0.009906048886477947, "rewards/rejected": -0.048480864614248276, "step": 190 }, { "epoch": 0.97442143727162, "grad_norm": 3.734375, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 0.24752295017242432, "log_odds_ratio": -0.6712836623191833, "logits/chosen": -2.1981072425842285, "logits/rejected": -2.2192599773406982, "logps/chosen": -0.7749738097190857, "logps/rejected": -0.9182379841804504, "loss": 0.6001, "nll_loss": 0.5665073394775391, "rewards/accuracies": 0.59375, "rewards/chosen": -0.038748692721128464, "rewards/margins": 0.007163210306316614, "rewards/rejected": -0.04591190069913864, "step": 200 }, { "epoch": 1.0231425091352009, "grad_norm": 4.21875, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 0.5075238943099976, "log_odds_ratio": -0.5618590116500854, "logits/chosen": -2.1940836906433105, "logits/rejected": -2.2036900520324707, "logps/chosen": -0.683228611946106, "logps/rejected": -0.935465931892395, "loss": 0.5621, "nll_loss": 0.5340477228164673, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.03416142612695694, "rewards/margins": 0.012611865997314453, "rewards/rejected": -0.04677329212427139, "step": 210 }, { "epoch": 1.071863580998782, "grad_norm": 4.0625, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 0.7354512214660645, "log_odds_ratio": -0.47807660698890686, "logits/chosen": -2.1200661659240723, "logits/rejected": -2.1521944999694824, "logps/chosen": -0.6185888051986694, "logps/rejected": -0.9742799997329712, "loss": 0.4596, "nll_loss": 0.4357299208641052, "rewards/accuracies": 0.765625, "rewards/chosen": -0.03092944249510765, "rewards/margins": 0.01778455823659897, "rewards/rejected": -0.04871400073170662, "step": 220 }, { "epoch": 1.1205846528623629, "grad_norm": 3.9375, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 0.8707483410835266, "log_odds_ratio": -0.44475650787353516, "logits/chosen": -2.1387903690338135, "logits/rejected": -2.1741206645965576, "logps/chosen": -0.6047109365463257, "logps/rejected": -1.0183531045913696, "loss": 0.4634, "nll_loss": 0.44113415479660034, "rewards/accuracies": 0.8125, "rewards/chosen": -0.030235549435019493, "rewards/margins": 0.020682107657194138, "rewards/rejected": -0.05091765522956848, "step": 230 }, { "epoch": 1.169305724725944, "grad_norm": 4.53125, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 0.8473652005195618, "log_odds_ratio": -0.44497156143188477, "logits/chosen": -2.1265828609466553, "logits/rejected": -2.0939793586730957, "logps/chosen": -0.6094004511833191, "logps/rejected": -1.039342999458313, "loss": 0.4756, "nll_loss": 0.45334190130233765, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.030470022931694984, "rewards/margins": 0.021497130393981934, "rewards/rejected": -0.05196715518832207, "step": 240 }, { "epoch": 1.218026796589525, "grad_norm": 4.28125, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 0.8328806161880493, "log_odds_ratio": -0.4545086920261383, "logits/chosen": -2.0806257724761963, "logits/rejected": -2.0717406272888184, "logps/chosen": -0.6114749312400818, "logps/rejected": -1.0371878147125244, "loss": 0.4832, "nll_loss": 0.4604315757751465, "rewards/accuracies": 0.796875, "rewards/chosen": -0.03057374618947506, "rewards/margins": 0.02128564938902855, "rewards/rejected": -0.051859401166439056, "step": 250 }, { "epoch": 1.266747868453106, "grad_norm": 5.125, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 0.8561245203018188, "log_odds_ratio": -0.4485153257846832, "logits/chosen": -2.0740833282470703, "logits/rejected": -2.1148364543914795, "logps/chosen": -0.603646457195282, "logps/rejected": -1.02623450756073, "loss": 0.482, "nll_loss": 0.4595448970794678, "rewards/accuracies": 0.8031250238418579, "rewards/chosen": -0.030182326212525368, "rewards/margins": 0.021129406988620758, "rewards/rejected": -0.05131173133850098, "step": 260 }, { "epoch": 1.315468940316687, "grad_norm": 4.125, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 0.9330509305000305, "log_odds_ratio": -0.43321356177330017, "logits/chosen": -2.0687127113342285, "logits/rejected": -2.073645830154419, "logps/chosen": -0.5700651407241821, "logps/rejected": -1.0073726177215576, "loss": 0.4551, "nll_loss": 0.43348783254623413, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.028503259643912315, "rewards/margins": 0.021865375339984894, "rewards/rejected": -0.05036862939596176, "step": 270 }, { "epoch": 1.364190012180268, "grad_norm": 3.78125, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 0.8365095257759094, "log_odds_ratio": -0.4509243369102478, "logits/chosen": -2.0936789512634277, "logits/rejected": -2.0444083213806152, "logps/chosen": -0.6157883405685425, "logps/rejected": -1.018973469734192, "loss": 0.4629, "nll_loss": 0.440369188785553, "rewards/accuracies": 0.809374988079071, "rewards/chosen": -0.030789416283369064, "rewards/margins": 0.02015925757586956, "rewards/rejected": -0.050948671996593475, "step": 280 }, { "epoch": 1.412911084043849, "grad_norm": 4.0625, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 0.933612048625946, "log_odds_ratio": -0.42886632680892944, "logits/chosen": -2.07072114944458, "logits/rejected": -2.0590546131134033, "logps/chosen": -0.5958141684532166, "logps/rejected": -1.0696933269500732, "loss": 0.4313, "nll_loss": 0.4098600745201111, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.029790710657835007, "rewards/margins": 0.023693958297371864, "rewards/rejected": -0.05348466709256172, "step": 290 }, { "epoch": 1.46163215590743, "grad_norm": 4.125, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 0.8847057223320007, "log_odds_ratio": -0.44255512952804565, "logits/chosen": -2.094334840774536, "logits/rejected": -2.1165547370910645, "logps/chosen": -0.5963028073310852, "logps/rejected": -1.0416440963745117, "loss": 0.4379, "nll_loss": 0.41574278473854065, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.02981514297425747, "rewards/margins": 0.022267062216997147, "rewards/rejected": -0.05208220332860947, "step": 300 }, { "epoch": 1.510353227771011, "grad_norm": 4.84375, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 0.9941097497940063, "log_odds_ratio": -0.4086780548095703, "logits/chosen": -2.080404758453369, "logits/rejected": -2.112900495529175, "logps/chosen": -0.5736903548240662, "logps/rejected": -1.081747055053711, "loss": 0.4603, "nll_loss": 0.4398314952850342, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.028684521093964577, "rewards/margins": 0.02540283277630806, "rewards/rejected": -0.05408735200762749, "step": 310 }, { "epoch": 1.559074299634592, "grad_norm": 4.3125, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 0.883891224861145, "log_odds_ratio": -0.44051599502563477, "logits/chosen": -2.092296600341797, "logits/rejected": -2.066316604614258, "logps/chosen": -0.6078993678092957, "logps/rejected": -1.0367143154144287, "loss": 0.4823, "nll_loss": 0.46028652787208557, "rewards/accuracies": 0.8031250238418579, "rewards/chosen": -0.030394967645406723, "rewards/margins": 0.021440746262669563, "rewards/rejected": -0.051835715770721436, "step": 320 }, { "epoch": 1.607795371498173, "grad_norm": 3.578125, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 0.9730150103569031, "log_odds_ratio": -0.43135911226272583, "logits/chosen": -2.0477657318115234, "logits/rejected": -2.051565408706665, "logps/chosen": -0.5975598096847534, "logps/rejected": -1.0904568433761597, "loss": 0.4697, "nll_loss": 0.4481421113014221, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.02987799420952797, "rewards/margins": 0.024644847959280014, "rewards/rejected": -0.054522834718227386, "step": 330 }, { "epoch": 1.6565164433617539, "grad_norm": 4.09375, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 0.9206117391586304, "log_odds_ratio": -0.43162840604782104, "logits/chosen": -2.1549911499023438, "logits/rejected": -2.1604723930358887, "logps/chosen": -0.5994877815246582, "logps/rejected": -1.04535973072052, "loss": 0.4752, "nll_loss": 0.4536053538322449, "rewards/accuracies": 0.8125, "rewards/chosen": -0.02997438982129097, "rewards/margins": 0.02229359745979309, "rewards/rejected": -0.05226798728108406, "step": 340 }, { "epoch": 1.705237515225335, "grad_norm": 4.6875, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 0.9252131581306458, "log_odds_ratio": -0.44289445877075195, "logits/chosen": -2.0990488529205322, "logits/rejected": -2.1114163398742676, "logps/chosen": -0.601850688457489, "logps/rejected": -1.0636966228485107, "loss": 0.4599, "nll_loss": 0.43774834275245667, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.03009253740310669, "rewards/margins": 0.023092303425073624, "rewards/rejected": -0.05318482965230942, "step": 350 }, { "epoch": 1.753958587088916, "grad_norm": 3.8125, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": 0.9648879766464233, "log_odds_ratio": -0.4200161397457123, "logits/chosen": -2.1074578762054443, "logits/rejected": -2.1522512435913086, "logps/chosen": -0.5663856267929077, "logps/rejected": -1.0540645122528076, "loss": 0.4401, "nll_loss": 0.4191323220729828, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.028319280594587326, "rewards/margins": 0.02438395284116268, "rewards/rejected": -0.052703239023685455, "step": 360 }, { "epoch": 1.802679658952497, "grad_norm": 4.90625, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 0.8812922239303589, "log_odds_ratio": -0.459873765707016, "logits/chosen": -2.059195041656494, "logits/rejected": -2.111880302429199, "logps/chosen": -0.6145040392875671, "logps/rejected": -1.0735037326812744, "loss": 0.4598, "nll_loss": 0.4368101954460144, "rewards/accuracies": 0.7906249761581421, "rewards/chosen": -0.030725205317139626, "rewards/margins": 0.022949980571866035, "rewards/rejected": -0.05367518588900566, "step": 370 }, { "epoch": 1.8514007308160778, "grad_norm": 3.71875, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 0.8604068756103516, "log_odds_ratio": -0.4609376788139343, "logits/chosen": -2.0991945266723633, "logits/rejected": -2.093503952026367, "logps/chosen": -0.6222652196884155, "logps/rejected": -1.0542787313461304, "loss": 0.4708, "nll_loss": 0.4477114677429199, "rewards/accuracies": 0.778124988079071, "rewards/chosen": -0.031113261356949806, "rewards/margins": 0.02160067670047283, "rewards/rejected": -0.05271393805742264, "step": 380 }, { "epoch": 1.900121802679659, "grad_norm": 5.09375, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 0.9894090890884399, "log_odds_ratio": -0.4191601872444153, "logits/chosen": -2.1074106693267822, "logits/rejected": -2.1691222190856934, "logps/chosen": -0.5862478017807007, "logps/rejected": -1.0842490196228027, "loss": 0.4614, "nll_loss": 0.440448135137558, "rewards/accuracies": 0.84375, "rewards/chosen": -0.029312390834093094, "rewards/margins": 0.024900058284401894, "rewards/rejected": -0.05421245098114014, "step": 390 }, { "epoch": 1.94884287454324, "grad_norm": 4.28125, "learning_rate": 2.5e-06, "log_odds_chosen": 0.8292287588119507, "log_odds_ratio": -0.4579356610774994, "logits/chosen": -2.0651729106903076, "logits/rejected": -2.0955402851104736, "logps/chosen": -0.6199088096618652, "logps/rejected": -1.0601669549942017, "loss": 0.4434, "nll_loss": 0.4204865097999573, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.030995439738035202, "rewards/margins": 0.02201290801167488, "rewards/rejected": -0.05300834774971008, "step": 400 }, { "epoch": 1.997563946406821, "grad_norm": 3.8125, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 1.0317885875701904, "log_odds_ratio": -0.4280542731285095, "logits/chosen": -2.066987991333008, "logits/rejected": -2.069795608520508, "logps/chosen": -0.6221650242805481, "logps/rejected": -1.1912611722946167, "loss": 0.451, "nll_loss": 0.4295925199985504, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.031108248978853226, "rewards/margins": 0.02845480665564537, "rewards/rejected": -0.059563059359788895, "step": 410 }, { "epoch": 2.0462850182704018, "grad_norm": 4.59375, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 1.3798599243164062, "log_odds_ratio": -0.31996873021125793, "logits/chosen": -2.0742099285125732, "logits/rejected": -2.1023483276367188, "logps/chosen": -0.4915800094604492, "logps/rejected": -1.1646963357925415, "loss": 0.3809, "nll_loss": 0.3648887574672699, "rewards/accuracies": 0.909375011920929, "rewards/chosen": -0.02457900531589985, "rewards/margins": 0.033655811101198196, "rewards/rejected": -0.05823482200503349, "step": 420 }, { "epoch": 2.095006090133983, "grad_norm": 4.0, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 1.586724042892456, "log_odds_ratio": -0.28053292632102966, "logits/chosen": -2.0577170848846436, "logits/rejected": -2.0551247596740723, "logps/chosen": -0.46747732162475586, "logps/rejected": -1.2207533121109009, "loss": 0.3688, "nll_loss": 0.35477322340011597, "rewards/accuracies": 0.921875, "rewards/chosen": -0.023373866453766823, "rewards/margins": 0.03766379505395889, "rewards/rejected": -0.06103766709566116, "step": 430 }, { "epoch": 2.143727161997564, "grad_norm": 5.625, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 1.6717441082000732, "log_odds_ratio": -0.26605507731437683, "logits/chosen": -2.0567383766174316, "logits/rejected": -2.067713737487793, "logps/chosen": -0.4729752540588379, "logps/rejected": -1.3001043796539307, "loss": 0.3854, "nll_loss": 0.3720557689666748, "rewards/accuracies": 0.940625011920929, "rewards/chosen": -0.023648761212825775, "rewards/margins": 0.04135645180940628, "rewards/rejected": -0.06500521302223206, "step": 440 }, { "epoch": 2.192448233861145, "grad_norm": 4.96875, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 1.6444238424301147, "log_odds_ratio": -0.26991745829582214, "logits/chosen": -2.2070858478546143, "logits/rejected": -2.1682567596435547, "logps/chosen": -0.4466155469417572, "logps/rejected": -1.245265007019043, "loss": 0.4138, "nll_loss": 0.4003148674964905, "rewards/accuracies": 0.940625011920929, "rewards/chosen": -0.02233077771961689, "rewards/margins": 0.03993247076869011, "rewards/rejected": -0.06226325035095215, "step": 450 }, { "epoch": 2.2411693057247257, "grad_norm": 5.71875, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 1.5212812423706055, "log_odds_ratio": -0.2893092632293701, "logits/chosen": -2.103972911834717, "logits/rejected": -2.1150119304656982, "logps/chosen": -0.4900006353855133, "logps/rejected": -1.2329685688018799, "loss": 0.3907, "nll_loss": 0.37621983885765076, "rewards/accuracies": 0.9375, "rewards/chosen": -0.024500032886862755, "rewards/margins": 0.03714838624000549, "rewards/rejected": -0.0616484172642231, "step": 460 }, { "epoch": 2.289890377588307, "grad_norm": 4.90625, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 1.515355110168457, "log_odds_ratio": -0.2837259769439697, "logits/chosen": -2.0579495429992676, "logits/rejected": -2.06086802482605, "logps/chosen": -0.48503416776657104, "logps/rejected": -1.2265126705169678, "loss": 0.3629, "nll_loss": 0.3486883044242859, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.024251705035567284, "rewards/margins": 0.037073928862810135, "rewards/rejected": -0.06132563203573227, "step": 470 }, { "epoch": 2.338611449451888, "grad_norm": 4.125, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 1.5133790969848633, "log_odds_ratio": -0.2952267527580261, "logits/chosen": -2.070732831954956, "logits/rejected": -2.091557264328003, "logps/chosen": -0.4965738356113434, "logps/rejected": -1.2001986503601074, "loss": 0.3841, "nll_loss": 0.36938291788101196, "rewards/accuracies": 0.921875, "rewards/chosen": -0.02482869103550911, "rewards/margins": 0.03518123924732208, "rewards/rejected": -0.06000993400812149, "step": 480 }, { "epoch": 2.387332521315469, "grad_norm": 3.828125, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": 1.6050021648406982, "log_odds_ratio": -0.26656216382980347, "logits/chosen": -2.079113006591797, "logits/rejected": -2.0700507164001465, "logps/chosen": -0.4672022759914398, "logps/rejected": -1.2427198886871338, "loss": 0.379, "nll_loss": 0.36565306782722473, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02336011826992035, "rewards/margins": 0.03877587243914604, "rewards/rejected": -0.06213599443435669, "step": 490 }, { "epoch": 2.43605359317905, "grad_norm": 5.375, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 1.6788904666900635, "log_odds_ratio": -0.27105051279067993, "logits/chosen": -2.0833709239959717, "logits/rejected": -2.119617462158203, "logps/chosen": -0.46906137466430664, "logps/rejected": -1.315861701965332, "loss": 0.3826, "nll_loss": 0.3690149486064911, "rewards/accuracies": 0.9593750238418579, "rewards/chosen": -0.023453067988157272, "rewards/margins": 0.042340025305747986, "rewards/rejected": -0.06579308956861496, "step": 500 }, { "epoch": 2.484774665042631, "grad_norm": 4.90625, "learning_rate": 2.2140372138502386e-06, "log_odds_chosen": 1.6214439868927002, "log_odds_ratio": -0.2923833727836609, "logits/chosen": -2.117619037628174, "logits/rejected": -2.144683599472046, "logps/chosen": -0.483691543340683, "logps/rejected": -1.252079725265503, "loss": 0.4067, "nll_loss": 0.39209839701652527, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0241845790296793, "rewards/margins": 0.03841940313577652, "rewards/rejected": -0.06260398775339127, "step": 510 }, { "epoch": 2.533495736906212, "grad_norm": 4.875, "learning_rate": 2.1926450482675734e-06, "log_odds_chosen": 1.5657522678375244, "log_odds_ratio": -0.2806842625141144, "logits/chosen": -2.1533446311950684, "logits/rejected": -2.1672301292419434, "logps/chosen": -0.4697963297367096, "logps/rejected": -1.2120527029037476, "loss": 0.3891, "nll_loss": 0.37509655952453613, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.02348981983959675, "rewards/margins": 0.03711282089352608, "rewards/rejected": -0.060602642595767975, "step": 520 }, { "epoch": 2.582216808769793, "grad_norm": 4.625, "learning_rate": 2.1718612138153473e-06, "log_odds_chosen": 1.6662880182266235, "log_odds_ratio": -0.2821272611618042, "logits/chosen": -2.0334420204162598, "logits/rejected": -2.054309368133545, "logps/chosen": -0.4613516926765442, "logps/rejected": -1.2510273456573486, "loss": 0.3906, "nll_loss": 0.37650758028030396, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.02306758239865303, "rewards/margins": 0.03948378562927246, "rewards/rejected": -0.06255136430263519, "step": 530 }, { "epoch": 2.630937880633374, "grad_norm": 4.96875, "learning_rate": 2.151657414559676e-06, "log_odds_chosen": 1.5510852336883545, "log_odds_ratio": -0.2845192849636078, "logits/chosen": -2.105376958847046, "logits/rejected": -2.107239246368408, "logps/chosen": -0.481005996465683, "logps/rejected": -1.2433946132659912, "loss": 0.3943, "nll_loss": 0.3801063597202301, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.02405030094087124, "rewards/margins": 0.03811942785978317, "rewards/rejected": -0.06216973066329956, "step": 540 }, { "epoch": 2.679658952496955, "grad_norm": 4.4375, "learning_rate": 2.132007163556104e-06, "log_odds_chosen": 1.535161018371582, "log_odds_ratio": -0.28597292304039, "logits/chosen": -2.1408169269561768, "logits/rejected": -2.1645288467407227, "logps/chosen": -0.4741552472114563, "logps/rejected": -1.1751224994659424, "loss": 0.4205, "nll_loss": 0.4061834216117859, "rewards/accuracies": 0.9281250238418579, "rewards/chosen": -0.023707758635282516, "rewards/margins": 0.035048358142375946, "rewards/rejected": -0.05875612050294876, "step": 550 }, { "epoch": 2.728380024360536, "grad_norm": 4.84375, "learning_rate": 2.1128856368212917e-06, "log_odds_chosen": 1.8219788074493408, "log_odds_ratio": -0.2337840050458908, "logits/chosen": -2.091169595718384, "logits/rejected": -2.1250526905059814, "logps/chosen": -0.4375804364681244, "logps/rejected": -1.326407551765442, "loss": 0.3892, "nll_loss": 0.3775223195552826, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.02187902107834816, "rewards/margins": 0.044441353529691696, "rewards/rejected": -0.06632037460803986, "step": 560 }, { "epoch": 2.7771010962241167, "grad_norm": 4.0, "learning_rate": 2.0942695414584777e-06, "log_odds_chosen": 1.6477181911468506, "log_odds_ratio": -0.2635224461555481, "logits/chosen": -2.0645499229431152, "logits/rejected": -2.097146511077881, "logps/chosen": -0.477451890707016, "logps/rejected": -1.3132137060165405, "loss": 0.3748, "nll_loss": 0.361587256193161, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02387259528040886, "rewards/margins": 0.041788093745708466, "rewards/rejected": -0.06566068530082703, "step": 570 }, { "epoch": 2.825822168087698, "grad_norm": 4.625, "learning_rate": 2.0761369963434992e-06, "log_odds_chosen": 1.7657368183135986, "log_odds_ratio": -0.25923803448677063, "logits/chosen": -2.1280486583709717, "logits/rejected": -2.1294822692871094, "logps/chosen": -0.42457103729248047, "logps/rejected": -1.2813599109649658, "loss": 0.3643, "nll_loss": 0.35132378339767456, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.021228553727269173, "rewards/margins": 0.04283944517374039, "rewards/rejected": -0.06406799703836441, "step": 580 }, { "epoch": 2.874543239951279, "grad_norm": 4.65625, "learning_rate": 2.058467423981546e-06, "log_odds_chosen": 1.6043407917022705, "log_odds_ratio": -0.28762462735176086, "logits/chosen": -2.1606733798980713, "logits/rejected": -2.171504259109497, "logps/chosen": -0.4778304100036621, "logps/rejected": -1.2797237634658813, "loss": 0.4117, "nll_loss": 0.3973469138145447, "rewards/accuracies": 0.909375011920929, "rewards/chosen": -0.023891519755125046, "rewards/margins": 0.04009466618299484, "rewards/rejected": -0.06398618966341019, "step": 590 }, { "epoch": 2.92326431181486, "grad_norm": 3.96875, "learning_rate": 2.0412414523193154e-06, "log_odds_chosen": 1.6591618061065674, "log_odds_ratio": -0.28386634588241577, "logits/chosen": -2.105055809020996, "logits/rejected": -2.16955304145813, "logps/chosen": -0.4936911463737488, "logps/rejected": -1.315927267074585, "loss": 0.3883, "nll_loss": 0.37410837411880493, "rewards/accuracies": 0.909375011920929, "rewards/chosen": -0.02468455769121647, "rewards/margins": 0.04111180454492569, "rewards/rejected": -0.0657963678240776, "step": 600 }, { "epoch": 2.971985383678441, "grad_norm": 5.09375, "learning_rate": 2.0244408254472904e-06, "log_odds_chosen": 1.6492611169815063, "log_odds_ratio": -0.2732219099998474, "logits/chosen": -2.124809741973877, "logits/rejected": -2.1507182121276855, "logps/chosen": -0.477315753698349, "logps/rejected": -1.2909691333770752, "loss": 0.3918, "nll_loss": 0.3781156837940216, "rewards/accuracies": 0.940625011920929, "rewards/chosen": -0.02386578544974327, "rewards/margins": 0.04068267345428467, "rewards/rejected": -0.06454845517873764, "step": 610 }, { "epoch": 2.9963459196102313, "step": 615, "total_flos": 0.0, "train_loss": 0.5590515291787744, "train_runtime": 15994.549, "train_samples_per_second": 1.232, "train_steps_per_second": 0.038 } ], "logging_steps": 10, "max_steps": 615, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }