{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999296814570002, "eval_steps": 500, "global_step": 3555, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014063708599957809, "grad_norm": 23.875, "learning_rate": 7.02247191011236e-08, "log_odds_chosen": 0.8099881410598755, "log_odds_ratio": -0.6670631170272827, "logits/chosen": 0.35691261291503906, "logits/rejected": 0.04136817157268524, "logps/chosen": -2.435549259185791, "logps/rejected": -3.192267656326294, "loss": 3.9113, "nll_loss": 3.307011127471924, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24355490505695343, "rewards/margins": 0.07567180693149567, "rewards/rejected": -0.3192267417907715, "step": 5 }, { "epoch": 0.0028127417199915617, "grad_norm": 16.5, "learning_rate": 1.404494382022472e-07, "log_odds_chosen": 0.09973736107349396, "log_odds_ratio": -0.8319740295410156, "logits/chosen": 0.4880150854587555, "logits/rejected": 0.18690350651741028, "logps/chosen": -2.3327722549438477, "logps/rejected": -2.4167592525482178, "loss": 3.9321, "nll_loss": 3.0211567878723145, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23327720165252686, "rewards/margins": 0.008398734033107758, "rewards/rejected": -0.2416759431362152, "step": 10 }, { "epoch": 0.0042191125799873426, "grad_norm": 28.375, "learning_rate": 2.106741573033708e-07, "log_odds_chosen": 0.46809667348861694, "log_odds_ratio": -1.068535566329956, "logits/chosen": 0.11707712709903717, "logits/rejected": -0.04653818532824516, "logps/chosen": -3.06805157661438, "logps/rejected": -3.482112407684326, "loss": 4.1233, "nll_loss": 4.485260486602783, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.30680516362190247, "rewards/margins": 0.04140608757734299, "rewards/rejected": -0.34821125864982605, "step": 15 }, { "epoch": 0.005625483439983123, "grad_norm": 26.25, "learning_rate": 2.808988764044944e-07, "log_odds_chosen": 0.4251781404018402, "log_odds_ratio": -0.8865317106246948, "logits/chosen": 0.14278806746006012, "logits/rejected": -0.1758263260126114, "logps/chosen": -2.5326249599456787, "logps/rejected": -2.925166606903076, "loss": 3.9756, "nll_loss": 3.745863437652588, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2532625198364258, "rewards/margins": 0.03925413638353348, "rewards/rejected": -0.29251664876937866, "step": 20 }, { "epoch": 0.007031854299978904, "grad_norm": 34.75, "learning_rate": 3.5112359550561806e-07, "log_odds_chosen": -0.24400389194488525, "log_odds_ratio": -1.3111430406570435, "logits/chosen": 0.28750285506248474, "logits/rejected": 0.3935859799385071, "logps/chosen": -2.9908688068389893, "logps/rejected": -2.76876163482666, "loss": 3.5984, "nll_loss": 3.786576509475708, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.29908689856529236, "rewards/margins": -0.022210732102394104, "rewards/rejected": -0.27687615156173706, "step": 25 }, { "epoch": 0.008438225159974685, "grad_norm": 30.75, "learning_rate": 4.213483146067416e-07, "log_odds_chosen": -0.18453823029994965, "log_odds_ratio": -1.4048383235931396, "logits/chosen": 0.19974537193775177, "logits/rejected": 0.11300679296255112, "logps/chosen": -2.9378466606140137, "logps/rejected": -2.737946033477783, "loss": 3.9317, "nll_loss": 4.119207859039307, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.2937846779823303, "rewards/margins": -0.01999005302786827, "rewards/rejected": -0.27379459142684937, "step": 30 }, { "epoch": 0.009844596019970467, "grad_norm": 14.8125, "learning_rate": 4.915730337078652e-07, "log_odds_chosen": 0.26785966753959656, "log_odds_ratio": -0.9891396760940552, "logits/chosen": 0.1649080067873001, "logits/rejected": 0.061838340014219284, "logps/chosen": -3.313523054122925, "logps/rejected": -3.5287063121795654, "loss": 3.7722, "nll_loss": 4.149771690368652, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3313523232936859, "rewards/margins": 0.021518340334296227, "rewards/rejected": -0.3528706431388855, "step": 35 }, { "epoch": 0.011250966879966247, "grad_norm": 12.5, "learning_rate": 5.617977528089888e-07, "log_odds_chosen": 0.4358310103416443, "log_odds_ratio": -1.1050533056259155, "logits/chosen": 0.384712278842926, "logits/rejected": 0.3466298282146454, "logps/chosen": -2.482825517654419, "logps/rejected": -2.926034927368164, "loss": 3.6796, "nll_loss": 3.56708025932312, "rewards/accuracies": 0.5, "rewards/chosen": -0.2482825517654419, "rewards/margins": 0.04432091861963272, "rewards/rejected": -0.292603462934494, "step": 40 }, { "epoch": 0.012657337739962029, "grad_norm": 11.125, "learning_rate": 6.320224719101125e-07, "log_odds_chosen": 0.1292145997285843, "log_odds_ratio": -1.3003278970718384, "logits/chosen": 0.37478917837142944, "logits/rejected": 0.2303522825241089, "logps/chosen": -3.015897750854492, "logps/rejected": -3.129483699798584, "loss": 3.6349, "nll_loss": 3.1559722423553467, "rewards/accuracies": 0.5, "rewards/chosen": -0.3015897870063782, "rewards/margins": 0.011358583346009254, "rewards/rejected": -0.31294840574264526, "step": 45 }, { "epoch": 0.014063708599957809, "grad_norm": 13.125, "learning_rate": 7.022471910112361e-07, "log_odds_chosen": 0.9067404866218567, "log_odds_ratio": -0.7627468109130859, "logits/chosen": 0.26700717210769653, "logits/rejected": 0.2883050739765167, "logps/chosen": -2.3781630992889404, "logps/rejected": -3.290259838104248, "loss": 3.5748, "nll_loss": 3.8330466747283936, "rewards/accuracies": 0.5, "rewards/chosen": -0.23781633377075195, "rewards/margins": 0.09120965003967285, "rewards/rejected": -0.3290259838104248, "step": 50 }, { "epoch": 0.01547007945995359, "grad_norm": 20.375, "learning_rate": 7.724719101123595e-07, "log_odds_chosen": 0.7076248526573181, "log_odds_ratio": -1.5457459688186646, "logits/chosen": 0.2677770256996155, "logits/rejected": -0.002324029803276062, "logps/chosen": -2.7843823432922363, "logps/rejected": -3.423976182937622, "loss": 3.7694, "nll_loss": 3.2589526176452637, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2784382402896881, "rewards/margins": 0.06395940482616425, "rewards/rejected": -0.34239763021469116, "step": 55 }, { "epoch": 0.01687645031994937, "grad_norm": 11.5, "learning_rate": 8.426966292134832e-07, "log_odds_chosen": 0.19437791407108307, "log_odds_ratio": -0.8209785223007202, "logits/chosen": 0.42011842131614685, "logits/rejected": 0.30973678827285767, "logps/chosen": -1.9150886535644531, "logps/rejected": -2.0626280307769775, "loss": 3.3113, "nll_loss": 2.7434136867523193, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19150885939598083, "rewards/margins": 0.014753949828445911, "rewards/rejected": -0.20626279711723328, "step": 60 }, { "epoch": 0.01828282117994515, "grad_norm": 8.4375, "learning_rate": 9.129213483146068e-07, "log_odds_chosen": -0.5769067406654358, "log_odds_ratio": -1.397226095199585, "logits/chosen": 0.33570200204849243, "logits/rejected": 0.24612624943256378, "logps/chosen": -2.5165371894836426, "logps/rejected": -1.963801383972168, "loss": 3.5497, "nll_loss": 3.5812149047851562, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.2516537308692932, "rewards/margins": -0.055273573845624924, "rewards/rejected": -0.1963801383972168, "step": 65 }, { "epoch": 0.019689192039940934, "grad_norm": 12.25, "learning_rate": 9.831460674157304e-07, "log_odds_chosen": 0.38934630155563354, "log_odds_ratio": -0.7580283880233765, "logits/chosen": 0.6683182716369629, "logits/rejected": 0.12224721908569336, "logps/chosen": -1.6888742446899414, "logps/rejected": -1.999748945236206, "loss": 2.8879, "nll_loss": 2.2645983695983887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1688874214887619, "rewards/margins": 0.031087476760149002, "rewards/rejected": -0.1999748945236206, "step": 70 }, { "epoch": 0.021095562899936714, "grad_norm": 16.125, "learning_rate": 1.053370786516854e-06, "log_odds_chosen": 0.6304991841316223, "log_odds_ratio": -0.6832792162895203, "logits/chosen": 0.39566153287887573, "logits/rejected": 0.08898230642080307, "logps/chosen": -2.1146538257598877, "logps/rejected": -2.7325990200042725, "loss": 2.8228, "nll_loss": 2.631037473678589, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.21146538853645325, "rewards/margins": 0.06179451197385788, "rewards/rejected": -0.2732599079608917, "step": 75 }, { "epoch": 0.022501933759932494, "grad_norm": 8.0, "learning_rate": 1.1235955056179777e-06, "log_odds_chosen": -0.3980503976345062, "log_odds_ratio": -1.2211532592773438, "logits/chosen": 0.26113444566726685, "logits/rejected": 0.28973740339279175, "logps/chosen": -2.7230300903320312, "logps/rejected": -2.336246967315674, "loss": 2.7579, "nll_loss": 2.8366832733154297, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.2723030149936676, "rewards/margins": -0.03867829591035843, "rewards/rejected": -0.23362469673156738, "step": 80 }, { "epoch": 0.023908304619928274, "grad_norm": 13.4375, "learning_rate": 1.1938202247191013e-06, "log_odds_chosen": 0.149004727602005, "log_odds_ratio": -0.8590810894966125, "logits/chosen": 0.5030871629714966, "logits/rejected": 0.10671982914209366, "logps/chosen": -1.9744809865951538, "logps/rejected": -2.104811191558838, "loss": 2.6968, "nll_loss": 2.141251802444458, "rewards/accuracies": 0.5, "rewards/chosen": -0.19744810461997986, "rewards/margins": 0.0130330054089427, "rewards/rejected": -0.21048113703727722, "step": 85 }, { "epoch": 0.025314675479924057, "grad_norm": 11.875, "learning_rate": 1.264044943820225e-06, "log_odds_chosen": 0.8776494860649109, "log_odds_ratio": -0.7410035133361816, "logits/chosen": 0.47830313444137573, "logits/rejected": -0.01859574392437935, "logps/chosen": -2.096220016479492, "logps/rejected": -2.9262399673461914, "loss": 2.5261, "nll_loss": 2.406276226043701, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20962199568748474, "rewards/margins": 0.083002008497715, "rewards/rejected": -0.29262399673461914, "step": 90 }, { "epoch": 0.026721046339919837, "grad_norm": 7.9375, "learning_rate": 1.3342696629213484e-06, "log_odds_chosen": 0.411318302154541, "log_odds_ratio": -0.9822152256965637, "logits/chosen": 0.041368693113327026, "logits/rejected": 0.16315683722496033, "logps/chosen": -2.8642966747283936, "logps/rejected": -3.3535690307617188, "loss": 2.4978, "nll_loss": 2.6924140453338623, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.28642967343330383, "rewards/margins": 0.04892724007368088, "rewards/rejected": -0.3353568911552429, "step": 95 }, { "epoch": 0.028127417199915617, "grad_norm": 3.90625, "learning_rate": 1.4044943820224722e-06, "log_odds_chosen": 0.7318227291107178, "log_odds_ratio": -0.6240326762199402, "logits/chosen": 0.21956415474414825, "logits/rejected": -0.1278270184993744, "logps/chosen": -2.337470531463623, "logps/rejected": -3.0502095222473145, "loss": 2.3359, "nll_loss": 2.661849021911621, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23374707996845245, "rewards/margins": 0.07127388566732407, "rewards/rejected": -0.3050209581851959, "step": 100 }, { "epoch": 0.029533788059911397, "grad_norm": 6.71875, "learning_rate": 1.4747191011235956e-06, "log_odds_chosen": 1.1011137962341309, "log_odds_ratio": -0.7636040449142456, "logits/chosen": 0.2855343818664551, "logits/rejected": 0.18099990487098694, "logps/chosen": -2.057398557662964, "logps/rejected": -3.087165355682373, "loss": 2.2776, "nll_loss": 2.0564818382263184, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20573988556861877, "rewards/margins": 0.10297667980194092, "rewards/rejected": -0.3087165653705597, "step": 105 }, { "epoch": 0.03094015891990718, "grad_norm": 2.640625, "learning_rate": 1.544943820224719e-06, "log_odds_chosen": -0.43961867690086365, "log_odds_ratio": -1.1604411602020264, "logits/chosen": 0.11637775599956512, "logits/rejected": 0.05946706607937813, "logps/chosen": -2.9125075340270996, "logps/rejected": -2.49198579788208, "loss": 2.4607, "nll_loss": 2.7069311141967773, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.2912507653236389, "rewards/margins": -0.04205216467380524, "rewards/rejected": -0.24919860064983368, "step": 110 }, { "epoch": 0.03234652977990296, "grad_norm": 2.328125, "learning_rate": 1.615168539325843e-06, "log_odds_chosen": -0.7555695176124573, "log_odds_ratio": -1.3248316049575806, "logits/chosen": 0.09455225616693497, "logits/rejected": 0.3622409999370575, "logps/chosen": -2.631551742553711, "logps/rejected": -1.9142179489135742, "loss": 2.3807, "nll_loss": 2.1373257637023926, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.2631551921367645, "rewards/margins": -0.07173338532447815, "rewards/rejected": -0.19142180681228638, "step": 115 }, { "epoch": 0.03375290063989874, "grad_norm": 2.296875, "learning_rate": 1.6853932584269663e-06, "log_odds_chosen": -0.5937722325325012, "log_odds_ratio": -1.3305103778839111, "logits/chosen": 0.17280586063861847, "logits/rejected": 0.17354366183280945, "logps/chosen": -2.77569317817688, "logps/rejected": -2.1679203510284424, "loss": 2.2381, "nll_loss": 2.0504109859466553, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.2775692939758301, "rewards/margins": -0.06077728420495987, "rewards/rejected": -0.2167920172214508, "step": 120 }, { "epoch": 0.03515927149989452, "grad_norm": 2.90625, "learning_rate": 1.7556179775280902e-06, "log_odds_chosen": -0.6599341034889221, "log_odds_ratio": -1.344637155532837, "logits/chosen": 0.26881319284439087, "logits/rejected": 0.23464787006378174, "logps/chosen": -2.4841132164001465, "logps/rejected": -1.8618295192718506, "loss": 2.2696, "nll_loss": 2.0306456089019775, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.24841132760047913, "rewards/margins": -0.06222837418317795, "rewards/rejected": -0.18618297576904297, "step": 125 }, { "epoch": 0.0365656423598903, "grad_norm": 2.25, "learning_rate": 1.8258426966292136e-06, "log_odds_chosen": 1.4656527042388916, "log_odds_ratio": -0.699799656867981, "logits/chosen": 0.46662625670433044, "logits/rejected": -0.05031327158212662, "logps/chosen": -1.8286659717559814, "logps/rejected": -3.2496142387390137, "loss": 2.2419, "nll_loss": 2.064641237258911, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18286658823490143, "rewards/margins": 0.14209482073783875, "rewards/rejected": -0.32496142387390137, "step": 130 }, { "epoch": 0.03797201321988609, "grad_norm": 2.0, "learning_rate": 1.8960674157303372e-06, "log_odds_chosen": -0.281602144241333, "log_odds_ratio": -1.1575530767440796, "logits/chosen": 0.3646135926246643, "logits/rejected": 0.10292468965053558, "logps/chosen": -2.2076382637023926, "logps/rejected": -1.889317512512207, "loss": 2.3077, "nll_loss": 2.152637481689453, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22076383233070374, "rewards/margins": -0.031832076609134674, "rewards/rejected": -0.18893174827098846, "step": 135 }, { "epoch": 0.03937838407988187, "grad_norm": 1.5390625, "learning_rate": 1.966292134831461e-06, "log_odds_chosen": 0.01979989930987358, "log_odds_ratio": -1.2207305431365967, "logits/chosen": 0.08597923815250397, "logits/rejected": -0.05333589389920235, "logps/chosen": -2.889000415802002, "logps/rejected": -2.8965907096862793, "loss": 2.0999, "nll_loss": 2.3558218479156494, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28890007734298706, "rewards/margins": 0.0007590189343318343, "rewards/rejected": -0.2896590828895569, "step": 140 }, { "epoch": 0.04078475493987765, "grad_norm": 5.03125, "learning_rate": 2.0365168539325845e-06, "log_odds_chosen": 0.9782406091690063, "log_odds_ratio": -0.7057538032531738, "logits/chosen": 0.1825554370880127, "logits/rejected": 0.11491219699382782, "logps/chosen": -2.4004645347595215, "logps/rejected": -3.380887508392334, "loss": 2.3346, "nll_loss": 2.3498218059539795, "rewards/accuracies": 0.5, "rewards/chosen": -0.240046426653862, "rewards/margins": 0.098042331635952, "rewards/rejected": -0.3380887806415558, "step": 145 }, { "epoch": 0.04219112579987343, "grad_norm": 18.25, "learning_rate": 2.106741573033708e-06, "log_odds_chosen": 0.6918350458145142, "log_odds_ratio": -0.7857062816619873, "logits/chosen": 0.3244979977607727, "logits/rejected": 0.11548665910959244, "logps/chosen": -1.9222161769866943, "logps/rejected": -2.5832555294036865, "loss": 2.3572, "nll_loss": 2.0777595043182373, "rewards/accuracies": 0.5, "rewards/chosen": -0.19222164154052734, "rewards/margins": 0.06610391288995743, "rewards/rejected": -0.25832557678222656, "step": 150 }, { "epoch": 0.04359749665986921, "grad_norm": 2.390625, "learning_rate": 2.1769662921348318e-06, "log_odds_chosen": 0.09566085040569305, "log_odds_ratio": -0.8652345538139343, "logits/chosen": 0.2193879634141922, "logits/rejected": 0.025885796174407005, "logps/chosen": -2.0290634632110596, "logps/rejected": -2.121151924133301, "loss": 2.2886, "nll_loss": 2.070652723312378, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.20290634036064148, "rewards/margins": 0.009208852425217628, "rewards/rejected": -0.21211519837379456, "step": 155 }, { "epoch": 0.04500386751986499, "grad_norm": 1.609375, "learning_rate": 2.2471910112359554e-06, "log_odds_chosen": 0.3223455250263214, "log_odds_ratio": -1.16544771194458, "logits/chosen": 0.2342136800289154, "logits/rejected": 0.018552130088210106, "logps/chosen": -2.3197340965270996, "logps/rejected": -2.6499991416931152, "loss": 2.0612, "nll_loss": 2.120389938354492, "rewards/accuracies": 0.5, "rewards/chosen": -0.23197337985038757, "rewards/margins": 0.03302653506398201, "rewards/rejected": -0.2649999260902405, "step": 160 }, { "epoch": 0.04641023837986077, "grad_norm": 1.9375, "learning_rate": 2.317415730337079e-06, "log_odds_chosen": -0.030302369967103004, "log_odds_ratio": -0.9944000244140625, "logits/chosen": 0.13670721650123596, "logits/rejected": 0.0991348847746849, "logps/chosen": -2.0291049480438232, "logps/rejected": -2.0215368270874023, "loss": 2.1136, "nll_loss": 2.255488872528076, "rewards/accuracies": 0.5, "rewards/chosen": -0.20291049778461456, "rewards/margins": -0.000756812107283622, "rewards/rejected": -0.20215372741222382, "step": 165 }, { "epoch": 0.04781660923985655, "grad_norm": 1.28125, "learning_rate": 2.3876404494382026e-06, "log_odds_chosen": -0.2611065208911896, "log_odds_ratio": -1.1305692195892334, "logits/chosen": 0.16220179200172424, "logits/rejected": 0.12496791034936905, "logps/chosen": -2.1977381706237793, "logps/rejected": -1.8998682498931885, "loss": 1.9801, "nll_loss": 2.043917417526245, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2197737991809845, "rewards/margins": -0.029786983504891396, "rewards/rejected": -0.18998682498931885, "step": 170 }, { "epoch": 0.049222980099852334, "grad_norm": 1.140625, "learning_rate": 2.457865168539326e-06, "log_odds_chosen": -0.07105789333581924, "log_odds_ratio": -1.1078282594680786, "logits/chosen": 0.12216529995203018, "logits/rejected": 0.027472496032714844, "logps/chosen": -2.0756044387817383, "logps/rejected": -1.9600484371185303, "loss": 1.9889, "nll_loss": 1.8455654382705688, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2075604498386383, "rewards/margins": -0.011555584147572517, "rewards/rejected": -0.19600485265254974, "step": 175 }, { "epoch": 0.050629350959848114, "grad_norm": 1.78125, "learning_rate": 2.52808988764045e-06, "log_odds_chosen": 1.0096280574798584, "log_odds_ratio": -0.5677310228347778, "logits/chosen": 0.21584467589855194, "logits/rejected": 0.09764888882637024, "logps/chosen": -1.5301876068115234, "logps/rejected": -2.4704413414001465, "loss": 1.9958, "nll_loss": 1.8449862003326416, "rewards/accuracies": 0.75, "rewards/chosen": -0.1530187726020813, "rewards/margins": 0.0940253958106041, "rewards/rejected": -0.2470441311597824, "step": 180 }, { "epoch": 0.052035721819843894, "grad_norm": 1.703125, "learning_rate": 2.598314606741573e-06, "log_odds_chosen": 0.018608326092362404, "log_odds_ratio": -1.0833065509796143, "logits/chosen": 0.20370593667030334, "logits/rejected": -0.01377248764038086, "logps/chosen": -2.2561848163604736, "logps/rejected": -2.2673747539520264, "loss": 2.0576, "nll_loss": 2.2482457160949707, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.22561845183372498, "rewards/margins": 0.0011190299410372972, "rewards/rejected": -0.22673749923706055, "step": 185 }, { "epoch": 0.053442092679839674, "grad_norm": 7.625, "learning_rate": 2.6685393258426968e-06, "log_odds_chosen": 0.6541301608085632, "log_odds_ratio": -0.5813708305358887, "logits/chosen": 0.2531304359436035, "logits/rejected": 0.15365512669086456, "logps/chosen": -1.798595666885376, "logps/rejected": -2.4013969898223877, "loss": 2.0605, "nll_loss": 2.041943073272705, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17985956370830536, "rewards/margins": 0.06028013676404953, "rewards/rejected": -0.2401396930217743, "step": 190 }, { "epoch": 0.054848463539835454, "grad_norm": 0.953125, "learning_rate": 2.7387640449438204e-06, "log_odds_chosen": 0.7900503277778625, "log_odds_ratio": -0.7179470658302307, "logits/chosen": 0.3807224631309509, "logits/rejected": 0.047808244824409485, "logps/chosen": -1.579738974571228, "logps/rejected": -2.2514843940734863, "loss": 1.9199, "nll_loss": 1.7178363800048828, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15797391533851624, "rewards/margins": 0.06717453896999359, "rewards/rejected": -0.22514846920967102, "step": 195 }, { "epoch": 0.056254834399831234, "grad_norm": 1.0234375, "learning_rate": 2.8089887640449444e-06, "log_odds_chosen": -0.145659938454628, "log_odds_ratio": -0.9325029253959656, "logits/chosen": 0.23699569702148438, "logits/rejected": -0.00980368535965681, "logps/chosen": -2.0339317321777344, "logps/rejected": -1.8712536096572876, "loss": 2.0171, "nll_loss": 2.233694314956665, "rewards/accuracies": 0.5, "rewards/chosen": -0.20339322090148926, "rewards/margins": -0.016267839819192886, "rewards/rejected": -0.18712535500526428, "step": 200 }, { "epoch": 0.057661205259827014, "grad_norm": 0.8671875, "learning_rate": 2.8792134831460676e-06, "log_odds_chosen": 0.3622695505619049, "log_odds_ratio": -0.7100062370300293, "logits/chosen": 0.16171380877494812, "logits/rejected": 0.16733792424201965, "logps/chosen": -1.59294593334198, "logps/rejected": -1.8916879892349243, "loss": 1.9959, "nll_loss": 1.71487557888031, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15929457545280457, "rewards/margins": 0.029874194413423538, "rewards/rejected": -0.189168781042099, "step": 205 }, { "epoch": 0.059067576119822794, "grad_norm": 1.6328125, "learning_rate": 2.9494382022471913e-06, "log_odds_chosen": 0.14909641444683075, "log_odds_ratio": -0.8013676404953003, "logits/chosen": 0.2264544665813446, "logits/rejected": -0.017689814791083336, "logps/chosen": -1.9082419872283936, "logps/rejected": -2.013080596923828, "loss": 1.9142, "nll_loss": 2.3444690704345703, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.1908242255449295, "rewards/margins": 0.010483830235898495, "rewards/rejected": -0.20130808651447296, "step": 210 }, { "epoch": 0.06047394697981858, "grad_norm": 1.15625, "learning_rate": 3.019662921348315e-06, "log_odds_chosen": 0.8857008218765259, "log_odds_ratio": -0.521682620048523, "logits/chosen": 0.23352375626564026, "logits/rejected": 0.006157740950584412, "logps/chosen": -1.452274203300476, "logps/rejected": -2.248126983642578, "loss": 2.0383, "nll_loss": 2.0336852073669434, "rewards/accuracies": 0.75, "rewards/chosen": -0.14522740244865417, "rewards/margins": 0.07958526909351349, "rewards/rejected": -0.22481270134449005, "step": 215 }, { "epoch": 0.06188031783981436, "grad_norm": 1.5, "learning_rate": 3.089887640449438e-06, "log_odds_chosen": 0.2916131019592285, "log_odds_ratio": -0.8203420639038086, "logits/chosen": 0.14003995060920715, "logits/rejected": 0.21731536090373993, "logps/chosen": -1.518283724784851, "logps/rejected": -1.7996505498886108, "loss": 1.8932, "nll_loss": 1.9349063634872437, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15182837843894958, "rewards/margins": 0.028136665001511574, "rewards/rejected": -0.17996501922607422, "step": 220 }, { "epoch": 0.06328668869981013, "grad_norm": 1.1796875, "learning_rate": 3.160112359550562e-06, "log_odds_chosen": 0.04508267715573311, "log_odds_ratio": -0.8254510760307312, "logits/chosen": 0.1539347618818283, "logits/rejected": -0.02814999222755432, "logps/chosen": -1.587212324142456, "logps/rejected": -1.6075446605682373, "loss": 1.8014, "nll_loss": 1.7296669483184814, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15872123837471008, "rewards/margins": 0.002033218275755644, "rewards/rejected": -0.16075445711612701, "step": 225 }, { "epoch": 0.06469305955980592, "grad_norm": 4.90625, "learning_rate": 3.230337078651686e-06, "log_odds_chosen": -0.3043696880340576, "log_odds_ratio": -0.881208062171936, "logits/chosen": 0.3043157458305359, "logits/rejected": 0.04385875537991524, "logps/chosen": -1.3035953044891357, "logps/rejected": -1.1122184991836548, "loss": 1.7833, "nll_loss": 1.782552719116211, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.13035951554775238, "rewards/margins": -0.019137678667902946, "rewards/rejected": -0.11122184991836548, "step": 230 }, { "epoch": 0.06609943041980171, "grad_norm": 0.74609375, "learning_rate": 3.3005617977528094e-06, "log_odds_chosen": -0.13101080060005188, "log_odds_ratio": -1.0145528316497803, "logits/chosen": 0.09630151093006134, "logits/rejected": 0.09045438468456268, "logps/chosen": -1.6960340738296509, "logps/rejected": -1.576324462890625, "loss": 1.7729, "nll_loss": 1.5516774654388428, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1696033924818039, "rewards/margins": -0.01197095401585102, "rewards/rejected": -0.15763245522975922, "step": 235 }, { "epoch": 0.06750580127979748, "grad_norm": 1.2578125, "learning_rate": 3.3707865168539327e-06, "log_odds_chosen": 0.19244810938835144, "log_odds_ratio": -0.712375819683075, "logits/chosen": 0.2630612254142761, "logits/rejected": 0.15920257568359375, "logps/chosen": -1.307663083076477, "logps/rejected": -1.4294275045394897, "loss": 1.7556, "nll_loss": 1.5486177206039429, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13076630234718323, "rewards/margins": 0.012176448479294777, "rewards/rejected": -0.14294275641441345, "step": 240 }, { "epoch": 0.06891217213979327, "grad_norm": 1.6875, "learning_rate": 3.4410112359550563e-06, "log_odds_chosen": 0.2274598777294159, "log_odds_ratio": -0.8218949437141418, "logits/chosen": 0.1571234166622162, "logits/rejected": 0.050012148916721344, "logps/chosen": -1.4508711099624634, "logps/rejected": -1.6815522909164429, "loss": 1.6875, "nll_loss": 1.500241756439209, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.1450871080160141, "rewards/margins": 0.023068133741617203, "rewards/rejected": -0.168155238032341, "step": 245 }, { "epoch": 0.07031854299978904, "grad_norm": 0.84375, "learning_rate": 3.5112359550561803e-06, "log_odds_chosen": 0.29401296377182007, "log_odds_ratio": -0.657070517539978, "logits/chosen": 0.2837556004524231, "logits/rejected": 0.19875159859657288, "logps/chosen": -1.1506319046020508, "logps/rejected": -1.3889071941375732, "loss": 1.75, "nll_loss": 1.6564500331878662, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11506320536136627, "rewards/margins": 0.023827504366636276, "rewards/rejected": -0.13889071345329285, "step": 250 }, { "epoch": 0.07172491385978483, "grad_norm": 1.0859375, "learning_rate": 3.581460674157304e-06, "log_odds_chosen": 0.6645214557647705, "log_odds_ratio": -0.5389525890350342, "logits/chosen": 0.3206818699836731, "logits/rejected": 0.02894558571279049, "logps/chosen": -1.1443369388580322, "logps/rejected": -1.6359878778457642, "loss": 1.5984, "nll_loss": 1.7387971878051758, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1144336685538292, "rewards/margins": 0.04916510730981827, "rewards/rejected": -0.16359877586364746, "step": 255 }, { "epoch": 0.0731312847197806, "grad_norm": 0.98828125, "learning_rate": 3.651685393258427e-06, "log_odds_chosen": 0.17139050364494324, "log_odds_ratio": -0.7790501713752747, "logits/chosen": -0.0013204365968704224, "logits/rejected": 0.07245022058486938, "logps/chosen": -1.2540526390075684, "logps/rejected": -1.3776941299438477, "loss": 1.7383, "nll_loss": 1.6374698877334595, "rewards/accuracies": 0.5, "rewards/chosen": -0.12540525197982788, "rewards/margins": 0.012364145368337631, "rewards/rejected": -0.137769415974617, "step": 260 }, { "epoch": 0.07453765557977639, "grad_norm": 1.15625, "learning_rate": 3.721910112359551e-06, "log_odds_chosen": -0.09897629916667938, "log_odds_ratio": -0.8483640551567078, "logits/chosen": 0.06739149242639542, "logits/rejected": -0.06628112494945526, "logps/chosen": -1.1900030374526978, "logps/rejected": -1.1566414833068848, "loss": 1.6208, "nll_loss": 1.482954502105713, "rewards/accuracies": 0.5, "rewards/chosen": -0.11900033056735992, "rewards/margins": -0.003336158813908696, "rewards/rejected": -0.11566416174173355, "step": 265 }, { "epoch": 0.07594402643977217, "grad_norm": 0.71484375, "learning_rate": 3.7921348314606744e-06, "log_odds_chosen": 0.614561915397644, "log_odds_ratio": -0.5390773415565491, "logits/chosen": 0.2947372794151306, "logits/rejected": 0.029941141605377197, "logps/chosen": -1.279478669166565, "logps/rejected": -1.805053472518921, "loss": 1.6478, "nll_loss": 1.631860375404358, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1279478818178177, "rewards/margins": 0.05255746841430664, "rewards/rejected": -0.18050536513328552, "step": 270 }, { "epoch": 0.07735039729976795, "grad_norm": 0.71875, "learning_rate": 3.8623595505617985e-06, "log_odds_chosen": 0.70516437292099, "log_odds_ratio": -0.5123119354248047, "logits/chosen": 0.14660146832466125, "logits/rejected": -0.19481025636196136, "logps/chosen": -1.091347098350525, "logps/rejected": -1.606636643409729, "loss": 1.6133, "nll_loss": 1.5436406135559082, "rewards/accuracies": 0.75, "rewards/chosen": -0.10913471132516861, "rewards/margins": 0.051528967916965485, "rewards/rejected": -0.1606636941432953, "step": 275 }, { "epoch": 0.07875676815976373, "grad_norm": 0.82421875, "learning_rate": 3.932584269662922e-06, "log_odds_chosen": 0.42323070764541626, "log_odds_ratio": -0.5872517824172974, "logits/chosen": 0.2884772717952728, "logits/rejected": 0.11415378749370575, "logps/chosen": -0.9810200929641724, "logps/rejected": -1.2774531841278076, "loss": 1.6323, "nll_loss": 1.450991153717041, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09810201078653336, "rewards/margins": 0.029643306508660316, "rewards/rejected": -0.12774533033370972, "step": 280 }, { "epoch": 0.08016313901975951, "grad_norm": 0.8828125, "learning_rate": 4.002808988764045e-06, "log_odds_chosen": 0.240543931722641, "log_odds_ratio": -0.6353433132171631, "logits/chosen": 0.33016669750213623, "logits/rejected": 0.22914664447307587, "logps/chosen": -1.0102094411849976, "logps/rejected": -1.1385387182235718, "loss": 1.529, "nll_loss": 1.167189359664917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10102095454931259, "rewards/margins": 0.012832917273044586, "rewards/rejected": -0.11385388672351837, "step": 285 }, { "epoch": 0.0815695098797553, "grad_norm": 0.79296875, "learning_rate": 4.073033707865169e-06, "log_odds_chosen": 0.24131183326244354, "log_odds_ratio": -0.6839054822921753, "logits/chosen": 0.02991688810288906, "logits/rejected": 0.07139863073825836, "logps/chosen": -1.1842001676559448, "logps/rejected": -1.378699541091919, "loss": 1.6598, "nll_loss": 1.6421235799789429, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11842000484466553, "rewards/margins": 0.019449947401881218, "rewards/rejected": -0.1378699541091919, "step": 290 }, { "epoch": 0.08297588073975107, "grad_norm": 0.63671875, "learning_rate": 4.143258426966292e-06, "log_odds_chosen": -0.0037903576157987118, "log_odds_ratio": -0.752034068107605, "logits/chosen": 0.14442117512226105, "logits/rejected": 0.04852147772908211, "logps/chosen": -1.194391131401062, "logps/rejected": -1.198143482208252, "loss": 1.5798, "nll_loss": 1.469167709350586, "rewards/accuracies": 0.5, "rewards/chosen": -0.11943913996219635, "rewards/margins": 0.00037522317143157125, "rewards/rejected": -0.11981435120105743, "step": 295 }, { "epoch": 0.08438225159974685, "grad_norm": 0.5703125, "learning_rate": 4.213483146067416e-06, "log_odds_chosen": 0.16156907379627228, "log_odds_ratio": -0.6518368721008301, "logits/chosen": 0.3249798119068146, "logits/rejected": 0.059707604348659515, "logps/chosen": -1.191025972366333, "logps/rejected": -1.3182622194290161, "loss": 1.6008, "nll_loss": 1.5026135444641113, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1191026121377945, "rewards/margins": 0.012723615393042564, "rewards/rejected": -0.1318262368440628, "step": 300 }, { "epoch": 0.08578862245974263, "grad_norm": 1.5390625, "learning_rate": 4.28370786516854e-06, "log_odds_chosen": 0.46325716376304626, "log_odds_ratio": -0.5719884634017944, "logits/chosen": 0.04224681854248047, "logits/rejected": -0.18858034908771515, "logps/chosen": -1.2465041875839233, "logps/rejected": -1.5959270000457764, "loss": 1.647, "nll_loss": 1.6545063257217407, "rewards/accuracies": 0.75, "rewards/chosen": -0.12465040385723114, "rewards/margins": 0.03494229167699814, "rewards/rejected": -0.15959270298480988, "step": 305 }, { "epoch": 0.08719499331973841, "grad_norm": 0.8515625, "learning_rate": 4.3539325842696635e-06, "log_odds_chosen": 0.03835631161928177, "log_odds_ratio": -0.7204209566116333, "logits/chosen": 0.21781399846076965, "logits/rejected": 0.1392887532711029, "logps/chosen": -1.0911667346954346, "logps/rejected": -1.093153953552246, "loss": 1.5681, "nll_loss": 1.530747652053833, "rewards/accuracies": 0.5, "rewards/chosen": -0.10911668837070465, "rewards/margins": 0.0001987170398933813, "rewards/rejected": -0.10931539535522461, "step": 310 }, { "epoch": 0.0886013641797342, "grad_norm": 0.7578125, "learning_rate": 4.424157303370787e-06, "log_odds_chosen": 0.3542148470878601, "log_odds_ratio": -0.6624723672866821, "logits/chosen": 0.10045752674341202, "logits/rejected": -0.00516448775306344, "logps/chosen": -1.1036803722381592, "logps/rejected": -1.3415453433990479, "loss": 1.5786, "nll_loss": 1.6732912063598633, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11036805063486099, "rewards/margins": 0.023786501958966255, "rewards/rejected": -0.1341545283794403, "step": 315 }, { "epoch": 0.09000773503972997, "grad_norm": 3.375, "learning_rate": 4.494382022471911e-06, "log_odds_chosen": 0.48890742659568787, "log_odds_ratio": -0.5910844802856445, "logits/chosen": 0.04471005126833916, "logits/rejected": -0.138756662607193, "logps/chosen": -1.1005867719650269, "logps/rejected": -1.4969103336334229, "loss": 1.6061, "nll_loss": 1.6264108419418335, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11005868017673492, "rewards/margins": 0.03963235393166542, "rewards/rejected": -0.14969103038311005, "step": 320 }, { "epoch": 0.09141410589972576, "grad_norm": 0.89453125, "learning_rate": 4.564606741573034e-06, "log_odds_chosen": 0.21167974174022675, "log_odds_ratio": -0.7039065361022949, "logits/chosen": -0.022272679954767227, "logits/rejected": -0.04252483695745468, "logps/chosen": -1.2426739931106567, "logps/rejected": -1.4039318561553955, "loss": 1.6706, "nll_loss": 1.8168179988861084, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12426741421222687, "rewards/margins": 0.016125772148370743, "rewards/rejected": -0.1403931826353073, "step": 325 }, { "epoch": 0.09282047675972153, "grad_norm": 1.4140625, "learning_rate": 4.634831460674158e-06, "log_odds_chosen": 0.8199083209037781, "log_odds_ratio": -0.46476811170578003, "logits/chosen": 0.24483537673950195, "logits/rejected": 0.0019326538313180208, "logps/chosen": -0.9420560598373413, "logps/rejected": -1.5152714252471924, "loss": 1.526, "nll_loss": 1.414475440979004, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09420560300350189, "rewards/margins": 0.05732153728604317, "rewards/rejected": -0.15152713656425476, "step": 330 }, { "epoch": 0.09422684761971732, "grad_norm": 0.98046875, "learning_rate": 4.705056179775281e-06, "log_odds_chosen": 0.05631124973297119, "log_odds_ratio": -0.7453809976577759, "logits/chosen": 0.04857509210705757, "logits/rejected": -0.03552461788058281, "logps/chosen": -1.201915979385376, "logps/rejected": -1.2734609842300415, "loss": 1.6201, "nll_loss": 1.506476640701294, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12019158899784088, "rewards/margins": 0.007154509425163269, "rewards/rejected": -0.12734608352184296, "step": 335 }, { "epoch": 0.0956332184797131, "grad_norm": 0.92578125, "learning_rate": 4.775280898876405e-06, "log_odds_chosen": 0.1800452619791031, "log_odds_ratio": -0.6534844636917114, "logits/chosen": 0.08206330239772797, "logits/rejected": -0.0701373964548111, "logps/chosen": -1.2106133699417114, "logps/rejected": -1.341378927230835, "loss": 1.4736, "nll_loss": 1.4450690746307373, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12106132507324219, "rewards/margins": 0.013076579198241234, "rewards/rejected": -0.13413789868354797, "step": 340 }, { "epoch": 0.09703958933970888, "grad_norm": 1.09375, "learning_rate": 4.8455056179775285e-06, "log_odds_chosen": 0.14819678664207458, "log_odds_ratio": -0.701557457447052, "logits/chosen": -0.08788873255252838, "logits/rejected": -0.1474021077156067, "logps/chosen": -1.1518551111221313, "logps/rejected": -1.267639398574829, "loss": 1.5814, "nll_loss": 1.5248239040374756, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11518549919128418, "rewards/margins": 0.011578412726521492, "rewards/rejected": -0.1267639398574829, "step": 345 }, { "epoch": 0.09844596019970467, "grad_norm": 1.2578125, "learning_rate": 4.915730337078652e-06, "log_odds_chosen": 0.19631382822990417, "log_odds_ratio": -0.7674649953842163, "logits/chosen": 0.1635451465845108, "logits/rejected": -0.04669942334294319, "logps/chosen": -1.2185736894607544, "logps/rejected": -1.3684440851211548, "loss": 1.5616, "nll_loss": 1.4181640148162842, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12185736000537872, "rewards/margins": 0.014987033791840076, "rewards/rejected": -0.13684441149234772, "step": 350 }, { "epoch": 0.09985233105970044, "grad_norm": 0.76171875, "learning_rate": 4.985955056179776e-06, "log_odds_chosen": 0.5495853424072266, "log_odds_ratio": -0.5390895009040833, "logits/chosen": -0.0019469677936285734, "logits/rejected": 0.007418841123580933, "logps/chosen": -0.917323887348175, "logps/rejected": -1.2796287536621094, "loss": 1.6679, "nll_loss": 1.480374813079834, "rewards/accuracies": 0.75, "rewards/chosen": -0.09173239022493362, "rewards/margins": 0.03623048588633537, "rewards/rejected": -0.1279628723859787, "step": 355 }, { "epoch": 0.10125870191969623, "grad_norm": 2.484375, "learning_rate": 4.999980711400201e-06, "log_odds_chosen": 0.15539391338825226, "log_odds_ratio": -0.6699272394180298, "logits/chosen": 0.05738813802599907, "logits/rejected": -0.13643920421600342, "logps/chosen": -0.9270496368408203, "logps/rejected": -1.0245447158813477, "loss": 1.6375, "nll_loss": 1.5049155950546265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09270496666431427, "rewards/margins": 0.009749513119459152, "rewards/rejected": -0.10245448350906372, "step": 360 }, { "epoch": 0.102665072779692, "grad_norm": 1.2421875, "learning_rate": 4.999902351973632e-06, "log_odds_chosen": 0.3612229526042938, "log_odds_ratio": -0.6697491407394409, "logits/chosen": 0.2046525478363037, "logits/rejected": -0.2595598101615906, "logps/chosen": -1.0786577463150024, "logps/rejected": -1.2872159481048584, "loss": 1.5486, "nll_loss": 1.5875145196914673, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10786578804254532, "rewards/margins": 0.020855823531746864, "rewards/rejected": -0.12872160971164703, "step": 365 }, { "epoch": 0.10407144363968779, "grad_norm": 1.0859375, "learning_rate": 4.999763718070656e-06, "log_odds_chosen": 0.2860111594200134, "log_odds_ratio": -0.6612197160720825, "logits/chosen": 0.23584775626659393, "logits/rejected": -0.477583646774292, "logps/chosen": -1.2212624549865723, "logps/rejected": -1.4150340557098389, "loss": 1.5084, "nll_loss": 1.620976209640503, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1221262589097023, "rewards/margins": 0.019377145916223526, "rewards/rejected": -0.14150340855121613, "step": 370 }, { "epoch": 0.10547781449968356, "grad_norm": 0.5703125, "learning_rate": 4.999564813033837e-06, "log_odds_chosen": 0.47208815813064575, "log_odds_ratio": -0.6141301989555359, "logits/chosen": 0.3282993733882904, "logits/rejected": -0.17076356709003448, "logps/chosen": -1.0655587911605835, "logps/rejected": -1.4597357511520386, "loss": 1.4887, "nll_loss": 1.4764500856399536, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10655587911605835, "rewards/margins": 0.03941771388053894, "rewards/rejected": -0.14597360789775848, "step": 375 }, { "epoch": 0.10688418535967935, "grad_norm": 1.0078125, "learning_rate": 4.9993056416589215e-06, "log_odds_chosen": 0.4635187089443207, "log_odds_ratio": -0.6125253438949585, "logits/chosen": 0.09420565515756607, "logits/rejected": -0.2581847310066223, "logps/chosen": -1.0355606079101562, "logps/rejected": -1.3603068590164185, "loss": 1.6089, "nll_loss": 1.6906983852386475, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10355605185031891, "rewards/margins": 0.032474637031555176, "rewards/rejected": -0.13603070378303528, "step": 380 }, { "epoch": 0.10829055621967512, "grad_norm": 0.6640625, "learning_rate": 4.9989862101947215e-06, "log_odds_chosen": 0.2843974530696869, "log_odds_ratio": -0.636595606803894, "logits/chosen": 0.014289943501353264, "logits/rejected": -0.03619622439146042, "logps/chosen": -1.036195158958435, "logps/rejected": -1.2099014520645142, "loss": 1.5497, "nll_loss": 1.487868309020996, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1036195158958435, "rewards/margins": 0.01737063005566597, "rewards/rejected": -0.12099014222621918, "step": 385 }, { "epoch": 0.10969692707967091, "grad_norm": 0.55859375, "learning_rate": 4.998606526342963e-06, "log_odds_chosen": 0.6098340153694153, "log_odds_ratio": -0.49714794754981995, "logits/chosen": 0.10547232627868652, "logits/rejected": -0.30191439390182495, "logps/chosen": -0.9376096725463867, "logps/rejected": -1.3693550825119019, "loss": 1.5585, "nll_loss": 1.4435722827911377, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09376096725463867, "rewards/margins": 0.04317455366253853, "rewards/rejected": -0.1369355171918869, "step": 390 }, { "epoch": 0.1111032979396667, "grad_norm": 0.70703125, "learning_rate": 4.998166599258102e-06, "log_odds_chosen": -0.2814989984035492, "log_odds_ratio": -0.9766399264335632, "logits/chosen": 0.017055341973900795, "logits/rejected": 0.1271597445011139, "logps/chosen": -1.3258157968521118, "logps/rejected": -1.114498496055603, "loss": 1.535, "nll_loss": 1.4724485874176025, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.13258156180381775, "rewards/margins": -0.021131718531250954, "rewards/rejected": -0.11144986003637314, "step": 395 }, { "epoch": 0.11250966879966247, "grad_norm": 1.1484375, "learning_rate": 4.997666439547102e-06, "log_odds_chosen": 0.12479138374328613, "log_odds_ratio": -0.6825396418571472, "logits/chosen": 0.1605800986289978, "logits/rejected": -0.03981009125709534, "logps/chosen": -1.0353831052780151, "logps/rejected": -1.1214975118637085, "loss": 1.5717, "nll_loss": 1.446873664855957, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10353831201791763, "rewards/margins": 0.008611435070633888, "rewards/rejected": -0.11214976012706757, "step": 400 }, { "epoch": 0.11391603965965826, "grad_norm": 0.7734375, "learning_rate": 4.997106059269182e-06, "log_odds_chosen": 0.5131736993789673, "log_odds_ratio": -0.5569295883178711, "logits/chosen": 0.11948621273040771, "logits/rejected": 0.006663101725280285, "logps/chosen": -0.6998155117034912, "logps/rejected": -0.9955805540084839, "loss": 1.6032, "nll_loss": 1.5056767463684082, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06998156011104584, "rewards/margins": 0.029576506465673447, "rewards/rejected": -0.09955805540084839, "step": 405 }, { "epoch": 0.11532241051965403, "grad_norm": 1.703125, "learning_rate": 4.996485471935518e-06, "log_odds_chosen": 0.12343521416187286, "log_odds_ratio": -0.7595449686050415, "logits/chosen": 0.14384707808494568, "logits/rejected": 0.16315032541751862, "logps/chosen": -1.0936092138290405, "logps/rejected": -1.1240942478179932, "loss": 1.5141, "nll_loss": 1.4012603759765625, "rewards/accuracies": 0.5, "rewards/chosen": -0.10936091840267181, "rewards/margins": 0.00304849399253726, "rewards/rejected": -0.11240942776203156, "step": 410 }, { "epoch": 0.11672878137964982, "grad_norm": 0.9140625, "learning_rate": 4.995804692508927e-06, "log_odds_chosen": 0.061908699572086334, "log_odds_ratio": -0.766377329826355, "logits/chosen": 0.24375347793102264, "logits/rejected": 0.2323276698589325, "logps/chosen": -0.9721105694770813, "logps/rejected": -1.0543268918991089, "loss": 1.4151, "nll_loss": 1.2445567846298218, "rewards/accuracies": 0.5, "rewards/chosen": -0.09721106290817261, "rewards/margins": 0.008221631869673729, "rewards/rejected": -0.10543270409107208, "step": 415 }, { "epoch": 0.11813515223964559, "grad_norm": 0.62890625, "learning_rate": 4.9950637374035e-06, "log_odds_chosen": 0.07634903490543365, "log_odds_ratio": -0.7504058480262756, "logits/chosen": 0.02690283954143524, "logits/rejected": -0.2870177626609802, "logps/chosen": -1.129374384880066, "logps/rejected": -1.203255295753479, "loss": 1.5333, "nll_loss": 1.7005048990249634, "rewards/accuracies": 0.5, "rewards/chosen": -0.11293745040893555, "rewards/margins": 0.00738809397444129, "rewards/rejected": -0.12032552808523178, "step": 420 }, { "epoch": 0.11954152309964138, "grad_norm": 0.6640625, "learning_rate": 4.994262624484205e-06, "log_odds_chosen": 0.7215684652328491, "log_odds_ratio": -0.52166748046875, "logits/chosen": 0.1289384961128235, "logits/rejected": -0.4422905445098877, "logps/chosen": -0.9521444439888, "logps/rejected": -1.393911600112915, "loss": 1.5728, "nll_loss": 1.7155725955963135, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09521444886922836, "rewards/margins": 0.044176697731018066, "rewards/rejected": -0.13939115405082703, "step": 425 }, { "epoch": 0.12094789395963716, "grad_norm": 0.875, "learning_rate": 4.993401373066463e-06, "log_odds_chosen": 0.20202788710594177, "log_odds_ratio": -0.6701411008834839, "logits/chosen": 0.09874279797077179, "logits/rejected": -0.14084379374980927, "logps/chosen": -0.9783962368965149, "logps/rejected": -1.1814371347427368, "loss": 1.5083, "nll_loss": 1.4307975769042969, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09783962368965149, "rewards/margins": 0.020304083824157715, "rewards/rejected": -0.1181437149643898, "step": 430 }, { "epoch": 0.12235426481963294, "grad_norm": 1.1171875, "learning_rate": 4.992480003915675e-06, "log_odds_chosen": -0.13087473809719086, "log_odds_ratio": -0.8643546104431152, "logits/chosen": 0.27809780836105347, "logits/rejected": 0.1468941867351532, "logps/chosen": -1.152295470237732, "logps/rejected": -1.062312364578247, "loss": 1.5903, "nll_loss": 1.2978155612945557, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.11522956192493439, "rewards/margins": -0.008998315781354904, "rewards/rejected": -0.10623123496770859, "step": 435 }, { "epoch": 0.12376063567962872, "grad_norm": 1.328125, "learning_rate": 4.991498539246728e-06, "log_odds_chosen": 0.3621232807636261, "log_odds_ratio": -0.6083245277404785, "logits/chosen": 0.23023250699043274, "logits/rejected": -0.058020271360874176, "logps/chosen": -1.0368616580963135, "logps/rejected": -1.2852314710617065, "loss": 1.4358, "nll_loss": 1.2882846593856812, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10368617624044418, "rewards/margins": 0.024836981669068336, "rewards/rejected": -0.12852314114570618, "step": 440 }, { "epoch": 0.1251670065396245, "grad_norm": 0.4765625, "learning_rate": 4.990457002723452e-06, "log_odds_chosen": 0.43172144889831543, "log_odds_ratio": -0.6068316102027893, "logits/chosen": 0.2114812582731247, "logits/rejected": -0.2429969310760498, "logps/chosen": -1.0138442516326904, "logps/rejected": -1.36911141872406, "loss": 1.4516, "nll_loss": 1.4831212759017944, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10138442367315292, "rewards/margins": 0.035526715219020844, "rewards/rejected": -0.13691113889217377, "step": 445 }, { "epoch": 0.12657337739962027, "grad_norm": 0.71484375, "learning_rate": 4.989355419458055e-06, "log_odds_chosen": 0.5212758183479309, "log_odds_ratio": -0.6297636032104492, "logits/chosen": 0.08004938811063766, "logits/rejected": -0.07892777025699615, "logps/chosen": -1.1437304019927979, "logps/rejected": -1.4948976039886475, "loss": 1.5292, "nll_loss": 1.4960545301437378, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11437302827835083, "rewards/margins": 0.03511672094464302, "rewards/rejected": -0.14948976039886475, "step": 450 }, { "epoch": 0.12797974825961606, "grad_norm": 1.046875, "learning_rate": 4.988193816010518e-06, "log_odds_chosen": 0.17997342348098755, "log_odds_ratio": -0.7037402391433716, "logits/chosen": -0.012340274639427662, "logits/rejected": -0.11124049127101898, "logps/chosen": -0.9398072361946106, "logps/rejected": -1.043228268623352, "loss": 1.5305, "nll_loss": 1.656345009803772, "rewards/accuracies": 0.5, "rewards/chosen": -0.09398071467876434, "rewards/margins": 0.010342110879719257, "rewards/rejected": -0.10432282835245132, "step": 455 }, { "epoch": 0.12938611911961184, "grad_norm": 0.6484375, "learning_rate": 4.98697222038795e-06, "log_odds_chosen": 0.836463451385498, "log_odds_ratio": -0.4590897560119629, "logits/chosen": 0.2174886167049408, "logits/rejected": -0.2220270186662674, "logps/chosen": -0.8792584538459778, "logps/rejected": -1.4695708751678467, "loss": 1.4685, "nll_loss": 1.4058226346969604, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08792584389448166, "rewards/margins": 0.05903124809265137, "rewards/rejected": -0.14695709943771362, "step": 460 }, { "epoch": 0.13079248997960763, "grad_norm": 0.63671875, "learning_rate": 4.985690662043916e-06, "log_odds_chosen": 0.169576495885849, "log_odds_ratio": -0.7633191347122192, "logits/chosen": 0.09253176301717758, "logits/rejected": -0.011513747274875641, "logps/chosen": -1.078896164894104, "logps/rejected": -1.1068412065505981, "loss": 1.5252, "nll_loss": 1.5368982553482056, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10788961499929428, "rewards/margins": 0.0027945064939558506, "rewards/rejected": -0.1106841191649437, "step": 465 }, { "epoch": 0.13219886083960342, "grad_norm": 0.4296875, "learning_rate": 4.984349171877726e-06, "log_odds_chosen": 0.3642016053199768, "log_odds_ratio": -0.5838258862495422, "logits/chosen": 0.016974186524748802, "logits/rejected": -0.009120392613112926, "logps/chosen": -0.9194883108139038, "logps/rejected": -1.1762017011642456, "loss": 1.5658, "nll_loss": 1.5235271453857422, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09194884449243546, "rewards/margins": 0.025671344250440598, "rewards/rejected": -0.11762018501758575, "step": 470 }, { "epoch": 0.13360523169959918, "grad_norm": 1.71875, "learning_rate": 4.9829477822336905e-06, "log_odds_chosen": 0.19526013731956482, "log_odds_ratio": -0.7123385667800903, "logits/chosen": 0.017217490822076797, "logits/rejected": -0.0015163153875619173, "logps/chosen": -1.1902854442596436, "logps/rejected": -1.3572930097579956, "loss": 1.4638, "nll_loss": 1.6305532455444336, "rewards/accuracies": 0.5, "rewards/chosen": -0.11902855336666107, "rewards/margins": 0.016700739040970802, "rewards/rejected": -0.13572928309440613, "step": 475 }, { "epoch": 0.13501160255959496, "grad_norm": 0.58984375, "learning_rate": 4.981486526900339e-06, "log_odds_chosen": -0.0772426575422287, "log_odds_ratio": -0.8408387899398804, "logits/chosen": -0.21625415980815887, "logits/rejected": -0.19397906959056854, "logps/chosen": -1.1577032804489136, "logps/rejected": -1.0639727115631104, "loss": 1.516, "nll_loss": 1.723187804222107, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.11577033996582031, "rewards/margins": -0.009373062290251255, "rewards/rejected": -0.10639727115631104, "step": 480 }, { "epoch": 0.13641797341959075, "grad_norm": 0.85546875, "learning_rate": 4.9799654411096095e-06, "log_odds_chosen": 0.534473717212677, "log_odds_ratio": -0.5868498086929321, "logits/chosen": 0.2728256583213806, "logits/rejected": -0.06836424767971039, "logps/chosen": -0.9803134202957153, "logps/rejected": -1.4238746166229248, "loss": 1.4931, "nll_loss": 1.3809219598770142, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09803132712841034, "rewards/margins": 0.04435613378882408, "rewards/rejected": -0.1423874795436859, "step": 485 }, { "epoch": 0.13782434427958654, "grad_norm": 1.0703125, "learning_rate": 4.978384561535994e-06, "log_odds_chosen": 0.2063991129398346, "log_odds_ratio": -0.7101866006851196, "logits/chosen": 0.0015419780975207686, "logits/rejected": -0.1946432739496231, "logps/chosen": -1.2708717584609985, "logps/rejected": -1.3941092491149902, "loss": 1.5895, "nll_loss": 1.6133596897125244, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12708717584609985, "rewards/margins": 0.012323752045631409, "rewards/rejected": -0.13941094279289246, "step": 490 }, { "epoch": 0.1392307151395823, "grad_norm": 1.0546875, "learning_rate": 4.976743926295655e-06, "log_odds_chosen": 0.042014528065919876, "log_odds_ratio": -0.7115843296051025, "logits/chosen": 0.24858923256397247, "logits/rejected": 0.1665419489145279, "logps/chosen": -1.0768249034881592, "logps/rejected": -1.092078447341919, "loss": 1.4598, "nll_loss": 1.6003639698028564, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1076824888586998, "rewards/margins": 0.0015253443270921707, "rewards/rejected": -0.10920783132314682, "step": 495 }, { "epoch": 0.14063708599957808, "grad_norm": 0.5703125, "learning_rate": 4.975043574945512e-06, "log_odds_chosen": 0.0838036760687828, "log_odds_ratio": -0.7223269939422607, "logits/chosen": 0.05284743383526802, "logits/rejected": -0.08719642460346222, "logps/chosen": -1.0573341846466064, "logps/rejected": -1.1373835802078247, "loss": 1.4152, "nll_loss": 1.4871622323989868, "rewards/accuracies": 0.5, "rewards/chosen": -0.10573341697454453, "rewards/margins": 0.008004938252270222, "rewards/rejected": -0.11373835802078247, "step": 500 }, { "epoch": 0.14204345685957387, "grad_norm": 0.72265625, "learning_rate": 4.97328354848228e-06, "log_odds_chosen": 0.3124513030052185, "log_odds_ratio": -0.6181926727294922, "logits/chosen": 0.16835010051727295, "logits/rejected": 0.041867442429065704, "logps/chosen": -0.9111245274543762, "logps/rejected": -1.138530969619751, "loss": 1.3475, "nll_loss": 1.3216100931167603, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09111244976520538, "rewards/margins": 0.02274065464735031, "rewards/rejected": -0.1138530969619751, "step": 505 }, { "epoch": 0.14344982771956966, "grad_norm": 0.96875, "learning_rate": 4.971463889341484e-06, "log_odds_chosen": 0.3187350630760193, "log_odds_ratio": -0.5841793417930603, "logits/chosen": 0.18942035734653473, "logits/rejected": 0.06515751779079437, "logps/chosen": -0.8589506149291992, "logps/rejected": -1.076509714126587, "loss": 1.4424, "nll_loss": 1.2851884365081787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08589507639408112, "rewards/margins": 0.021755896508693695, "rewards/rejected": -0.10765095800161362, "step": 510 }, { "epoch": 0.14485619857956544, "grad_norm": 0.73046875, "learning_rate": 4.969584641396442e-06, "log_odds_chosen": 0.4171040952205658, "log_odds_ratio": -0.6752759218215942, "logits/chosen": 0.28343451023101807, "logits/rejected": -0.119059719145298, "logps/chosen": -0.9824682474136353, "logps/rejected": -1.2212116718292236, "loss": 1.4246, "nll_loss": 1.3154187202453613, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09824682772159576, "rewards/margins": 0.02387436106801033, "rewards/rejected": -0.1221211776137352, "step": 515 }, { "epoch": 0.1462625694395612, "grad_norm": 0.90234375, "learning_rate": 4.967645849957197e-06, "log_odds_chosen": 0.1666927933692932, "log_odds_ratio": -0.7551862001419067, "logits/chosen": -0.12206075340509415, "logits/rejected": -0.02794760838150978, "logps/chosen": -0.9220685958862305, "logps/rejected": -1.0989015102386475, "loss": 1.4267, "nll_loss": 1.4034297466278076, "rewards/accuracies": 0.5, "rewards/chosen": -0.09220688045024872, "rewards/margins": 0.01768328621983528, "rewards/rejected": -0.10989014804363251, "step": 520 }, { "epoch": 0.147668940299557, "grad_norm": 0.6953125, "learning_rate": 4.965647561769429e-06, "log_odds_chosen": -0.01692991331219673, "log_odds_ratio": -0.7631421685218811, "logits/chosen": 0.2675221860408783, "logits/rejected": -0.0004905223613604903, "logps/chosen": -1.0240669250488281, "logps/rejected": -0.9854789972305298, "loss": 1.4508, "nll_loss": 1.454949975013733, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10240669548511505, "rewards/margins": -0.0038588023744523525, "rewards/rejected": -0.09854789823293686, "step": 525 }, { "epoch": 0.14907531115955278, "grad_norm": 1.3125, "learning_rate": 4.96358982501333e-06, "log_odds_chosen": 0.5942908525466919, "log_odds_ratio": -0.521664023399353, "logits/chosen": 0.15052922070026398, "logits/rejected": -0.12803643941879272, "logps/chosen": -0.9777441024780273, "logps/rejected": -1.4171960353851318, "loss": 1.5198, "nll_loss": 1.4873838424682617, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09777440875768661, "rewards/margins": 0.04394518584012985, "rewards/rejected": -0.14171959459781647, "step": 530 }, { "epoch": 0.15048168201954856, "grad_norm": 0.984375, "learning_rate": 4.961472689302441e-06, "log_odds_chosen": 0.5982332825660706, "log_odds_ratio": -0.5181711912155151, "logits/chosen": 0.05600785091519356, "logits/rejected": -0.03996270149946213, "logps/chosen": -0.924593448638916, "logps/rejected": -1.2906975746154785, "loss": 1.5662, "nll_loss": 1.4862396717071533, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09245933592319489, "rewards/margins": 0.03661042079329491, "rewards/rejected": -0.1290697604417801, "step": 535 }, { "epoch": 0.15188805287954435, "grad_norm": 0.72265625, "learning_rate": 4.959296205682454e-06, "log_odds_chosen": 0.3825303912162781, "log_odds_ratio": -0.6163605451583862, "logits/chosen": 0.10863487422466278, "logits/rejected": -0.2044816017150879, "logps/chosen": -0.8504983186721802, "logps/rejected": -1.0916732549667358, "loss": 1.4963, "nll_loss": 1.5003145933151245, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08504984527826309, "rewards/margins": 0.02411748841404915, "rewards/rejected": -0.10916732251644135, "step": 540 }, { "epoch": 0.1532944237395401, "grad_norm": 0.8046875, "learning_rate": 4.957060426629984e-06, "log_odds_chosen": 0.296190083026886, "log_odds_ratio": -0.6173704862594604, "logits/chosen": 0.22966690361499786, "logits/rejected": 0.16270211338996887, "logps/chosen": -0.8794999122619629, "logps/rejected": -1.0589183568954468, "loss": 1.4761, "nll_loss": 1.3681291341781616, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08794999867677689, "rewards/margins": 0.017941845580935478, "rewards/rejected": -0.10589183866977692, "step": 545 }, { "epoch": 0.1547007945995359, "grad_norm": 0.89453125, "learning_rate": 4.954765406051299e-06, "log_odds_chosen": 0.06485619395971298, "log_odds_ratio": -0.7391001582145691, "logits/chosen": -0.06438665091991425, "logits/rejected": 0.12197915464639664, "logps/chosen": -1.1314219236373901, "logps/rejected": -1.1761893033981323, "loss": 1.4169, "nll_loss": 1.3700284957885742, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.11314219236373901, "rewards/margins": 0.004476743750274181, "rewards/rejected": -0.11761893332004547, "step": 550 }, { "epoch": 0.15610716545953168, "grad_norm": 0.70703125, "learning_rate": 4.952411199281027e-06, "log_odds_chosen": 0.4550943970680237, "log_odds_ratio": -0.5818012952804565, "logits/chosen": 0.048305265605449677, "logits/rejected": -0.13713420927524567, "logps/chosen": -1.0368098020553589, "logps/rejected": -1.358357548713684, "loss": 1.5256, "nll_loss": 1.443561315536499, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10368098318576813, "rewards/margins": 0.03215476870536804, "rewards/rejected": -0.13583573698997498, "step": 555 }, { "epoch": 0.15751353631952747, "grad_norm": 0.64453125, "learning_rate": 4.9499978630808175e-06, "log_odds_chosen": 0.20077376067638397, "log_odds_ratio": -0.6271744966506958, "logits/chosen": 0.06538190692663193, "logits/rejected": -0.004076042678207159, "logps/chosen": -0.9929560422897339, "logps/rejected": -1.1243157386779785, "loss": 1.4502, "nll_loss": 1.4301426410675049, "rewards/accuracies": 0.75, "rewards/chosen": -0.09929562360048294, "rewards/margins": 0.013135967776179314, "rewards/rejected": -0.1124315857887268, "step": 560 }, { "epoch": 0.15891990717952323, "grad_norm": 0.87109375, "learning_rate": 4.9475254556379735e-06, "log_odds_chosen": 0.15719819068908691, "log_odds_ratio": -0.6915684938430786, "logits/chosen": 0.20628222823143005, "logits/rejected": -0.013370787724852562, "logps/chosen": -0.9851440191268921, "logps/rejected": -1.0367156267166138, "loss": 1.4677, "nll_loss": 1.3407604694366455, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09851441532373428, "rewards/margins": 0.005157156381756067, "rewards/rejected": -0.10367156565189362, "step": 565 }, { "epoch": 0.16032627803951902, "grad_norm": 0.765625, "learning_rate": 4.944994036564048e-06, "log_odds_chosen": 0.5135098695755005, "log_odds_ratio": -0.597516655921936, "logits/chosen": 0.05508983135223389, "logits/rejected": 0.02078302577137947, "logps/chosen": -0.8372930288314819, "logps/rejected": -1.0782281160354614, "loss": 1.4411, "nll_loss": 1.2578853368759155, "rewards/accuracies": 0.75, "rewards/chosen": -0.08372931182384491, "rewards/margins": 0.024093495681881905, "rewards/rejected": -0.10782281309366226, "step": 570 }, { "epoch": 0.1617326488995148, "grad_norm": 0.6796875, "learning_rate": 4.94240366689341e-06, "log_odds_chosen": 0.1643310785293579, "log_odds_ratio": -0.73926842212677, "logits/chosen": -0.18839290738105774, "logits/rejected": 0.1574762910604477, "logps/chosen": -1.1360965967178345, "logps/rejected": -1.206714391708374, "loss": 1.4524, "nll_loss": 1.433528184890747, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1136096715927124, "rewards/margins": 0.007061791606247425, "rewards/rejected": -0.12067146599292755, "step": 575 }, { "epoch": 0.1631390197595106, "grad_norm": 1.4140625, "learning_rate": 4.939754409081768e-06, "log_odds_chosen": 0.1948034018278122, "log_odds_ratio": -0.6415718793869019, "logits/chosen": 0.2775643467903137, "logits/rejected": -0.08088856935501099, "logps/chosen": -0.9604623913764954, "logps/rejected": -1.1055552959442139, "loss": 1.4866, "nll_loss": 1.2623450756072998, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09604625403881073, "rewards/margins": 0.014509303495287895, "rewards/rejected": -0.11055555194616318, "step": 580 }, { "epoch": 0.16454539061950638, "grad_norm": 0.80078125, "learning_rate": 4.93704632700467e-06, "log_odds_chosen": 0.10009583085775375, "log_odds_ratio": -0.7434337735176086, "logits/chosen": 0.11388511955738068, "logits/rejected": 0.20686273276805878, "logps/chosen": -1.0210888385772705, "logps/rejected": -1.1122162342071533, "loss": 1.3165, "nll_loss": 1.2572648525238037, "rewards/accuracies": 0.5, "rewards/chosen": -0.102108895778656, "rewards/margins": 0.009112725965678692, "rewards/rejected": -0.11122162640094757, "step": 585 }, { "epoch": 0.16595176147950214, "grad_norm": 0.796875, "learning_rate": 4.934279485955955e-06, "log_odds_chosen": -0.03473677486181259, "log_odds_ratio": -0.7794255018234253, "logits/chosen": -0.0030483484733849764, "logits/rejected": -0.01943325623869896, "logps/chosen": -1.1814887523651123, "logps/rejected": -1.1899473667144775, "loss": 1.4664, "nll_loss": 1.5186388492584229, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.11814887821674347, "rewards/margins": 0.0008458640659227967, "rewards/rejected": -0.11899475008249283, "step": 590 }, { "epoch": 0.16735813233949792, "grad_norm": 1.3359375, "learning_rate": 4.9314539526461895e-06, "log_odds_chosen": -0.05654100328683853, "log_odds_ratio": -0.9299119114875793, "logits/chosen": -0.08810718357563019, "logits/rejected": -0.10039062798023224, "logps/chosen": -1.084962010383606, "logps/rejected": -1.0155553817749023, "loss": 1.5451, "nll_loss": 1.5907752513885498, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10849620401859283, "rewards/margins": -0.006940663792192936, "rewards/rejected": -0.10155554115772247, "step": 595 }, { "epoch": 0.1687645031994937, "grad_norm": 0.671875, "learning_rate": 4.9285697952010496e-06, "log_odds_chosen": 0.40899768471717834, "log_odds_ratio": -0.6665030717849731, "logits/chosen": 0.25060832500457764, "logits/rejected": 0.05417170375585556, "logps/chosen": -0.9990217089653015, "logps/rejected": -1.3330628871917725, "loss": 1.4087, "nll_loss": 1.316851019859314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09990216791629791, "rewards/margins": 0.033404115587472916, "rewards/rejected": -0.13330629467964172, "step": 600 }, { "epoch": 0.1701708740594895, "grad_norm": 0.7109375, "learning_rate": 4.9256270831596835e-06, "log_odds_chosen": 0.3590291142463684, "log_odds_ratio": -0.5888271331787109, "logits/chosen": 0.2401418685913086, "logits/rejected": -0.08578919619321823, "logps/chosen": -0.9272798299789429, "logps/rejected": -1.1708381175994873, "loss": 1.3763, "nll_loss": 1.2497119903564453, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09272798150777817, "rewards/margins": 0.024355821311473846, "rewards/rejected": -0.11708381026983261, "step": 605 }, { "epoch": 0.17157724491948526, "grad_norm": 0.83984375, "learning_rate": 4.922625887473034e-06, "log_odds_chosen": 0.40531492233276367, "log_odds_ratio": -0.5795220732688904, "logits/chosen": -0.025524402037262917, "logits/rejected": 0.0651385709643364, "logps/chosen": -0.9012480974197388, "logps/rejected": -1.1731479167938232, "loss": 1.434, "nll_loss": 1.1538034677505493, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09012481570243835, "rewards/margins": 0.02718997932970524, "rewards/rejected": -0.11731479316949844, "step": 610 }, { "epoch": 0.17298361577948104, "grad_norm": 0.7578125, "learning_rate": 4.919566280502125e-06, "log_odds_chosen": 0.03708020970225334, "log_odds_ratio": -0.7586324214935303, "logits/chosen": 0.250191867351532, "logits/rejected": -0.005167156457901001, "logps/chosen": -0.9644180536270142, "logps/rejected": -1.0048058032989502, "loss": 1.4514, "nll_loss": 1.3735963106155396, "rewards/accuracies": 0.5, "rewards/chosen": -0.09644180536270142, "rewards/margins": 0.004038792569190264, "rewards/rejected": -0.1004805937409401, "step": 615 }, { "epoch": 0.17438998663947683, "grad_norm": 1.21875, "learning_rate": 4.916448336016324e-06, "log_odds_chosen": 0.3182252049446106, "log_odds_ratio": -0.6763931512832642, "logits/chosen": -0.07354754954576492, "logits/rejected": -0.07905907928943634, "logps/chosen": -1.1836451292037964, "logps/rejected": -1.4507120847702026, "loss": 1.5021, "nll_loss": 1.4551026821136475, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11836449801921844, "rewards/margins": 0.026706721633672714, "rewards/rejected": -0.14507122337818146, "step": 620 }, { "epoch": 0.17579635749947262, "grad_norm": 0.68359375, "learning_rate": 4.913272129191554e-06, "log_odds_chosen": 0.5009862184524536, "log_odds_ratio": -0.6594285368919373, "logits/chosen": -0.007948207668960094, "logits/rejected": -0.08926790207624435, "logps/chosen": -1.0072381496429443, "logps/rejected": -1.4435874223709106, "loss": 1.2778, "nll_loss": 1.463144063949585, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10072381794452667, "rewards/margins": 0.04363492876291275, "rewards/rejected": -0.14435873925685883, "step": 625 }, { "epoch": 0.1772027283594684, "grad_norm": 0.53515625, "learning_rate": 4.910037736608487e-06, "log_odds_chosen": 0.9124671816825867, "log_odds_ratio": -0.4731478691101074, "logits/chosen": 0.17726007103919983, "logits/rejected": -0.03324912115931511, "logps/chosen": -0.6981981992721558, "logps/rejected": -1.2315181493759155, "loss": 1.4051, "nll_loss": 1.3857619762420654, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06981982290744781, "rewards/margins": 0.05333200842142105, "rewards/rejected": -0.12315182387828827, "step": 630 }, { "epoch": 0.17860909921946416, "grad_norm": 0.54296875, "learning_rate": 4.906745236250699e-06, "log_odds_chosen": 0.30307167768478394, "log_odds_ratio": -0.639373779296875, "logits/chosen": -0.1709907501935959, "logits/rejected": 0.07192268967628479, "logps/chosen": -1.0287564992904663, "logps/rejected": -1.2007570266723633, "loss": 1.4407, "nll_loss": 1.3757801055908203, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10287564992904663, "rewards/margins": 0.01720007322728634, "rewards/rejected": -0.12007571756839752, "step": 635 }, { "epoch": 0.18001547007945995, "grad_norm": 0.56640625, "learning_rate": 4.903394707502783e-06, "log_odds_chosen": 0.6449100971221924, "log_odds_ratio": -0.5406845808029175, "logits/chosen": -0.1291421502828598, "logits/rejected": -0.20102617144584656, "logps/chosen": -0.8549752235412598, "logps/rejected": -1.3028563261032104, "loss": 1.4641, "nll_loss": 1.5151296854019165, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08549752086400986, "rewards/margins": 0.04478812217712402, "rewards/rejected": -0.13028565049171448, "step": 640 }, { "epoch": 0.18142184093945574, "grad_norm": 0.5390625, "learning_rate": 4.899986231148441e-06, "log_odds_chosen": 0.05296991392970085, "log_odds_ratio": -0.781247615814209, "logits/chosen": 0.007375895977020264, "logits/rejected": 0.02562333643436432, "logps/chosen": -1.1410400867462158, "logps/rejected": -1.1650757789611816, "loss": 1.3886, "nll_loss": 1.3418514728546143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1141040176153183, "rewards/margins": 0.0024035435635596514, "rewards/rejected": -0.11650756746530533, "step": 645 }, { "epoch": 0.18282821179945152, "grad_norm": 0.8125, "learning_rate": 4.896519889368535e-06, "log_odds_chosen": 0.7971788048744202, "log_odds_ratio": -0.4912826120853424, "logits/chosen": 0.04553813487291336, "logits/rejected": -0.10304268449544907, "logps/chosen": -0.8640382885932922, "logps/rejected": -1.428315281867981, "loss": 1.4899, "nll_loss": 1.4770934581756592, "rewards/accuracies": 0.75, "rewards/chosen": -0.08640382438898087, "rewards/margins": 0.05642770975828171, "rewards/rejected": -0.14283153414726257, "step": 650 }, { "epoch": 0.18423458265944728, "grad_norm": 0.765625, "learning_rate": 4.892995765739102e-06, "log_odds_chosen": 0.5198310017585754, "log_odds_ratio": -0.6252527236938477, "logits/chosen": 0.14106041193008423, "logits/rejected": -0.08343149721622467, "logps/chosen": -1.023341178894043, "logps/rejected": -1.3469436168670654, "loss": 1.4602, "nll_loss": 1.1736913919448853, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10233412683010101, "rewards/margins": 0.03236023336648941, "rewards/rejected": -0.13469436764717102, "step": 655 }, { "epoch": 0.18564095351944307, "grad_norm": 0.5703125, "learning_rate": 4.8894139452293446e-06, "log_odds_chosen": 0.741043746471405, "log_odds_ratio": -0.4976826608181, "logits/chosen": 0.04871377348899841, "logits/rejected": -0.0666879341006279, "logps/chosen": -0.7123268246650696, "logps/rejected": -1.1363470554351807, "loss": 1.3701, "nll_loss": 1.330862283706665, "rewards/accuracies": 0.75, "rewards/chosen": -0.07123267650604248, "rewards/margins": 0.04240203648805618, "rewards/rejected": -0.11363470554351807, "step": 660 }, { "epoch": 0.18704732437943886, "grad_norm": 1.453125, "learning_rate": 4.885774514199578e-06, "log_odds_chosen": 0.3378247618675232, "log_odds_ratio": -0.6189150810241699, "logits/chosen": 0.2529948353767395, "logits/rejected": -0.014105233363807201, "logps/chosen": -1.0382691621780396, "logps/rejected": -1.2872415781021118, "loss": 1.4639, "nll_loss": 1.3439507484436035, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10382692515850067, "rewards/margins": 0.024897238239645958, "rewards/rejected": -0.12872417271137238, "step": 665 }, { "epoch": 0.18845369523943464, "grad_norm": 0.67578125, "learning_rate": 4.88207756039915e-06, "log_odds_chosen": 0.08838365226984024, "log_odds_ratio": -0.7437705993652344, "logits/chosen": 0.11691107600927353, "logits/rejected": 0.023869309574365616, "logps/chosen": -1.0130348205566406, "logps/rejected": -1.0903738737106323, "loss": 1.306, "nll_loss": 1.2257177829742432, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10130348056554794, "rewards/margins": 0.0077339173294603825, "rewards/rejected": -0.10903739929199219, "step": 670 }, { "epoch": 0.18986006609943043, "grad_norm": 1.1875, "learning_rate": 4.8783231729643234e-06, "log_odds_chosen": 0.42795419692993164, "log_odds_ratio": -0.5492271780967712, "logits/chosen": 0.23825743794441223, "logits/rejected": -0.2547515034675598, "logps/chosen": -0.8971956372261047, "logps/rejected": -1.2151196002960205, "loss": 1.4355, "nll_loss": 1.3206568956375122, "rewards/accuracies": 0.75, "rewards/chosen": -0.08971955627202988, "rewards/margins": 0.031792402267456055, "rewards/rejected": -0.12151195853948593, "step": 675 }, { "epoch": 0.1912664369594262, "grad_norm": 0.91015625, "learning_rate": 4.874511442416128e-06, "log_odds_chosen": 0.10579367727041245, "log_odds_ratio": -0.7108926773071289, "logits/chosen": -0.018588459119200706, "logits/rejected": 0.0027497292030602694, "logps/chosen": -1.1075600385665894, "logps/rejected": -1.2394804954528809, "loss": 1.4582, "nll_loss": 1.3830382823944092, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11075599491596222, "rewards/margins": 0.013192057609558105, "rewards/rejected": -0.12394805997610092, "step": 680 }, { "epoch": 0.19267280781942198, "grad_norm": 0.671875, "learning_rate": 4.87064246065818e-06, "log_odds_chosen": 0.6352590322494507, "log_odds_ratio": -0.517234206199646, "logits/chosen": 0.07716906070709229, "logits/rejected": -0.1858917623758316, "logps/chosen": -0.9086629152297974, "logps/rejected": -1.3411760330200195, "loss": 1.3114, "nll_loss": 1.2726542949676514, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09086629003286362, "rewards/margins": 0.043251316994428635, "rewards/rejected": -0.13411761820316315, "step": 685 }, { "epoch": 0.19407917867941776, "grad_norm": 0.56640625, "learning_rate": 4.8667163209744625e-06, "log_odds_chosen": 0.24586375057697296, "log_odds_ratio": -0.6478797793388367, "logits/chosen": 0.2538822591304779, "logits/rejected": 0.0186677984893322, "logps/chosen": -0.9751268625259399, "logps/rejected": -1.117552638053894, "loss": 1.3776, "nll_loss": 1.2891550064086914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09751268476247787, "rewards/margins": 0.014242582023143768, "rewards/rejected": -0.11175527423620224, "step": 690 }, { "epoch": 0.19548554953941355, "grad_norm": 0.640625, "learning_rate": 4.862733118027079e-06, "log_odds_chosen": 0.2813799977302551, "log_odds_ratio": -0.631136953830719, "logits/chosen": 0.14162734150886536, "logits/rejected": -0.15908537805080414, "logps/chosen": -0.9692428708076477, "logps/rejected": -1.123443603515625, "loss": 1.4101, "nll_loss": 1.2585976123809814, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09692429006099701, "rewards/margins": 0.015420079231262207, "rewards/rejected": -0.11234436929225922, "step": 695 }, { "epoch": 0.19689192039940934, "grad_norm": 1.296875, "learning_rate": 4.858692947853968e-06, "log_odds_chosen": 0.10032544285058975, "log_odds_ratio": -0.7577157616615295, "logits/chosen": 0.11457610130310059, "logits/rejected": 0.06611824035644531, "logps/chosen": -1.1749951839447021, "logps/rejected": -1.2147281169891357, "loss": 1.3797, "nll_loss": 1.3682631254196167, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11749951541423798, "rewards/margins": 0.003973294049501419, "rewards/rejected": -0.12147282063961029, "step": 700 }, { "epoch": 0.1982982912594051, "grad_norm": 0.6875, "learning_rate": 4.8545959078665915e-06, "log_odds_chosen": -0.11273153126239777, "log_odds_ratio": -0.8723108172416687, "logits/chosen": 0.12857083976268768, "logits/rejected": 0.06154397130012512, "logps/chosen": -0.9911792874336243, "logps/rejected": -0.9738213419914246, "loss": 1.4424, "nll_loss": 1.3504576683044434, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09911791980266571, "rewards/margins": -0.0017357754986733198, "rewards/rejected": -0.09738214313983917, "step": 705 }, { "epoch": 0.19970466211940088, "grad_norm": 0.62890625, "learning_rate": 4.850442096847585e-06, "log_odds_chosen": 0.3669831156730652, "log_odds_ratio": -0.7217445969581604, "logits/chosen": 0.14621445536613464, "logits/rejected": -0.021125638857483864, "logps/chosen": -0.9672040939331055, "logps/rejected": -1.272630214691162, "loss": 1.4287, "nll_loss": 1.391427993774414, "rewards/accuracies": 0.5, "rewards/chosen": -0.09672039747238159, "rewards/margins": 0.030542617663741112, "rewards/rejected": -0.12726303935050964, "step": 710 }, { "epoch": 0.20111103297939667, "grad_norm": 0.796875, "learning_rate": 4.846231614948373e-06, "log_odds_chosen": 0.2515362799167633, "log_odds_ratio": -0.6542503237724304, "logits/chosen": 0.24039408564567566, "logits/rejected": 0.22471091151237488, "logps/chosen": -0.8539594411849976, "logps/rejected": -1.0077803134918213, "loss": 1.3366, "nll_loss": 1.3203866481781006, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.085395947098732, "rewards/margins": 0.015382101759314537, "rewards/rejected": -0.10077805817127228, "step": 715 }, { "epoch": 0.20251740383939246, "grad_norm": 1.125, "learning_rate": 4.841964563686757e-06, "log_odds_chosen": 0.49359601736068726, "log_odds_ratio": -0.5489069819450378, "logits/chosen": 0.24976961314678192, "logits/rejected": -0.014539213851094246, "logps/chosen": -0.9639101028442383, "logps/rejected": -1.3213074207305908, "loss": 1.3393, "nll_loss": 1.4156157970428467, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09639101475477219, "rewards/margins": 0.03573973849415779, "rewards/rejected": -0.13213074207305908, "step": 720 }, { "epoch": 0.20392377469938822, "grad_norm": 0.64453125, "learning_rate": 4.83764104594447e-06, "log_odds_chosen": 0.3946678936481476, "log_odds_ratio": -0.6607686281204224, "logits/chosen": -0.019294610247015953, "logits/rejected": 0.14000949263572693, "logps/chosen": -0.9210721254348755, "logps/rejected": -1.1474473476409912, "loss": 1.3942, "nll_loss": 1.277699589729309, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09210722148418427, "rewards/margins": 0.0226375050842762, "rewards/rejected": -0.11474472284317017, "step": 725 }, { "epoch": 0.205330145559384, "grad_norm": 0.8984375, "learning_rate": 4.833261165964688e-06, "log_odds_chosen": 0.33924877643585205, "log_odds_ratio": -0.6919046640396118, "logits/chosen": -0.14085440337657928, "logits/rejected": 0.25654488801956177, "logps/chosen": -0.865805447101593, "logps/rejected": -1.049051284790039, "loss": 1.4256, "nll_loss": 1.3545150756835938, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0865805447101593, "rewards/margins": 0.018324587494134903, "rewards/rejected": -0.1049051284790039, "step": 730 }, { "epoch": 0.2067365164193798, "grad_norm": 0.59375, "learning_rate": 4.828825029349527e-06, "log_odds_chosen": 0.3080621361732483, "log_odds_ratio": -0.6225201487541199, "logits/chosen": 0.09295627474784851, "logits/rejected": 0.1192513257265091, "logps/chosen": -0.9420360326766968, "logps/rejected": -1.122133493423462, "loss": 1.3749, "nll_loss": 1.2215619087219238, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09420361369848251, "rewards/margins": 0.018009738996624947, "rewards/rejected": -0.11221335083246231, "step": 735 }, { "epoch": 0.20814288727937558, "grad_norm": 0.8828125, "learning_rate": 4.8243327430574885e-06, "log_odds_chosen": 0.5264579057693481, "log_odds_ratio": -0.5474775433540344, "logits/chosen": 0.19925551116466522, "logits/rejected": -0.10264059156179428, "logps/chosen": -0.9675121307373047, "logps/rejected": -1.2763597965240479, "loss": 1.2819, "nll_loss": 1.2236970663070679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09675121307373047, "rewards/margins": 0.030884766951203346, "rewards/rejected": -0.12763598561286926, "step": 740 }, { "epoch": 0.20954925813937136, "grad_norm": 0.81640625, "learning_rate": 4.819784415400884e-06, "log_odds_chosen": 0.3006175458431244, "log_odds_ratio": -0.6740007400512695, "logits/chosen": 0.11504560708999634, "logits/rejected": -0.1865064650774002, "logps/chosen": -0.8989768028259277, "logps/rejected": -1.1000462770462036, "loss": 1.4487, "nll_loss": 1.4835877418518066, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08989769965410233, "rewards/margins": 0.020106937736272812, "rewards/rejected": -0.11000462621450424, "step": 745 }, { "epoch": 0.21095562899936712, "grad_norm": 0.63671875, "learning_rate": 4.8151801560432255e-06, "log_odds_chosen": 0.15093275904655457, "log_odds_ratio": -0.6930734515190125, "logits/chosen": 0.1037089005112648, "logits/rejected": 0.005026382394134998, "logps/chosen": -0.961024284362793, "logps/rejected": -1.100200891494751, "loss": 1.4144, "nll_loss": 1.2849223613739014, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09610243141651154, "rewards/margins": 0.013917678967118263, "rewards/rejected": -0.11002011597156525, "step": 750 }, { "epoch": 0.2123619998593629, "grad_norm": 0.59765625, "learning_rate": 4.810520075996577e-06, "log_odds_chosen": 0.22566702961921692, "log_odds_ratio": -0.6738010048866272, "logits/chosen": 0.11294198036193848, "logits/rejected": 0.05789243057370186, "logps/chosen": -0.807028591632843, "logps/rejected": -0.9467730522155762, "loss": 1.3684, "nll_loss": 1.3143486976623535, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08070285618305206, "rewards/margins": 0.013974443078041077, "rewards/rejected": -0.09467729926109314, "step": 755 }, { "epoch": 0.2137683707193587, "grad_norm": 0.65234375, "learning_rate": 4.80580428761888e-06, "log_odds_chosen": 0.33120518922805786, "log_odds_ratio": -0.6059235334396362, "logits/chosen": 0.12496509402990341, "logits/rejected": -0.2966843247413635, "logps/chosen": -1.018349289894104, "logps/rejected": -1.1662967205047607, "loss": 1.3628, "nll_loss": 1.3103525638580322, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10183493793010712, "rewards/margins": 0.014794737100601196, "rewards/rejected": -0.11662967503070831, "step": 760 }, { "epoch": 0.21517474157935448, "grad_norm": 0.8203125, "learning_rate": 4.801032904611249e-06, "log_odds_chosen": 0.17109887301921844, "log_odds_ratio": -0.7322261929512024, "logits/chosen": 0.08799419552087784, "logits/rejected": 0.047529660165309906, "logps/chosen": -1.0703462362289429, "logps/rejected": -1.121490716934204, "loss": 1.4076, "nll_loss": 1.515067458152771, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10703463852405548, "rewards/margins": 0.005114448722451925, "rewards/rejected": -0.11214907467365265, "step": 765 }, { "epoch": 0.21658111243935024, "grad_norm": 0.7578125, "learning_rate": 4.79620604201522e-06, "log_odds_chosen": 0.5879980325698853, "log_odds_ratio": -0.5942040681838989, "logits/chosen": 0.03295837342739105, "logits/rejected": 0.08761349320411682, "logps/chosen": -0.9856408834457397, "logps/rejected": -1.3201617002487183, "loss": 1.2938, "nll_loss": 1.329833745956421, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09856408834457397, "rewards/margins": 0.033452074974775314, "rewards/rejected": -0.1320161670446396, "step": 770 }, { "epoch": 0.21798748329934603, "grad_norm": 0.7421875, "learning_rate": 4.791323816209984e-06, "log_odds_chosen": 0.06851278245449066, "log_odds_ratio": -0.7348783016204834, "logits/chosen": 0.24454455077648163, "logits/rejected": -0.1613084226846695, "logps/chosen": -1.074292778968811, "logps/rejected": -1.1023520231246948, "loss": 1.4124, "nll_loss": 1.3813612461090088, "rewards/accuracies": 0.5, "rewards/chosen": -0.10742926597595215, "rewards/margins": 0.002805921947583556, "rewards/rejected": -0.11023519188165665, "step": 775 }, { "epoch": 0.21939385415934182, "grad_norm": 1.421875, "learning_rate": 4.786386344909583e-06, "log_odds_chosen": 0.5286887884140015, "log_odds_ratio": -0.551024317741394, "logits/chosen": 0.10972050577402115, "logits/rejected": -0.1185971274971962, "logps/chosen": -1.0658420324325562, "logps/rejected": -1.467869520187378, "loss": 1.4933, "nll_loss": 1.596457839012146, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10658420622348785, "rewards/margins": 0.04020275920629501, "rewards/rejected": -0.14678695797920227, "step": 780 }, { "epoch": 0.2208002250193376, "grad_norm": 0.921875, "learning_rate": 4.781393747160065e-06, "log_odds_chosen": 0.39119476079940796, "log_odds_ratio": -0.5956941843032837, "logits/chosen": 0.2315189391374588, "logits/rejected": 0.09489177167415619, "logps/chosen": -0.9102287292480469, "logps/rejected": -1.1937782764434814, "loss": 1.3777, "nll_loss": 1.2680063247680664, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09102287143468857, "rewards/margins": 0.02835494838654995, "rewards/rejected": -0.11937782913446426, "step": 785 }, { "epoch": 0.2222065958793334, "grad_norm": 0.6875, "learning_rate": 4.776346143336616e-06, "log_odds_chosen": 0.4964830279350281, "log_odds_ratio": -0.5945664644241333, "logits/chosen": 0.2937574088573456, "logits/rejected": -0.03588557988405228, "logps/chosen": -0.9127880930900574, "logps/rejected": -1.2661429643630981, "loss": 1.302, "nll_loss": 1.1640938520431519, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0912788063287735, "rewards/margins": 0.0353354886174202, "rewards/rejected": -0.1266143023967743, "step": 790 }, { "epoch": 0.22361296673932915, "grad_norm": 0.86328125, "learning_rate": 4.771243655140662e-06, "log_odds_chosen": 0.20250296592712402, "log_odds_ratio": -0.697245717048645, "logits/chosen": 0.1340058445930481, "logits/rejected": -0.004743742756545544, "logps/chosen": -0.9509406089782715, "logps/rejected": -1.1008963584899902, "loss": 1.2904, "nll_loss": 1.1836265325546265, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09509406238794327, "rewards/margins": 0.014995579607784748, "rewards/rejected": -0.11008964478969574, "step": 795 }, { "epoch": 0.22501933759932494, "grad_norm": 0.6484375, "learning_rate": 4.766086405596932e-06, "log_odds_chosen": 0.2294325828552246, "log_odds_ratio": -0.7108098268508911, "logits/chosen": -0.09615223109722137, "logits/rejected": 0.049136556684970856, "logps/chosen": -1.0668703317642212, "logps/rejected": -1.175934076309204, "loss": 1.407, "nll_loss": 1.3889058828353882, "rewards/accuracies": 0.5, "rewards/chosen": -0.1066870465874672, "rewards/margins": 0.010906368494033813, "rewards/rejected": -0.11759340763092041, "step": 800 }, { "epoch": 0.22642570845932072, "grad_norm": 0.73828125, "learning_rate": 4.760874519050486e-06, "log_odds_chosen": 0.2722220718860626, "log_odds_ratio": -0.6615415811538696, "logits/chosen": 0.3164128363132477, "logits/rejected": 0.02606889046728611, "logps/chosen": -0.8725587129592896, "logps/rejected": -1.0102307796478271, "loss": 1.361, "nll_loss": 1.2800710201263428, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08725588023662567, "rewards/margins": 0.013767195865511894, "rewards/rejected": -0.10102306306362152, "step": 805 }, { "epoch": 0.2278320793193165, "grad_norm": 0.4921875, "learning_rate": 4.755608121163726e-06, "log_odds_chosen": 0.05866674333810806, "log_odds_ratio": -0.7410587072372437, "logits/chosen": 0.20300361514091492, "logits/rejected": 0.20391185581684113, "logps/chosen": -0.8736883997917175, "logps/rejected": -0.8953003883361816, "loss": 1.3856, "nll_loss": 1.2794065475463867, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08736883848905563, "rewards/margins": 0.0021611980628222227, "rewards/rejected": -0.08953003585338593, "step": 810 }, { "epoch": 0.2292384501793123, "grad_norm": 0.578125, "learning_rate": 4.750287338913364e-06, "log_odds_chosen": 0.18778538703918457, "log_odds_ratio": -0.7045443654060364, "logits/chosen": 0.2000313699245453, "logits/rejected": 0.3296867907047272, "logps/chosen": -0.9048763513565063, "logps/rejected": -1.0671868324279785, "loss": 1.276, "nll_loss": 1.0233865976333618, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09048764407634735, "rewards/margins": 0.016231058165431023, "rewards/rejected": -0.10671870410442352, "step": 815 }, { "epoch": 0.23064482103930806, "grad_norm": 0.6171875, "learning_rate": 4.744912300587354e-06, "log_odds_chosen": 0.46959003806114197, "log_odds_ratio": -0.6398900151252747, "logits/chosen": 0.05968532711267471, "logits/rejected": -0.033852558583021164, "logps/chosen": -0.9375017285346985, "logps/rejected": -1.2653578519821167, "loss": 1.3935, "nll_loss": 1.3479769229888916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09375017881393433, "rewards/margins": 0.032785605639219284, "rewards/rejected": -0.1265358030796051, "step": 820 }, { "epoch": 0.23205119189930384, "grad_norm": 0.6796875, "learning_rate": 4.739483135781807e-06, "log_odds_chosen": 0.4090171754360199, "log_odds_ratio": -0.6206759214401245, "logits/chosen": 0.058540333062410355, "logits/rejected": 0.16028887033462524, "logps/chosen": -0.8152543306350708, "logps/rejected": -1.080603003501892, "loss": 1.4404, "nll_loss": 1.306620478630066, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08152543008327484, "rewards/margins": 0.026534873992204666, "rewards/rejected": -0.1080603152513504, "step": 825 }, { "epoch": 0.23345756275929963, "grad_norm": 0.59765625, "learning_rate": 4.733999975397862e-06, "log_odds_chosen": -0.07625510543584824, "log_odds_ratio": -0.784987211227417, "logits/chosen": 0.191916361451149, "logits/rejected": 0.08183418214321136, "logps/chosen": -1.00628662109375, "logps/rejected": -0.9959591031074524, "loss": 1.3965, "nll_loss": 1.2619175910949707, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.10062865167856216, "rewards/margins": -0.001032742322422564, "rewards/rejected": -0.09959591180086136, "step": 830 }, { "epoch": 0.23486393361929542, "grad_norm": 0.66015625, "learning_rate": 4.728462951638531e-06, "log_odds_chosen": 0.22590819001197815, "log_odds_ratio": -0.6560646295547485, "logits/chosen": 0.020906496793031693, "logits/rejected": 0.11052076518535614, "logps/chosen": -0.8826691508293152, "logps/rejected": -1.0015770196914673, "loss": 1.4155, "nll_loss": 1.2367753982543945, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08826692402362823, "rewards/margins": 0.011890767142176628, "rewards/rejected": -0.10015769302845001, "step": 835 }, { "epoch": 0.23627030447929118, "grad_norm": 0.640625, "learning_rate": 4.722872198005514e-06, "log_odds_chosen": 0.17907896637916565, "log_odds_ratio": -0.7391910552978516, "logits/chosen": -0.023514145985245705, "logits/rejected": 0.08931633830070496, "logps/chosen": -1.0978233814239502, "logps/rejected": -1.1743838787078857, "loss": 1.4892, "nll_loss": 1.3842908143997192, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10978235304355621, "rewards/margins": 0.007656055502593517, "rewards/rejected": -0.11743839085102081, "step": 840 }, { "epoch": 0.23767667533928696, "grad_norm": 1.8203125, "learning_rate": 4.717227849295972e-06, "log_odds_chosen": 0.6538313627243042, "log_odds_ratio": -0.5776088237762451, "logits/chosen": 0.21445605158805847, "logits/rejected": -0.05735556036233902, "logps/chosen": -0.9003429412841797, "logps/rejected": -1.4265720844268799, "loss": 1.4124, "nll_loss": 1.4089030027389526, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09003429114818573, "rewards/margins": 0.05262289568781853, "rewards/rejected": -0.14265719056129456, "step": 845 }, { "epoch": 0.23908304619928275, "grad_norm": 0.62890625, "learning_rate": 4.711530041599287e-06, "log_odds_chosen": 0.5957034826278687, "log_odds_ratio": -0.5471062660217285, "logits/chosen": 0.18590515851974487, "logits/rejected": 0.05791671946644783, "logps/chosen": -0.852972149848938, "logps/rejected": -1.2458776235580444, "loss": 1.2858, "nll_loss": 1.1510000228881836, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08529721200466156, "rewards/margins": 0.03929056599736214, "rewards/rejected": -0.1245877742767334, "step": 850 }, { "epoch": 0.24048941705927854, "grad_norm": 1.0625, "learning_rate": 4.705778912293777e-06, "log_odds_chosen": 0.06852801144123077, "log_odds_ratio": -0.7809044718742371, "logits/chosen": -0.08191190659999847, "logits/rejected": 0.04405444115400314, "logps/chosen": -1.0600035190582275, "logps/rejected": -1.1098763942718506, "loss": 1.3292, "nll_loss": 1.3718576431274414, "rewards/accuracies": 0.5, "rewards/chosen": -0.10600034892559052, "rewards/margins": 0.004987289663404226, "rewards/rejected": -0.11098764091730118, "step": 855 }, { "epoch": 0.24189578791927432, "grad_norm": 0.6796875, "learning_rate": 4.699974600043378e-06, "log_odds_chosen": 0.2891826629638672, "log_odds_ratio": -0.6256308555603027, "logits/chosen": 0.07481182366609573, "logits/rejected": 0.04786193370819092, "logps/chosen": -0.7966269254684448, "logps/rejected": -0.9316326379776001, "loss": 1.3299, "nll_loss": 1.2451672554016113, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07966269552707672, "rewards/margins": 0.013500571250915527, "rewards/rejected": -0.09316325932741165, "step": 860 }, { "epoch": 0.24330215877927008, "grad_norm": 0.78125, "learning_rate": 4.694117244794311e-06, "log_odds_chosen": 0.19615532457828522, "log_odds_ratio": -0.6909521818161011, "logits/chosen": 0.3612063229084015, "logits/rejected": 0.14174401760101318, "logps/chosen": -0.881375789642334, "logps/rejected": -1.0324804782867432, "loss": 1.342, "nll_loss": 1.1763957738876343, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08813757449388504, "rewards/margins": 0.01511046290397644, "rewards/rejected": -0.10324803739786148, "step": 865 }, { "epoch": 0.24470852963926587, "grad_norm": 0.4375, "learning_rate": 4.6882069877717e-06, "log_odds_chosen": 0.55852872133255, "log_odds_ratio": -0.5228425860404968, "logits/chosen": 0.2908174395561218, "logits/rejected": 0.044635575264692307, "logps/chosen": -0.7578222155570984, "logps/rejected": -1.0437644720077515, "loss": 1.3032, "nll_loss": 1.1439415216445923, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07578221708536148, "rewards/margins": 0.028594231233000755, "rewards/rejected": -0.10437645763158798, "step": 870 }, { "epoch": 0.24611490049926166, "grad_norm": 0.48828125, "learning_rate": 4.68224397147617e-06, "log_odds_chosen": 0.20548930764198303, "log_odds_ratio": -0.7238609790802002, "logits/chosen": 0.028538722544908524, "logits/rejected": -0.13830550014972687, "logps/chosen": -0.8864482641220093, "logps/rejected": -0.9824331402778625, "loss": 1.3505, "nll_loss": 1.33633291721344, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0886448323726654, "rewards/margins": 0.00959849078208208, "rewards/rejected": -0.09824331849813461, "step": 875 }, { "epoch": 0.24752127135925744, "grad_norm": 0.67578125, "learning_rate": 4.67622833968041e-06, "log_odds_chosen": 0.24598467350006104, "log_odds_ratio": -0.6492888927459717, "logits/chosen": 0.11724593490362167, "logits/rejected": -0.10359089076519012, "logps/chosen": -0.8895597457885742, "logps/rejected": -1.0241923332214355, "loss": 1.3641, "nll_loss": 1.3283023834228516, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08895598351955414, "rewards/margins": 0.013463238254189491, "rewards/rejected": -0.10241921991109848, "step": 880 }, { "epoch": 0.2489276422192532, "grad_norm": 0.796875, "learning_rate": 4.670160237425709e-06, "log_odds_chosen": 0.3207041621208191, "log_odds_ratio": -0.6082428693771362, "logits/chosen": 0.1917845606803894, "logits/rejected": 0.04012478515505791, "logps/chosen": -0.9117262959480286, "logps/rejected": -1.131466269493103, "loss": 1.3221, "nll_loss": 1.2137980461120605, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09117262810468674, "rewards/margins": 0.021974004805088043, "rewards/rejected": -0.11314662545919418, "step": 885 }, { "epoch": 0.250334013079249, "grad_norm": 0.5390625, "learning_rate": 4.6640398110184546e-06, "log_odds_chosen": 0.24420297145843506, "log_odds_ratio": -0.6132012605667114, "logits/chosen": 0.15292124450206757, "logits/rejected": 0.04410483315587044, "logps/chosen": -0.8954147100448608, "logps/rejected": -1.0340155363082886, "loss": 1.3204, "nll_loss": 1.1968119144439697, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0895414799451828, "rewards/margins": 0.013860085979104042, "rewards/rejected": -0.1034015566110611, "step": 890 }, { "epoch": 0.2517403839392448, "grad_norm": 0.80859375, "learning_rate": 4.657867208026612e-06, "log_odds_chosen": 0.48092031478881836, "log_odds_ratio": -0.5752378702163696, "logits/chosen": 0.18317563831806183, "logits/rejected": 0.06989286839962006, "logps/chosen": -0.7608040571212769, "logps/rejected": -1.0253283977508545, "loss": 1.3631, "nll_loss": 1.3409475088119507, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07608039677143097, "rewards/margins": 0.026452431455254555, "rewards/rejected": -0.10253284126520157, "step": 895 }, { "epoch": 0.25314675479924054, "grad_norm": 0.6484375, "learning_rate": 4.651642577276157e-06, "log_odds_chosen": 0.10233037173748016, "log_odds_ratio": -0.8109905123710632, "logits/chosen": 0.05576135590672493, "logits/rejected": -0.31019073724746704, "logps/chosen": -1.1359487771987915, "logps/rejected": -1.205318808555603, "loss": 1.4058, "nll_loss": 1.4091219902038574, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.11359486728906631, "rewards/margins": 0.006937010679394007, "rewards/rejected": -0.12053187936544418, "step": 900 }, { "epoch": 0.2545531256592363, "grad_norm": 1.1640625, "learning_rate": 4.645366068847495e-06, "log_odds_chosen": 0.49762052297592163, "log_odds_ratio": -0.5452755689620972, "logits/chosen": 0.3103446066379547, "logits/rejected": -0.05523936077952385, "logps/chosen": -0.8181917071342468, "logps/rejected": -1.0620474815368652, "loss": 1.3866, "nll_loss": 1.1911251544952393, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08181916922330856, "rewards/margins": 0.024385575205087662, "rewards/rejected": -0.10620476305484772, "step": 905 }, { "epoch": 0.2559594965192321, "grad_norm": 1.1328125, "learning_rate": 4.639037834071843e-06, "log_odds_chosen": 0.11570564657449722, "log_odds_ratio": -0.7543329000473022, "logits/chosen": 0.18902553617954254, "logits/rejected": 0.06252843141555786, "logps/chosen": -1.1758387088775635, "logps/rejected": -1.215280532836914, "loss": 1.365, "nll_loss": 1.3247127532958984, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11758387088775635, "rewards/margins": 0.003944178577512503, "rewards/rejected": -0.12152805179357529, "step": 910 }, { "epoch": 0.2573658673792279, "grad_norm": 0.74609375, "learning_rate": 4.6326580255275755e-06, "log_odds_chosen": 0.1841403841972351, "log_odds_ratio": -0.7191182374954224, "logits/chosen": 0.03459787741303444, "logits/rejected": -0.11460791528224945, "logps/chosen": -1.063010334968567, "logps/rejected": -1.1565712690353394, "loss": 1.3794, "nll_loss": 1.4235786199569702, "rewards/accuracies": 0.5, "rewards/chosen": -0.10630103200674057, "rewards/margins": 0.00935608334839344, "rewards/rejected": -0.11565710604190826, "step": 915 }, { "epoch": 0.2587722382392237, "grad_norm": 0.875, "learning_rate": 4.626226797036547e-06, "log_odds_chosen": 0.3253302574157715, "log_odds_ratio": -0.6299694180488586, "logits/chosen": 0.1421690285205841, "logits/rejected": -0.04063946381211281, "logps/chosen": -0.8765993118286133, "logps/rejected": -1.1254886388778687, "loss": 1.3979, "nll_loss": 1.1592350006103516, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08765992522239685, "rewards/margins": 0.024888943880796432, "rewards/rejected": -0.11254886537790298, "step": 920 }, { "epoch": 0.26017860909921947, "grad_norm": 0.8359375, "learning_rate": 4.619744303660386e-06, "log_odds_chosen": 0.8051989674568176, "log_odds_ratio": -0.47940540313720703, "logits/chosen": 0.19530947506427765, "logits/rejected": -0.23512431979179382, "logps/chosen": -0.8018245697021484, "logps/rejected": -1.2894532680511475, "loss": 1.4028, "nll_loss": 1.2535035610198975, "rewards/accuracies": 0.75, "rewards/chosen": -0.08018245548009872, "rewards/margins": 0.04876288026571274, "rewards/rejected": -0.12894532084465027, "step": 925 }, { "epoch": 0.26158497995921526, "grad_norm": 0.63671875, "learning_rate": 4.6132107016967565e-06, "log_odds_chosen": 0.6038027405738831, "log_odds_ratio": -0.5177197456359863, "logits/chosen": 0.23678426444530487, "logits/rejected": -0.09270961582660675, "logps/chosen": -0.8819047212600708, "logps/rejected": -1.276870608329773, "loss": 1.3713, "nll_loss": 1.2016524076461792, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0881904736161232, "rewards/margins": 0.03949659690260887, "rewards/rejected": -0.12768706679344177, "step": 930 }, { "epoch": 0.26299135081921104, "grad_norm": 0.9375, "learning_rate": 4.606626148675585e-06, "log_odds_chosen": 0.45221585035324097, "log_odds_ratio": -0.5754260420799255, "logits/chosen": 0.08490542322397232, "logits/rejected": 0.010112226009368896, "logps/chosen": -0.8532091379165649, "logps/rejected": -1.138056993484497, "loss": 1.3425, "nll_loss": 1.079709768295288, "rewards/accuracies": 0.75, "rewards/chosen": -0.08532091230154037, "rewards/margins": 0.028484785929322243, "rewards/rejected": -0.11380569636821747, "step": 935 }, { "epoch": 0.26439772167920683, "grad_norm": 0.671875, "learning_rate": 4.599990803355267e-06, "log_odds_chosen": 0.2118256539106369, "log_odds_ratio": -0.6624730229377747, "logits/chosen": 0.3913845717906952, "logits/rejected": 0.11448683589696884, "logps/chosen": -0.965084433555603, "logps/rejected": -1.144004464149475, "loss": 1.3229, "nll_loss": 1.1798003911972046, "rewards/accuracies": 0.5, "rewards/chosen": -0.0965084433555603, "rewards/margins": 0.017892012372612953, "rewards/rejected": -0.11440044641494751, "step": 940 }, { "epoch": 0.26580409253920256, "grad_norm": 0.86328125, "learning_rate": 4.5933048257188385e-06, "log_odds_chosen": 0.6222201585769653, "log_odds_ratio": -0.5195111036300659, "logits/chosen": 0.25307655334472656, "logits/rejected": -0.07342733442783356, "logps/chosen": -0.7892023921012878, "logps/rejected": -1.1987894773483276, "loss": 1.2383, "nll_loss": 1.1316816806793213, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07892025262117386, "rewards/margins": 0.0409587137401104, "rewards/rejected": -0.11987896263599396, "step": 945 }, { "epoch": 0.26721046339919835, "grad_norm": 0.828125, "learning_rate": 4.586568376970115e-06, "log_odds_chosen": 0.5186957120895386, "log_odds_ratio": -0.591475248336792, "logits/chosen": 0.09654757380485535, "logits/rejected": -0.09392206370830536, "logps/chosen": -0.8592597246170044, "logps/rejected": -1.1736754179000854, "loss": 1.3742, "nll_loss": 1.428426742553711, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08592596650123596, "rewards/margins": 0.031441580504179, "rewards/rejected": -0.11736755073070526, "step": 950 }, { "epoch": 0.26861683425919414, "grad_norm": 0.921875, "learning_rate": 4.57978161952981e-06, "log_odds_chosen": 0.1481349766254425, "log_odds_ratio": -0.6983728408813477, "logits/chosen": 0.15174202620983124, "logits/rejected": 0.04254768043756485, "logps/chosen": -0.9679096341133118, "logps/rejected": -1.0594542026519775, "loss": 1.3007, "nll_loss": 1.2575373649597168, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09679095447063446, "rewards/margins": 0.009154459461569786, "rewards/rejected": -0.10594542324542999, "step": 955 }, { "epoch": 0.2700232051191899, "grad_norm": 1.015625, "learning_rate": 4.572944717031615e-06, "log_odds_chosen": 0.1742623746395111, "log_odds_ratio": -0.6798437833786011, "logits/chosen": -0.0789838507771492, "logits/rejected": -0.35425859689712524, "logps/chosen": -0.9721781611442566, "logps/rejected": -1.0981299877166748, "loss": 1.4649, "nll_loss": 1.488358736038208, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09721782058477402, "rewards/margins": 0.012595164589583874, "rewards/rejected": -0.10981299728155136, "step": 960 }, { "epoch": 0.2714295759791857, "grad_norm": 0.6328125, "learning_rate": 4.566057834318256e-06, "log_odds_chosen": 0.2752152681350708, "log_odds_ratio": -0.6796419620513916, "logits/chosen": -0.16116170585155487, "logits/rejected": 0.11222386360168457, "logps/chosen": -0.8004133105278015, "logps/rejected": -0.9160813093185425, "loss": 1.3356, "nll_loss": 1.340496301651001, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08004133403301239, "rewards/margins": 0.01156679354608059, "rewards/rejected": -0.09160811454057693, "step": 965 }, { "epoch": 0.2728359468391815, "grad_norm": 1.0703125, "learning_rate": 4.559121137437518e-06, "log_odds_chosen": 0.028671523556113243, "log_odds_ratio": -0.7715519070625305, "logits/chosen": -0.004032718483358622, "logits/rejected": 0.037678755819797516, "logps/chosen": -0.9671252369880676, "logps/rejected": -0.9393098950386047, "loss": 1.377, "nll_loss": 1.2385261058807373, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09671252220869064, "rewards/margins": -0.0027815198991447687, "rewards/rejected": -0.09393098950386047, "step": 970 }, { "epoch": 0.2742423176991773, "grad_norm": 1.140625, "learning_rate": 4.552134793638244e-06, "log_odds_chosen": 0.4256436824798584, "log_odds_ratio": -0.6906386017799377, "logits/chosen": -0.00512584438547492, "logits/rejected": -0.10282160341739655, "logps/chosen": -0.7688851356506348, "logps/rejected": -1.0349538326263428, "loss": 1.4431, "nll_loss": 1.375449299812317, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07688851654529572, "rewards/margins": 0.026606876403093338, "rewards/rejected": -0.10349539667367935, "step": 975 }, { "epoch": 0.27564868855917307, "grad_norm": 1.03125, "learning_rate": 4.545098971366298e-06, "log_odds_chosen": 0.23097069561481476, "log_odds_ratio": -0.6853961944580078, "logits/chosen": 0.07004253566265106, "logits/rejected": -0.0871606096625328, "logps/chosen": -1.0082285404205322, "logps/rejected": -1.2149624824523926, "loss": 1.2518, "nll_loss": 1.2343276739120483, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10082285106182098, "rewards/margins": 0.020673388615250587, "rewards/rejected": -0.12149624526500702, "step": 980 }, { "epoch": 0.27705505941916886, "grad_norm": 0.85546875, "learning_rate": 4.538013840260508e-06, "log_odds_chosen": -0.06891898065805435, "log_odds_ratio": -0.7656804323196411, "logits/chosen": -0.02114402875304222, "logits/rejected": 0.011167839169502258, "logps/chosen": -1.045703411102295, "logps/rejected": -0.9952338933944702, "loss": 1.3659, "nll_loss": 1.2949765920639038, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10457032918930054, "rewards/margins": -0.00504694040864706, "rewards/rejected": -0.0995233878493309, "step": 985 }, { "epoch": 0.2784614302791646, "grad_norm": 0.71875, "learning_rate": 4.530879571148572e-06, "log_odds_chosen": 0.3072592616081238, "log_odds_ratio": -0.6513957977294922, "logits/chosen": 0.17522796988487244, "logits/rejected": -0.03323373943567276, "logps/chosen": -1.0087103843688965, "logps/rejected": -1.2768394947052002, "loss": 1.2754, "nll_loss": 1.3225579261779785, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10087104141712189, "rewards/margins": 0.02681291475892067, "rewards/rejected": -0.12768395245075226, "step": 990 }, { "epoch": 0.2798678011391604, "grad_norm": 0.9453125, "learning_rate": 4.523696336042945e-06, "log_odds_chosen": 0.38424450159072876, "log_odds_ratio": -0.6066820621490479, "logits/chosen": 0.11725147068500519, "logits/rejected": -0.1419847458600998, "logps/chosen": -0.9277389645576477, "logps/rejected": -1.226851224899292, "loss": 1.2525, "nll_loss": 1.1180111169815063, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09277389943599701, "rewards/margins": 0.029911210760474205, "rewards/rejected": -0.12268511205911636, "step": 995 }, { "epoch": 0.28127417199915616, "grad_norm": 0.83203125, "learning_rate": 4.5164643081366844e-06, "log_odds_chosen": 0.21584255993366241, "log_odds_ratio": -0.6696838140487671, "logits/chosen": 0.0936116874217987, "logits/rejected": 0.20582985877990723, "logps/chosen": -0.8180558085441589, "logps/rejected": -0.8953365087509155, "loss": 1.3023, "nll_loss": 1.1692216396331787, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08180558681488037, "rewards/margins": 0.007728065364062786, "rewards/rejected": -0.08953364193439484, "step": 1000 }, { "epoch": 0.28268054285915195, "grad_norm": 1.3828125, "learning_rate": 4.509183661799279e-06, "log_odds_chosen": 0.3438703417778015, "log_odds_ratio": -0.6086449027061462, "logits/chosen": -0.15333302319049835, "logits/rejected": 0.09063899517059326, "logps/chosen": -0.8000918626785278, "logps/rejected": -1.026597023010254, "loss": 1.3283, "nll_loss": 1.3134121894836426, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08000917732715607, "rewards/margins": 0.022650521248579025, "rewards/rejected": -0.10265970230102539, "step": 1005 }, { "epoch": 0.28408691371914774, "grad_norm": 0.9453125, "learning_rate": 4.501854572572445e-06, "log_odds_chosen": 0.28882235288619995, "log_odds_ratio": -0.6323953866958618, "logits/chosen": 0.02059057354927063, "logits/rejected": 0.10210821777582169, "logps/chosen": -0.9650726318359375, "logps/rejected": -1.1212360858917236, "loss": 1.2868, "nll_loss": 1.2516919374465942, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09650726616382599, "rewards/margins": 0.015616334974765778, "rewards/rejected": -0.11212359368801117, "step": 1010 }, { "epoch": 0.2854932845791435, "grad_norm": 0.62890625, "learning_rate": 4.494477217165889e-06, "log_odds_chosen": 0.5828167796134949, "log_odds_ratio": -0.6320462822914124, "logits/chosen": -0.007489413022994995, "logits/rejected": -0.10742165893316269, "logps/chosen": -0.7481693029403687, "logps/rejected": -1.1290299892425537, "loss": 1.4211, "nll_loss": 1.2319283485412598, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07481692731380463, "rewards/margins": 0.038086071610450745, "rewards/rejected": -0.11290299892425537, "step": 1015 }, { "epoch": 0.2868996554391393, "grad_norm": 1.0546875, "learning_rate": 4.487051773453054e-06, "log_odds_chosen": 0.049561046063899994, "log_odds_ratio": -0.7927902936935425, "logits/chosen": 0.15820041298866272, "logits/rejected": -0.08799884468317032, "logps/chosen": -1.0042955875396729, "logps/rejected": -1.0792685747146606, "loss": 1.3464, "nll_loss": 1.2037123441696167, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10042955726385117, "rewards/margins": 0.007497308310121298, "rewards/rejected": -0.1079268679022789, "step": 1020 }, { "epoch": 0.2883060262991351, "grad_norm": 0.75, "learning_rate": 4.479578420466824e-06, "log_odds_chosen": 0.2891156077384949, "log_odds_ratio": -0.6404193043708801, "logits/chosen": 0.15136688947677612, "logits/rejected": -0.2594587206840515, "logps/chosen": -0.8895992040634155, "logps/rejected": -1.109318733215332, "loss": 1.309, "nll_loss": 1.2559032440185547, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08895992487668991, "rewards/margins": 0.021971937268972397, "rewards/rejected": -0.11093185842037201, "step": 1025 }, { "epoch": 0.2897123971591309, "grad_norm": 1.328125, "learning_rate": 4.472057338395214e-06, "log_odds_chosen": 0.2149926722049713, "log_odds_ratio": -0.6861375570297241, "logits/chosen": 0.03491468355059624, "logits/rejected": -0.005147813353687525, "logps/chosen": -0.8401373624801636, "logps/rejected": -0.9184094667434692, "loss": 1.3409, "nll_loss": 1.316861629486084, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08401374518871307, "rewards/margins": 0.007827198132872581, "rewards/rejected": -0.0918409451842308, "step": 1030 }, { "epoch": 0.2911187680191266, "grad_norm": 1.0078125, "learning_rate": 4.464488708577019e-06, "log_odds_chosen": 0.3361745774745941, "log_odds_ratio": -0.6563747525215149, "logits/chosen": 0.0792590007185936, "logits/rejected": -0.048874109983444214, "logps/chosen": -1.068371057510376, "logps/rejected": -1.257805585861206, "loss": 1.3291, "nll_loss": 1.420210838317871, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10683709383010864, "rewards/margins": 0.01894346997141838, "rewards/rejected": -0.12578055262565613, "step": 1035 }, { "epoch": 0.2925251388791224, "grad_norm": 0.9296875, "learning_rate": 4.456872713497447e-06, "log_odds_chosen": 0.015982721000909805, "log_odds_ratio": -0.7759819030761719, "logits/chosen": -0.03836756944656372, "logits/rejected": -0.07915479689836502, "logps/chosen": -0.9412604570388794, "logps/rejected": -1.0049259662628174, "loss": 1.3297, "nll_loss": 1.4640296697616577, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09412603825330734, "rewards/margins": 0.0063665686175227165, "rewards/rejected": -0.10049261152744293, "step": 1040 }, { "epoch": 0.2939315097391182, "grad_norm": 0.7734375, "learning_rate": 4.449209536783718e-06, "log_odds_chosen": 0.45826300978660583, "log_odds_ratio": -0.5908263921737671, "logits/chosen": 0.1461760699748993, "logits/rejected": 0.0322549007833004, "logps/chosen": -0.8778446912765503, "logps/rejected": -1.1638209819793701, "loss": 1.4564, "nll_loss": 1.1667400598526, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08778446912765503, "rewards/margins": 0.028597641736268997, "rewards/rejected": -0.11638212203979492, "step": 1045 }, { "epoch": 0.295337880599114, "grad_norm": 1.421875, "learning_rate": 4.441499363200632e-06, "log_odds_chosen": 0.10152752697467804, "log_odds_ratio": -0.8112316131591797, "logits/chosen": 0.00877746008336544, "logits/rejected": -0.027704555541276932, "logps/chosen": -0.8361288905143738, "logps/rejected": -0.9080076217651367, "loss": 1.3844, "nll_loss": 1.109171986579895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08361288905143738, "rewards/margins": 0.0071878740563988686, "rewards/rejected": -0.09080077707767487, "step": 1050 }, { "epoch": 0.29674425145910976, "grad_norm": 1.15625, "learning_rate": 4.433742378646122e-06, "log_odds_chosen": 0.6703130602836609, "log_odds_ratio": -0.5025144815444946, "logits/chosen": 0.05710332840681076, "logits/rejected": 0.0904233530163765, "logps/chosen": -0.9191819429397583, "logps/rejected": -1.3129583597183228, "loss": 1.2766, "nll_loss": 1.1437983512878418, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09191820025444031, "rewards/margins": 0.039377644658088684, "rewards/rejected": -0.1312958300113678, "step": 1055 }, { "epoch": 0.29815062231910555, "grad_norm": 1.140625, "learning_rate": 4.425938770146765e-06, "log_odds_chosen": 0.5356465578079224, "log_odds_ratio": -0.5849270224571228, "logits/chosen": 0.2629484236240387, "logits/rejected": -0.29369235038757324, "logps/chosen": -0.9397749900817871, "logps/rejected": -1.3595625162124634, "loss": 1.3173, "nll_loss": 1.2218631505966187, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09397749602794647, "rewards/margins": 0.041978754103183746, "rewards/rejected": -0.13595624268054962, "step": 1060 }, { "epoch": 0.29955699317910134, "grad_norm": 0.90234375, "learning_rate": 4.418088725853278e-06, "log_odds_chosen": 0.5873881578445435, "log_odds_ratio": -0.553581178188324, "logits/chosen": 0.1879061758518219, "logits/rejected": 0.029256004840135574, "logps/chosen": -0.7824908494949341, "logps/rejected": -1.1668497323989868, "loss": 1.3333, "nll_loss": 1.2202861309051514, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07824908196926117, "rewards/margins": 0.038435906171798706, "rewards/rejected": -0.11668499559164047, "step": 1065 }, { "epoch": 0.3009633640390971, "grad_norm": 0.68359375, "learning_rate": 4.4101924350359755e-06, "log_odds_chosen": 0.22440704703330994, "log_odds_ratio": -0.6922354102134705, "logits/chosen": 0.2807529866695404, "logits/rejected": -0.06608657538890839, "logps/chosen": -0.8481131792068481, "logps/rejected": -0.9911687970161438, "loss": 1.3235, "nll_loss": 1.2630895376205444, "rewards/accuracies": 0.5, "rewards/chosen": -0.08481131494045258, "rewards/margins": 0.014305558986961842, "rewards/rejected": -0.09911688417196274, "step": 1070 }, { "epoch": 0.3023697348990929, "grad_norm": 0.490234375, "learning_rate": 4.402250088080214e-06, "log_odds_chosen": -0.048357464373111725, "log_odds_ratio": -0.7783852815628052, "logits/chosen": -0.09076674282550812, "logits/rejected": 0.06765065342187881, "logps/chosen": -1.0395662784576416, "logps/rejected": -1.0062190294265747, "loss": 1.2533, "nll_loss": 1.2233105897903442, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10395662486553192, "rewards/margins": -0.003334715496748686, "rewards/rejected": -0.10062190145254135, "step": 1075 }, { "epoch": 0.3037761057590887, "grad_norm": 0.5, "learning_rate": 4.394261876481795e-06, "log_odds_chosen": -0.12141172587871552, "log_odds_ratio": -0.8287912607192993, "logits/chosen": 0.006760761141777039, "logits/rejected": -0.22260034084320068, "logps/chosen": -1.0042288303375244, "logps/rejected": -0.9768675565719604, "loss": 1.3572, "nll_loss": 1.3437844514846802, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10042288154363632, "rewards/margins": -0.0027361277025192976, "rewards/rejected": -0.0976867526769638, "step": 1080 }, { "epoch": 0.30518247661908443, "grad_norm": 0.484375, "learning_rate": 4.386227992842347e-06, "log_odds_chosen": 0.2469898760318756, "log_odds_ratio": -0.6603808403015137, "logits/chosen": 0.1306043416261673, "logits/rejected": -0.05761692672967911, "logps/chosen": -0.9411749839782715, "logps/rejected": -1.1152271032333374, "loss": 1.2368, "nll_loss": 1.2312201261520386, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09411749988794327, "rewards/margins": 0.017405226826667786, "rewards/rejected": -0.11152271926403046, "step": 1085 }, { "epoch": 0.3065888474790802, "grad_norm": 1.765625, "learning_rate": 4.378148630864689e-06, "log_odds_chosen": 0.5335085391998291, "log_odds_ratio": -0.5338913798332214, "logits/chosen": 0.23110118508338928, "logits/rejected": -0.10254959017038345, "logps/chosen": -0.8923786878585815, "logps/rejected": -1.204424262046814, "loss": 1.3393, "nll_loss": 1.3731284141540527, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08923786878585815, "rewards/margins": 0.031204570084810257, "rewards/rejected": -0.12044243514537811, "step": 1090 }, { "epoch": 0.307995218339076, "grad_norm": 0.734375, "learning_rate": 4.3700239853481565e-06, "log_odds_chosen": 0.282719224691391, "log_odds_ratio": -0.6776636838912964, "logits/chosen": -0.025338435545563698, "logits/rejected": -0.006705662701278925, "logps/chosen": -1.007938265800476, "logps/rejected": -1.2182714939117432, "loss": 1.2801, "nll_loss": 1.3006494045257568, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10079382359981537, "rewards/margins": 0.021033337339758873, "rewards/rejected": -0.1218271479010582, "step": 1095 }, { "epoch": 0.3094015891990718, "grad_norm": 0.58984375, "learning_rate": 4.361854252183902e-06, "log_odds_chosen": -0.020149126648902893, "log_odds_ratio": -0.8116379976272583, "logits/chosen": 0.12741239368915558, "logits/rejected": -0.08182956278324127, "logps/chosen": -1.0148308277130127, "logps/rejected": -0.9748057126998901, "loss": 1.3741, "nll_loss": 1.3178448677062988, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10148308426141739, "rewards/margins": -0.004002511501312256, "rewards/rejected": -0.09748057276010513, "step": 1100 }, { "epoch": 0.3108079600590676, "grad_norm": 0.94140625, "learning_rate": 4.353639628350174e-06, "log_odds_chosen": 0.2361874282360077, "log_odds_ratio": -0.6560953855514526, "logits/chosen": 0.33975356817245483, "logits/rejected": -0.17692479491233826, "logps/chosen": -0.957141101360321, "logps/rejected": -1.1203813552856445, "loss": 1.3241, "nll_loss": 1.2143763303756714, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09571412950754166, "rewards/margins": 0.01632402278482914, "rewards/rejected": -0.11203813552856445, "step": 1105 }, { "epoch": 0.31221433091906337, "grad_norm": 0.400390625, "learning_rate": 4.345380311907569e-06, "log_odds_chosen": 0.127059668302536, "log_odds_ratio": -0.7422958016395569, "logits/chosen": 0.09059157967567444, "logits/rejected": 0.06283347308635712, "logps/chosen": -0.9153604507446289, "logps/rejected": -0.8998891115188599, "loss": 1.4761, "nll_loss": 1.3094428777694702, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0915360301733017, "rewards/margins": -0.0015471221413463354, "rewards/rejected": -0.08998890966176987, "step": 1110 }, { "epoch": 0.31362070177905915, "grad_norm": 0.482421875, "learning_rate": 4.3370765019942555e-06, "log_odds_chosen": -0.006569194607436657, "log_odds_ratio": -0.7842230796813965, "logits/chosen": 0.2436453104019165, "logits/rejected": 0.11122441291809082, "logps/chosen": -1.0099600553512573, "logps/rejected": -0.9945418238639832, "loss": 1.2582, "nll_loss": 1.2466042041778564, "rewards/accuracies": 0.5, "rewards/chosen": -0.10099601745605469, "rewards/margins": -0.0015418336261063814, "rewards/rejected": -0.09945418685674667, "step": 1115 }, { "epoch": 0.31502707263905494, "grad_norm": 0.462890625, "learning_rate": 4.32872839882117e-06, "log_odds_chosen": 0.4390491545200348, "log_odds_ratio": -0.5690140128135681, "logits/chosen": 0.30528688430786133, "logits/rejected": 0.0776129812002182, "logps/chosen": -0.8607474565505981, "logps/rejected": -1.1015281677246094, "loss": 1.3295, "nll_loss": 1.2498326301574707, "rewards/accuracies": 0.75, "rewards/chosen": -0.08607475459575653, "rewards/margins": 0.02407807670533657, "rewards/rejected": -0.11015282571315765, "step": 1120 }, { "epoch": 0.3164334434990507, "grad_norm": 0.5078125, "learning_rate": 4.320336203667195e-06, "log_odds_chosen": -0.011590385809540749, "log_odds_ratio": -0.7730804681777954, "logits/chosen": 0.12901046872138977, "logits/rejected": 0.2612306475639343, "logps/chosen": -0.8959698677062988, "logps/rejected": -0.8929961323738098, "loss": 1.2513, "nll_loss": 1.190316915512085, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08959699422121048, "rewards/margins": -0.00029737595468759537, "rewards/rejected": -0.08929961174726486, "step": 1125 }, { "epoch": 0.31783981435904646, "grad_norm": 0.39453125, "learning_rate": 4.311900118874301e-06, "log_odds_chosen": 0.7718832492828369, "log_odds_ratio": -0.5875697731971741, "logits/chosen": 0.20271439850330353, "logits/rejected": -0.027479147538542747, "logps/chosen": -0.9410564303398132, "logps/rejected": -1.4128764867782593, "loss": 1.2279, "nll_loss": 1.2182931900024414, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09410564601421356, "rewards/margins": 0.04718201607465744, "rewards/rejected": -0.1412876546382904, "step": 1130 }, { "epoch": 0.31924618521904224, "grad_norm": 0.5234375, "learning_rate": 4.303420347842669e-06, "log_odds_chosen": 0.06899069249629974, "log_odds_ratio": -0.8071345090866089, "logits/chosen": -0.03334174305200577, "logits/rejected": -0.13531741499900818, "logps/chosen": -1.0268479585647583, "logps/rejected": -1.0569186210632324, "loss": 1.3351, "nll_loss": 1.388672113418579, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10268481075763702, "rewards/margins": 0.0030070613138377666, "rewards/rejected": -0.10569186508655548, "step": 1135 }, { "epoch": 0.32065255607903803, "grad_norm": 0.392578125, "learning_rate": 4.294897095025791e-06, "log_odds_chosen": 0.4574614465236664, "log_odds_ratio": -0.6911519765853882, "logits/chosen": 0.26376354694366455, "logits/rejected": -0.10399019718170166, "logps/chosen": -1.0972447395324707, "logps/rejected": -1.4647719860076904, "loss": 1.3333, "nll_loss": 1.2746883630752563, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10972447693347931, "rewards/margins": 0.03675273805856705, "rewards/rejected": -0.14647720754146576, "step": 1140 }, { "epoch": 0.3220589269390338, "grad_norm": 1.2265625, "learning_rate": 4.2863305659255315e-06, "log_odds_chosen": 0.4631293714046478, "log_odds_ratio": -0.5717926025390625, "logits/chosen": 0.3028055727481842, "logits/rejected": 0.16470107436180115, "logps/chosen": -0.875199019908905, "logps/rejected": -1.1849122047424316, "loss": 1.3673, "nll_loss": 1.169250726699829, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08751990646123886, "rewards/margins": 0.03097131848335266, "rewards/rejected": -0.11849121749401093, "step": 1145 }, { "epoch": 0.3234652977990296, "grad_norm": 0.609375, "learning_rate": 4.277720967087181e-06, "log_odds_chosen": 0.2828753590583801, "log_odds_ratio": -0.6513091921806335, "logits/chosen": 0.23147746920585632, "logits/rejected": 0.042463745921850204, "logps/chosen": -0.9318034052848816, "logps/rejected": -1.0875053405761719, "loss": 1.2955, "nll_loss": 1.1813573837280273, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.093180350959301, "rewards/margins": 0.01557018794119358, "rewards/rejected": -0.10875053703784943, "step": 1150 }, { "epoch": 0.3248716686590254, "grad_norm": 0.4296875, "learning_rate": 4.269068506094472e-06, "log_odds_chosen": 0.3769608736038208, "log_odds_ratio": -0.6138890981674194, "logits/chosen": 0.09225358814001083, "logits/rejected": 0.04900515824556351, "logps/chosen": -0.7790621519088745, "logps/rejected": -1.01277756690979, "loss": 1.3202, "nll_loss": 1.2686938047409058, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07790622860193253, "rewards/margins": 0.023371532559394836, "rewards/rejected": -0.10127775371074677, "step": 1155 }, { "epoch": 0.3262780395190212, "grad_norm": 0.40625, "learning_rate": 4.2603733915645776e-06, "log_odds_chosen": 0.3151751160621643, "log_odds_ratio": -0.7018331289291382, "logits/chosen": 0.030745208263397217, "logits/rejected": -0.0075850216671824455, "logps/chosen": -0.8284457325935364, "logps/rejected": -1.0537253618240356, "loss": 1.3178, "nll_loss": 1.3805687427520752, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0828445702791214, "rewards/margins": 0.022527966648340225, "rewards/rejected": -0.10537254810333252, "step": 1160 }, { "epoch": 0.32768441037901697, "grad_norm": 1.2265625, "learning_rate": 4.251635833143075e-06, "log_odds_chosen": 0.6363018155097961, "log_odds_ratio": -0.5621328949928284, "logits/chosen": -0.004075920674949884, "logits/rejected": 0.024677347391843796, "logps/chosen": -0.843280017375946, "logps/rejected": -1.1920336484909058, "loss": 1.2733, "nll_loss": 1.3524099588394165, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08432799577713013, "rewards/margins": 0.034875381737947464, "rewards/rejected": -0.11920337378978729, "step": 1165 }, { "epoch": 0.32909078123901275, "grad_norm": 0.96875, "learning_rate": 4.242856041498895e-06, "log_odds_chosen": 0.2801567614078522, "log_odds_ratio": -0.6821891665458679, "logits/chosen": -0.19107994437217712, "logits/rejected": 0.10342366993427277, "logps/chosen": -0.9781293869018555, "logps/rejected": -1.211730718612671, "loss": 1.2636, "nll_loss": 1.2653511762619019, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0978129506111145, "rewards/margins": 0.023360123857855797, "rewards/rejected": -0.12117306888103485, "step": 1170 }, { "epoch": 0.3304971520990085, "grad_norm": 0.333984375, "learning_rate": 4.2340342283192456e-06, "log_odds_chosen": -0.11513074487447739, "log_odds_ratio": -0.832987904548645, "logits/chosen": 0.3309639096260071, "logits/rejected": -0.025387341156601906, "logps/chosen": -0.9378958940505981, "logps/rejected": -0.9139649271965027, "loss": 1.32, "nll_loss": 1.1061030626296997, "rewards/accuracies": 0.5, "rewards/chosen": -0.09378959238529205, "rewards/margins": -0.002393099246546626, "rewards/rejected": -0.09139649569988251, "step": 1175 }, { "epoch": 0.33190352295900427, "grad_norm": 0.609375, "learning_rate": 4.2251706063045025e-06, "log_odds_chosen": 0.20498593151569366, "log_odds_ratio": -0.6471208333969116, "logits/chosen": 0.05163681507110596, "logits/rejected": 0.05878226086497307, "logps/chosen": -0.9705168604850769, "logps/rejected": -1.1223108768463135, "loss": 1.2788, "nll_loss": 1.2215819358825684, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09705167263746262, "rewards/margins": 0.015179403126239777, "rewards/rejected": -0.11223109066486359, "step": 1180 }, { "epoch": 0.33330989381900006, "grad_norm": 1.15625, "learning_rate": 4.216265389163083e-06, "log_odds_chosen": 0.6214634776115417, "log_odds_ratio": -0.5356272459030151, "logits/chosen": -0.01187597680836916, "logits/rejected": 0.1219060868024826, "logps/chosen": -0.8745874166488647, "logps/rejected": -1.199029564857483, "loss": 1.3475, "nll_loss": 1.2316919565200806, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08745874464511871, "rewards/margins": 0.03244420513510704, "rewards/rejected": -0.11990294605493546, "step": 1185 }, { "epoch": 0.33471626467899585, "grad_norm": 1.4921875, "learning_rate": 4.207318791606296e-06, "log_odds_chosen": 0.26392292976379395, "log_odds_ratio": -0.6821704506874084, "logits/chosen": 0.11818800866603851, "logits/rejected": -0.13393503427505493, "logps/chosen": -0.9134756922721863, "logps/rejected": -1.0980814695358276, "loss": 1.318, "nll_loss": 1.3457120656967163, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09134756773710251, "rewards/margins": 0.01846056990325451, "rewards/rejected": -0.10980813205242157, "step": 1190 }, { "epoch": 0.33612263553899163, "grad_norm": 0.7578125, "learning_rate": 4.198331029343156e-06, "log_odds_chosen": 0.4352284371852875, "log_odds_ratio": -0.6051638722419739, "logits/chosen": 0.1391836702823639, "logits/rejected": -0.012159859761595726, "logps/chosen": -0.9116001129150391, "logps/rejected": -1.1639275550842285, "loss": 1.2891, "nll_loss": 1.2817579507827759, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09116000682115555, "rewards/margins": 0.02523273602128029, "rewards/rejected": -0.11639275401830673, "step": 1195 }, { "epoch": 0.3375290063989874, "grad_norm": 0.77734375, "learning_rate": 4.189302319075195e-06, "log_odds_chosen": 0.15520837903022766, "log_odds_ratio": -0.7217892408370972, "logits/chosen": 0.19287073612213135, "logits/rejected": 0.0029908656142652035, "logps/chosen": -0.9474450945854187, "logps/rejected": -0.98936527967453, "loss": 1.2673, "nll_loss": 1.23770010471344, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09474451839923859, "rewards/margins": 0.004192027263343334, "rewards/rejected": -0.0989365354180336, "step": 1200 }, { "epoch": 0.3389353772589832, "grad_norm": 1.1484375, "learning_rate": 4.18023287849123e-06, "log_odds_chosen": 0.15082181990146637, "log_odds_ratio": -0.7107568979263306, "logits/chosen": 0.04572378844022751, "logits/rejected": -0.023113315925002098, "logps/chosen": -0.9957239031791687, "logps/rejected": -1.001149296760559, "loss": 1.2459, "nll_loss": 1.250544786453247, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09957239776849747, "rewards/margins": 0.000542531895916909, "rewards/rejected": -0.10011491924524307, "step": 1205 }, { "epoch": 0.340341748118979, "grad_norm": 0.58203125, "learning_rate": 4.1711229262621145e-06, "log_odds_chosen": 0.30810657143592834, "log_odds_ratio": -0.6075814366340637, "logits/chosen": 0.3693988621234894, "logits/rejected": -0.019700681790709496, "logps/chosen": -0.8349758386611938, "logps/rejected": -1.0323964357376099, "loss": 1.2642, "nll_loss": 1.0299533605575562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08349757641553879, "rewards/margins": 0.01974206045269966, "rewards/rejected": -0.10323964059352875, "step": 1210 }, { "epoch": 0.3417481189789748, "grad_norm": 1.1953125, "learning_rate": 4.161972682035469e-06, "log_odds_chosen": 0.0991004928946495, "log_odds_ratio": -0.6919318437576294, "logits/chosen": 0.1498071849346161, "logits/rejected": -0.09738024324178696, "logps/chosen": -0.8192486763000488, "logps/rejected": -0.9177687764167786, "loss": 1.3983, "nll_loss": 1.2638217210769653, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08192487061023712, "rewards/margins": 0.009852008894085884, "rewards/rejected": -0.09177687764167786, "step": 1215 }, { "epoch": 0.3431544898389705, "grad_norm": 0.4609375, "learning_rate": 4.152782366430381e-06, "log_odds_chosen": 0.5309596061706543, "log_odds_ratio": -0.560706377029419, "logits/chosen": 0.08695764094591141, "logits/rejected": -0.2072925567626953, "logps/chosen": -0.8290009498596191, "logps/rejected": -1.2268956899642944, "loss": 1.2977, "nll_loss": 1.3526235818862915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08290009945631027, "rewards/margins": 0.03978949040174484, "rewards/rejected": -0.12268956750631332, "step": 1220 }, { "epoch": 0.3445608606989663, "grad_norm": 1.03125, "learning_rate": 4.143552201032092e-06, "log_odds_chosen": 0.3517477810382843, "log_odds_ratio": -0.5913905501365662, "logits/chosen": 0.05310209468007088, "logits/rejected": 0.03654911741614342, "logps/chosen": -0.8787948489189148, "logps/rejected": -1.1121970415115356, "loss": 1.2062, "nll_loss": 0.9948512315750122, "rewards/accuracies": 0.75, "rewards/chosen": -0.0878794938325882, "rewards/margins": 0.023340212181210518, "rewards/rejected": -0.11121970415115356, "step": 1225 }, { "epoch": 0.3459672315589621, "grad_norm": 0.49609375, "learning_rate": 4.134282408386646e-06, "log_odds_chosen": 0.08063089102506638, "log_odds_ratio": -0.7300704717636108, "logits/chosen": 0.2532581090927124, "logits/rejected": 0.12257635593414307, "logps/chosen": -0.9871991872787476, "logps/rejected": -1.0392825603485107, "loss": 1.2664, "nll_loss": 1.1173219680786133, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09871991723775864, "rewards/margins": 0.00520833395421505, "rewards/rejected": -0.10392825305461884, "step": 1230 }, { "epoch": 0.34737360241895787, "grad_norm": 0.455078125, "learning_rate": 4.124973211995535e-06, "log_odds_chosen": 0.13872423768043518, "log_odds_ratio": -0.7976440191268921, "logits/chosen": -0.0937075987458229, "logits/rejected": 0.05718477815389633, "logps/chosen": -0.9579688906669617, "logps/rejected": -1.0377689599990845, "loss": 1.3545, "nll_loss": 1.3179527521133423, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09579687565565109, "rewards/margins": 0.00798002164810896, "rewards/rejected": -0.10377690941095352, "step": 1235 }, { "epoch": 0.34877997327895366, "grad_norm": 0.9453125, "learning_rate": 4.1156248363103e-06, "log_odds_chosen": 0.17224308848381042, "log_odds_ratio": -0.7132772207260132, "logits/chosen": 0.18819832801818848, "logits/rejected": -0.1392843872308731, "logps/chosen": -0.8691417574882507, "logps/rejected": -1.0359532833099365, "loss": 1.3706, "nll_loss": 1.304761528968811, "rewards/accuracies": 0.5, "rewards/chosen": -0.08691417425870895, "rewards/margins": 0.016681160777807236, "rewards/rejected": -0.10359533876180649, "step": 1240 }, { "epoch": 0.35018634413894945, "grad_norm": 0.447265625, "learning_rate": 4.1062375067271245e-06, "log_odds_chosen": 0.30390891432762146, "log_odds_ratio": -0.6522955894470215, "logits/chosen": 0.051540445536375046, "logits/rejected": 0.03162222355604172, "logps/chosen": -1.0302698612213135, "logps/rejected": -1.25276517868042, "loss": 1.3463, "nll_loss": 1.4013944864273071, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10302698612213135, "rewards/margins": 0.02224954031407833, "rewards/rejected": -0.12527652084827423, "step": 1245 }, { "epoch": 0.35159271499894523, "grad_norm": 0.341796875, "learning_rate": 4.096811449581399e-06, "log_odds_chosen": 0.3173461854457855, "log_odds_ratio": -0.6898201107978821, "logits/chosen": 0.24673572182655334, "logits/rejected": 0.1782643347978592, "logps/chosen": -0.9055770039558411, "logps/rejected": -1.2110660076141357, "loss": 1.2689, "nll_loss": 1.0442687273025513, "rewards/accuracies": 0.5, "rewards/chosen": -0.09055770188570023, "rewards/margins": 0.030548905953764915, "rewards/rejected": -0.1211066022515297, "step": 1250 }, { "epoch": 0.352999085858941, "grad_norm": 0.61328125, "learning_rate": 4.087346892142265e-06, "log_odds_chosen": 0.05978800728917122, "log_odds_ratio": -0.7154570817947388, "logits/chosen": 0.13341276347637177, "logits/rejected": -0.057683832943439484, "logps/chosen": -1.0031936168670654, "logps/rejected": -1.052685260772705, "loss": 1.2844, "nll_loss": 1.4338276386260986, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10031934827566147, "rewards/margins": 0.004949171096086502, "rewards/rejected": -0.10526851564645767, "step": 1255 }, { "epoch": 0.3544054567189368, "grad_norm": 0.80078125, "learning_rate": 4.077844062607133e-06, "log_odds_chosen": 0.08474165201187134, "log_odds_ratio": -0.6954008340835571, "logits/chosen": 0.15004608035087585, "logits/rejected": 0.07776209712028503, "logps/chosen": -1.1367336511611938, "logps/rejected": -1.1752803325653076, "loss": 1.2721, "nll_loss": 1.165010690689087, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11367335170507431, "rewards/margins": 0.003854684066027403, "rewards/rejected": -0.1175280213356018, "step": 1260 }, { "epoch": 0.35581182757893254, "grad_norm": 0.69140625, "learning_rate": 4.068303190096182e-06, "log_odds_chosen": 0.5245984792709351, "log_odds_ratio": -0.5845207571983337, "logits/chosen": 0.1559588462114334, "logits/rejected": -0.036134567111730576, "logps/chosen": -0.7722035646438599, "logps/rejected": -1.1271470785140991, "loss": 1.3113, "nll_loss": 1.1578443050384521, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07722035050392151, "rewards/margins": 0.03549434617161751, "rewards/rejected": -0.11271469295024872, "step": 1265 }, { "epoch": 0.3572181984389283, "grad_norm": 1.9453125, "learning_rate": 4.058724504646834e-06, "log_odds_chosen": 0.2877119183540344, "log_odds_ratio": -0.6282137036323547, "logits/chosen": 0.3439601957798004, "logits/rejected": -0.022573407739400864, "logps/chosen": -0.9233312606811523, "logps/rejected": -1.1261590719223022, "loss": 1.3093, "nll_loss": 1.2163238525390625, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09233313053846359, "rewards/margins": 0.020282771438360214, "rewards/rejected": -0.11261589825153351, "step": 1270 }, { "epoch": 0.3586245692989241, "grad_norm": 1.3125, "learning_rate": 4.049108237208212e-06, "log_odds_chosen": 0.1699952781200409, "log_odds_ratio": -0.6839112043380737, "logits/chosen": 0.03113476000726223, "logits/rejected": 0.36601823568344116, "logps/chosen": -0.8622844815254211, "logps/rejected": -0.9559763669967651, "loss": 1.2663, "nll_loss": 1.125409722328186, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08622844517230988, "rewards/margins": 0.00936918519437313, "rewards/rejected": -0.09559763967990875, "step": 1275 }, { "epoch": 0.3600309401589199, "grad_norm": 0.5546875, "learning_rate": 4.039454619635563e-06, "log_odds_chosen": 0.10882600396871567, "log_odds_ratio": -0.7116864323616028, "logits/chosen": -0.08300259709358215, "logits/rejected": 0.009808266535401344, "logps/chosen": -0.9910035133361816, "logps/rejected": -1.0440236330032349, "loss": 1.269, "nll_loss": 1.1854488849639893, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09910035878419876, "rewards/margins": 0.0053020054474473, "rewards/rejected": -0.10440234839916229, "step": 1280 }, { "epoch": 0.3614373110189157, "grad_norm": 0.9296875, "learning_rate": 4.02976388468468e-06, "log_odds_chosen": 0.3029418885707855, "log_odds_ratio": -0.6561595797538757, "logits/chosen": 0.23958845436573029, "logits/rejected": -0.4308515191078186, "logps/chosen": -0.8872898817062378, "logps/rejected": -1.0813348293304443, "loss": 1.2944, "nll_loss": 1.2912275791168213, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08872898668050766, "rewards/margins": 0.019404493272304535, "rewards/rejected": -0.1081334799528122, "step": 1285 }, { "epoch": 0.3628436818789115, "grad_norm": 0.72265625, "learning_rate": 4.020036266006276e-06, "log_odds_chosen": 0.7652324438095093, "log_odds_ratio": -0.46309083700180054, "logits/chosen": 0.2030514031648636, "logits/rejected": 0.08312040567398071, "logps/chosen": -0.8167656660079956, "logps/rejected": -1.2904177904129028, "loss": 1.321, "nll_loss": 1.096789836883545, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08167656511068344, "rewards/margins": 0.047365207225084305, "rewards/rejected": -0.12904179096221924, "step": 1290 }, { "epoch": 0.36425005273890726, "grad_norm": 0.296875, "learning_rate": 4.0102719981403625e-06, "log_odds_chosen": 0.5203356146812439, "log_odds_ratio": -0.6198484301567078, "logits/chosen": 0.061830371618270874, "logits/rejected": 0.011904525570571423, "logps/chosen": -0.842174232006073, "logps/rejected": -1.1951404809951782, "loss": 1.2896, "nll_loss": 1.170758605003357, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08421741425991058, "rewards/margins": 0.035296615213155746, "rewards/rejected": -0.11951403319835663, "step": 1295 }, { "epoch": 0.36565642359890305, "grad_norm": 0.54296875, "learning_rate": 4.000471316510588e-06, "log_odds_chosen": 0.21910062432289124, "log_odds_ratio": -0.6447972059249878, "logits/chosen": 0.1635589897632599, "logits/rejected": -0.052931904792785645, "logps/chosen": -0.8704059720039368, "logps/rejected": -1.0420140027999878, "loss": 1.4201, "nll_loss": 1.3763192892074585, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08704060316085815, "rewards/margins": 0.017160795629024506, "rewards/rejected": -0.10420139878988266, "step": 1300 }, { "epoch": 0.36706279445889883, "grad_norm": 0.96875, "learning_rate": 3.9906344574185625e-06, "log_odds_chosen": -0.06714977324008942, "log_odds_ratio": -0.8473297357559204, "logits/chosen": 0.13796642422676086, "logits/rejected": 0.08382640033960342, "logps/chosen": -1.0411431789398193, "logps/rejected": -1.025268793106079, "loss": 1.3294, "nll_loss": 1.2165088653564453, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10411433130502701, "rewards/margins": -0.001587429316714406, "rewards/rejected": -0.10252688825130463, "step": 1305 }, { "epoch": 0.36846916531889456, "grad_norm": 0.36328125, "learning_rate": 3.9807616580381645e-06, "log_odds_chosen": 0.11012011766433716, "log_odds_ratio": -0.7090429067611694, "logits/chosen": 0.2194376289844513, "logits/rejected": 0.13040907680988312, "logps/chosen": -0.9564974904060364, "logps/rejected": -1.000109076499939, "loss": 1.294, "nll_loss": 1.029192328453064, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09564974904060364, "rewards/margins": 0.0043611666187644005, "rewards/rejected": -0.10001091659069061, "step": 1310 }, { "epoch": 0.36987553617889035, "grad_norm": 0.55859375, "learning_rate": 3.970853156409816e-06, "log_odds_chosen": 0.37843549251556396, "log_odds_ratio": -0.693618655204773, "logits/chosen": 0.22795596718788147, "logits/rejected": -0.10526075214147568, "logps/chosen": -0.8523596525192261, "logps/rejected": -1.2103275060653687, "loss": 1.3429, "nll_loss": 1.0443923473358154, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.08523597568273544, "rewards/margins": 0.035796768963336945, "rewards/rejected": -0.12103275209665298, "step": 1315 }, { "epoch": 0.37128190703888614, "grad_norm": 0.48046875, "learning_rate": 3.960909191434746e-06, "log_odds_chosen": 0.0975802093744278, "log_odds_ratio": -0.7350735664367676, "logits/chosen": 0.09811149537563324, "logits/rejected": -0.0825861319899559, "logps/chosen": -0.885798454284668, "logps/rejected": -0.9871622920036316, "loss": 1.3356, "nll_loss": 1.3084779977798462, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08857984840869904, "rewards/margins": 0.010136393830180168, "rewards/rejected": -0.09871623665094376, "step": 1320 }, { "epoch": 0.3726882778988819, "grad_norm": 0.5078125, "learning_rate": 3.9509300028692345e-06, "log_odds_chosen": 0.07902495563030243, "log_odds_ratio": -0.7018908262252808, "logits/chosen": 0.3462804853916168, "logits/rejected": -0.16924339532852173, "logps/chosen": -1.059748888015747, "logps/rejected": -1.1173927783966064, "loss": 1.4139, "nll_loss": 1.3403122425079346, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10597489029169083, "rewards/margins": 0.00576439592987299, "rewards/rejected": -0.11173927783966064, "step": 1325 }, { "epoch": 0.3740946487588777, "grad_norm": 0.96484375, "learning_rate": 3.940915831318824e-06, "log_odds_chosen": 0.6405239105224609, "log_odds_ratio": -0.49955257773399353, "logits/chosen": 0.21703024208545685, "logits/rejected": -0.14214935898780823, "logps/chosen": -0.8258606791496277, "logps/rejected": -1.2524826526641846, "loss": 1.3476, "nll_loss": 1.2318499088287354, "rewards/accuracies": 0.75, "rewards/chosen": -0.08258606493473053, "rewards/margins": 0.042662184685468674, "rewards/rejected": -0.1252482533454895, "step": 1330 }, { "epoch": 0.3755010196188735, "grad_norm": 0.76171875, "learning_rate": 3.930866918232525e-06, "log_odds_chosen": 0.3761736750602722, "log_odds_ratio": -0.6042557954788208, "logits/chosen": -0.05178029090166092, "logits/rejected": -0.06902565062046051, "logps/chosen": -0.9542252421379089, "logps/rejected": -1.2151601314544678, "loss": 1.2994, "nll_loss": 1.4641225337982178, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09542252868413925, "rewards/margins": 0.0260935015976429, "rewards/rejected": -0.12151602655649185, "step": 1335 }, { "epoch": 0.3769073904788693, "grad_norm": 0.4921875, "learning_rate": 3.9207835058969905e-06, "log_odds_chosen": -0.12533536553382874, "log_odds_ratio": -0.7958296537399292, "logits/chosen": 0.25583916902542114, "logits/rejected": 0.17151054739952087, "logps/chosen": -0.9483383893966675, "logps/rejected": -0.8815383911132812, "loss": 1.2106, "nll_loss": 1.078672170639038, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0948338508605957, "rewards/margins": -0.006680003367364407, "rewards/rejected": -0.08815383911132812, "step": 1340 }, { "epoch": 0.3783137613388651, "grad_norm": 0.8828125, "learning_rate": 3.910665837430676e-06, "log_odds_chosen": -0.0325247123837471, "log_odds_ratio": -0.7807704210281372, "logits/chosen": 0.04034139961004257, "logits/rejected": -0.07567404210567474, "logps/chosen": -0.9568163752555847, "logps/rejected": -0.9523313641548157, "loss": 1.3861, "nll_loss": 1.3908307552337646, "rewards/accuracies": 0.5, "rewards/chosen": -0.09568165242671967, "rewards/margins": -0.00044852012069895864, "rewards/rejected": -0.09523313492536545, "step": 1345 }, { "epoch": 0.37972013219886086, "grad_norm": 0.7265625, "learning_rate": 3.900514156777977e-06, "log_odds_chosen": 0.5176669359207153, "log_odds_ratio": -0.7029728293418884, "logits/chosen": -0.015370416454970837, "logits/rejected": -0.1847151219844818, "logps/chosen": -0.831338107585907, "logps/rejected": -1.2131564617156982, "loss": 1.3746, "nll_loss": 1.280045747756958, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08313381671905518, "rewards/margins": 0.03818183392286301, "rewards/rejected": -0.12131565809249878, "step": 1350 }, { "epoch": 0.38112650305885665, "grad_norm": 0.56640625, "learning_rate": 3.890328708703349e-06, "log_odds_chosen": 0.5413855314254761, "log_odds_ratio": -0.5090984106063843, "logits/chosen": 0.14026743173599243, "logits/rejected": 0.08661554008722305, "logps/chosen": -0.7370747327804565, "logps/rejected": -1.0743495225906372, "loss": 1.376, "nll_loss": 1.152942419052124, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0737074688076973, "rewards/margins": 0.0337274894118309, "rewards/rejected": -0.1074349656701088, "step": 1355 }, { "epoch": 0.3825328739188524, "grad_norm": 0.5859375, "learning_rate": 3.880109738785404e-06, "log_odds_chosen": 0.06151856109499931, "log_odds_ratio": -0.7875824570655823, "logits/chosen": 0.1456756442785263, "logits/rejected": 0.09310576319694519, "logps/chosen": -1.1162899732589722, "logps/rejected": -1.1293188333511353, "loss": 1.3632, "nll_loss": 1.2380468845367432, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.11162900924682617, "rewards/margins": 0.0013028827961534262, "rewards/rejected": -0.11293188482522964, "step": 1360 }, { "epoch": 0.38393924477884817, "grad_norm": 0.7734375, "learning_rate": 3.86985749341099e-06, "log_odds_chosen": 0.38305407762527466, "log_odds_ratio": -0.6281946897506714, "logits/chosen": 0.039482396095991135, "logits/rejected": -0.06709714233875275, "logps/chosen": -0.8778185844421387, "logps/rejected": -1.067224144935608, "loss": 1.3404, "nll_loss": 1.395684838294983, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0877818614244461, "rewards/margins": 0.018940549343824387, "rewards/rejected": -0.10672241449356079, "step": 1365 }, { "epoch": 0.38534561563884395, "grad_norm": 0.94921875, "learning_rate": 3.859572219769251e-06, "log_odds_chosen": 0.3674304187297821, "log_odds_ratio": -0.563770592212677, "logits/chosen": 0.11389932781457901, "logits/rejected": 0.07518327236175537, "logps/chosen": -0.8340722322463989, "logps/rejected": -1.0481908321380615, "loss": 1.3566, "nll_loss": 1.2202757596969604, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08340722322463989, "rewards/margins": 0.021411865949630737, "rewards/rejected": -0.10481909662485123, "step": 1370 }, { "epoch": 0.38675198649883974, "grad_norm": 0.5546875, "learning_rate": 3.849254165845665e-06, "log_odds_chosen": 0.5124444365501404, "log_odds_ratio": -0.5738476514816284, "logits/chosen": -0.12536796927452087, "logits/rejected": -0.03598599508404732, "logps/chosen": -0.7154397368431091, "logps/rejected": -0.9961905479431152, "loss": 1.3949, "nll_loss": 1.3729028701782227, "rewards/accuracies": 0.75, "rewards/chosen": -0.07154396921396255, "rewards/margins": 0.028075072914361954, "rewards/rejected": -0.09961903840303421, "step": 1375 }, { "epoch": 0.3881583573588355, "grad_norm": 0.4375, "learning_rate": 3.83890358041607e-06, "log_odds_chosen": 0.19218651950359344, "log_odds_ratio": -0.6770265102386475, "logits/chosen": 0.1676643192768097, "logits/rejected": 0.09929686784744263, "logps/chosen": -0.8764567375183105, "logps/rejected": -1.0005100965499878, "loss": 1.3095, "nll_loss": 1.1619200706481934, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08764568716287613, "rewards/margins": 0.012405339628458023, "rewards/rejected": -0.10005102306604385, "step": 1380 }, { "epoch": 0.3895647282188313, "grad_norm": 0.57421875, "learning_rate": 3.82852071304066e-06, "log_odds_chosen": 0.04810095950961113, "log_odds_ratio": -0.804132342338562, "logits/chosen": 0.25406962633132935, "logits/rejected": 0.07150840759277344, "logps/chosen": -0.9535115957260132, "logps/rejected": -1.0202562808990479, "loss": 1.3359, "nll_loss": 1.2485761642456055, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09535115957260132, "rewards/margins": 0.006674474570900202, "rewards/rejected": -0.10202564299106598, "step": 1385 }, { "epoch": 0.3909710990788271, "grad_norm": 0.29296875, "learning_rate": 3.818105814057971e-06, "log_odds_chosen": 0.0038131296169012785, "log_odds_ratio": -0.7812002301216125, "logits/chosen": 0.16467972099781036, "logits/rejected": 0.19734536111354828, "logps/chosen": -1.058872103691101, "logps/rejected": -1.0703850984573364, "loss": 1.3523, "nll_loss": 1.3367329835891724, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10588721185922623, "rewards/margins": 0.0011512942146509886, "rewards/rejected": -0.10703851282596588, "step": 1390 }, { "epoch": 0.3923774699388229, "grad_norm": 0.61328125, "learning_rate": 3.8076591345788434e-06, "log_odds_chosen": 0.42102327942848206, "log_odds_ratio": -0.5969361066818237, "logits/chosen": 0.015062945894896984, "logits/rejected": -0.07421614974737167, "logps/chosen": -0.8461735844612122, "logps/rejected": -1.1441727876663208, "loss": 1.3275, "nll_loss": 1.239793062210083, "rewards/accuracies": 0.75, "rewards/chosen": -0.08461736142635345, "rewards/margins": 0.0297999270260334, "rewards/rejected": -0.11441727727651596, "step": 1395 }, { "epoch": 0.3937838407988187, "grad_norm": 0.69921875, "learning_rate": 3.7971809264803705e-06, "log_odds_chosen": 0.3402322828769684, "log_odds_ratio": -0.7314016819000244, "logits/chosen": 0.1510065346956253, "logits/rejected": 0.2304486781358719, "logps/chosen": -0.8683599233627319, "logps/rejected": -1.0512754917144775, "loss": 1.1824, "nll_loss": 1.1514182090759277, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.08683599531650543, "rewards/margins": 0.018291553482413292, "rewards/rejected": -0.10512755066156387, "step": 1400 }, { "epoch": 0.3951902116588144, "grad_norm": 0.55078125, "learning_rate": 3.786671442399823e-06, "log_odds_chosen": 0.2950545847415924, "log_odds_ratio": -0.6508955955505371, "logits/chosen": 0.07452909648418427, "logits/rejected": 0.03641175478696823, "logps/chosen": -0.9904881715774536, "logps/rejected": -1.1894201040267944, "loss": 1.3093, "nll_loss": 1.2014591693878174, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09904881566762924, "rewards/margins": 0.01989319920539856, "rewards/rejected": -0.1189420074224472, "step": 1405 }, { "epoch": 0.3965965825188102, "grad_norm": 0.71875, "learning_rate": 3.776130935728558e-06, "log_odds_chosen": 0.957088828086853, "log_odds_ratio": -0.46606189012527466, "logits/chosen": 0.16692259907722473, "logits/rejected": 0.09750144928693771, "logps/chosen": -0.6723443865776062, "logps/rejected": -1.2955124378204346, "loss": 1.2867, "nll_loss": 1.1986668109893799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06723444163799286, "rewards/margins": 0.06231679767370224, "rewards/rejected": -0.1295512467622757, "step": 1410 }, { "epoch": 0.398002953378806, "grad_norm": 0.41796875, "learning_rate": 3.7655596606059095e-06, "log_odds_chosen": 0.018707597628235817, "log_odds_ratio": -0.7543188333511353, "logits/chosen": 0.2600110173225403, "logits/rejected": -0.2204551249742508, "logps/chosen": -0.9564399719238281, "logps/rejected": -0.9729903340339661, "loss": 1.3391, "nll_loss": 1.373101830482483, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09564399719238281, "rewards/margins": 0.0016550387954339385, "rewards/rejected": -0.09729902446269989, "step": 1415 }, { "epoch": 0.39940932423880177, "grad_norm": 0.494140625, "learning_rate": 3.754957871913064e-06, "log_odds_chosen": 0.3720802366733551, "log_odds_ratio": -0.6222091913223267, "logits/chosen": 0.11969141662120819, "logits/rejected": 0.029351050034165382, "logps/chosen": -1.0278053283691406, "logps/rejected": -1.3334497213363647, "loss": 1.2639, "nll_loss": 1.2064927816390991, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1027805432677269, "rewards/margins": 0.030564438551664352, "rewards/rejected": -0.13334497809410095, "step": 1420 }, { "epoch": 0.40081569509879755, "grad_norm": 0.8203125, "learning_rate": 3.7443258252669084e-06, "log_odds_chosen": 0.3200578987598419, "log_odds_ratio": -0.6280742883682251, "logits/chosen": 0.06024733930826187, "logits/rejected": 0.02671833336353302, "logps/chosen": -0.8416322469711304, "logps/rejected": -1.0510714054107666, "loss": 1.3019, "nll_loss": 1.1888415813446045, "rewards/accuracies": 0.75, "rewards/chosen": -0.08416323363780975, "rewards/margins": 0.020943904295563698, "rewards/rejected": -0.1051071435213089, "step": 1425 }, { "epoch": 0.40222206595879334, "grad_norm": 0.84765625, "learning_rate": 3.733663777013875e-06, "log_odds_chosen": 0.4754953980445862, "log_odds_ratio": -0.5904639363288879, "logits/chosen": 0.08866497129201889, "logits/rejected": 0.1178668737411499, "logps/chosen": -0.8428691029548645, "logps/rejected": -1.0717285871505737, "loss": 1.3726, "nll_loss": 1.2191121578216553, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08428691327571869, "rewards/margins": 0.022885948419570923, "rewards/rejected": -0.10717286169528961, "step": 1430 }, { "epoch": 0.4036284368187891, "grad_norm": 0.48828125, "learning_rate": 3.7229719842237545e-06, "log_odds_chosen": 0.5185042023658752, "log_odds_ratio": -0.541755199432373, "logits/chosen": 0.03424149751663208, "logits/rejected": -0.25478774309158325, "logps/chosen": -0.9219148755073547, "logps/rejected": -1.2493301630020142, "loss": 1.2789, "nll_loss": 1.1576497554779053, "rewards/accuracies": 0.75, "rewards/chosen": -0.09219150245189667, "rewards/margins": 0.032741524279117584, "rewards/rejected": -0.12493302673101425, "step": 1435 }, { "epoch": 0.4050348076787849, "grad_norm": 0.71484375, "learning_rate": 3.712250704683501e-06, "log_odds_chosen": 0.6584421992301941, "log_odds_ratio": -0.557796835899353, "logits/chosen": 0.2356627881526947, "logits/rejected": 0.009463606402277946, "logps/chosen": -0.8283861875534058, "logps/rejected": -1.178853154182434, "loss": 1.1522, "nll_loss": 1.0821020603179932, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08283861726522446, "rewards/margins": 0.035046692937612534, "rewards/rejected": -0.11788531392812729, "step": 1440 }, { "epoch": 0.4064411785387807, "grad_norm": 0.68359375, "learning_rate": 3.701500196891015e-06, "log_odds_chosen": 0.2571748197078705, "log_odds_ratio": -0.6834226846694946, "logits/chosen": -0.009924083948135376, "logits/rejected": -0.04143872857093811, "logps/chosen": -0.9206323623657227, "logps/rejected": -1.069331407546997, "loss": 1.331, "nll_loss": 1.2972078323364258, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09206323325634003, "rewards/margins": 0.014869892969727516, "rewards/rejected": -0.10693313926458359, "step": 1445 }, { "epoch": 0.40784754939877643, "grad_norm": 0.671875, "learning_rate": 3.690720720048913e-06, "log_odds_chosen": 0.25981101393699646, "log_odds_ratio": -0.6484376192092896, "logits/chosen": 0.28591400384902954, "logits/rejected": -0.010500210337340832, "logps/chosen": -0.9713215827941895, "logps/rejected": -1.1231167316436768, "loss": 1.3804, "nll_loss": 1.1480700969696045, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09713216125965118, "rewards/margins": 0.015179498121142387, "rewards/rejected": -0.11231166124343872, "step": 1450 }, { "epoch": 0.4092539202587722, "grad_norm": 1.078125, "learning_rate": 3.6799125340582742e-06, "log_odds_chosen": 0.1738591492176056, "log_odds_ratio": -0.7462955713272095, "logits/chosen": 0.19391432404518127, "logits/rejected": -0.003649419639259577, "logps/chosen": -0.9834516644477844, "logps/rejected": -1.057770013809204, "loss": 1.3333, "nll_loss": 1.0925512313842773, "rewards/accuracies": 0.5, "rewards/chosen": -0.09834517538547516, "rewards/margins": 0.007431824691593647, "rewards/rejected": -0.10577700287103653, "step": 1455 }, { "epoch": 0.410660291118768, "grad_norm": 0.69921875, "learning_rate": 3.6690758995123788e-06, "log_odds_chosen": 0.5114420652389526, "log_odds_ratio": -0.5689659714698792, "logits/chosen": 0.2528618276119232, "logits/rejected": 0.020400792360305786, "logps/chosen": -0.7881637215614319, "logps/rejected": -1.0973840951919556, "loss": 1.3211, "nll_loss": 1.1730833053588867, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07881636917591095, "rewards/margins": 0.030922044068574905, "rewards/rejected": -0.10973842442035675, "step": 1460 }, { "epoch": 0.4120666619787638, "grad_norm": 0.470703125, "learning_rate": 3.658211077690421e-06, "log_odds_chosen": 0.4712817072868347, "log_odds_ratio": -0.6004756689071655, "logits/chosen": 0.09132170677185059, "logits/rejected": 0.2290278673171997, "logps/chosen": -0.8380820155143738, "logps/rejected": -1.1644837856292725, "loss": 1.2326, "nll_loss": 1.0265964269638062, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08380821347236633, "rewards/margins": 0.03264017030596733, "rewards/rejected": -0.11644838005304337, "step": 1465 }, { "epoch": 0.4134730328387596, "grad_norm": 1.2734375, "learning_rate": 3.6473183305512118e-06, "log_odds_chosen": 0.6088531613349915, "log_odds_ratio": -0.5412156581878662, "logits/chosen": 0.06665558367967606, "logits/rejected": 0.12135007232427597, "logps/chosen": -0.8178361654281616, "logps/rejected": -1.1795545816421509, "loss": 1.2037, "nll_loss": 0.9547508955001831, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08178362995386124, "rewards/margins": 0.036171846091747284, "rewards/rejected": -0.11795546859502792, "step": 1470 }, { "epoch": 0.41487940369875537, "grad_norm": 0.474609375, "learning_rate": 3.636397920726861e-06, "log_odds_chosen": 0.03626465052366257, "log_odds_ratio": -0.7632964849472046, "logits/chosen": -0.06508911401033401, "logits/rejected": 0.015456904657185078, "logps/chosen": -0.8522012829780579, "logps/rejected": -0.8960638046264648, "loss": 1.3382, "nll_loss": 1.295910120010376, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08522014319896698, "rewards/margins": 0.004386237356811762, "rewards/rejected": -0.089606374502182, "step": 1475 }, { "epoch": 0.41628577455875115, "grad_norm": 0.4375, "learning_rate": 3.6254501115164477e-06, "log_odds_chosen": 0.07423652708530426, "log_odds_ratio": -0.7656761407852173, "logits/chosen": -0.07237622886896133, "logits/rejected": 0.10565494000911713, "logps/chosen": -1.0656932592391968, "logps/rejected": -1.0861456394195557, "loss": 1.332, "nll_loss": 1.252655267715454, "rewards/accuracies": 0.5, "rewards/chosen": -0.1065693125128746, "rewards/margins": 0.0020452491007745266, "rewards/rejected": -0.10861456394195557, "step": 1480 }, { "epoch": 0.41769214541874694, "grad_norm": 0.44140625, "learning_rate": 3.6144751668796672e-06, "log_odds_chosen": 0.17315946519374847, "log_odds_ratio": -0.6867714524269104, "logits/chosen": 0.13648071885108948, "logits/rejected": 0.09958993643522263, "logps/chosen": -0.8613283038139343, "logps/rejected": -0.9445828199386597, "loss": 1.2987, "nll_loss": 1.1979812383651733, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08613282442092896, "rewards/margins": 0.008325454778969288, "rewards/rejected": -0.09445827454328537, "step": 1485 }, { "epoch": 0.4190985162787427, "grad_norm": 0.52734375, "learning_rate": 3.603473351430474e-06, "log_odds_chosen": 0.17239415645599365, "log_odds_ratio": -0.7343893647193909, "logits/chosen": 0.09099440276622772, "logits/rejected": 0.07965691387653351, "logps/chosen": -1.1169488430023193, "logps/rejected": -1.2787196636199951, "loss": 1.3347, "nll_loss": 1.287948489189148, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11169488728046417, "rewards/margins": 0.016177091747522354, "rewards/rejected": -0.12787196040153503, "step": 1490 }, { "epoch": 0.42050488713873846, "grad_norm": 0.61328125, "learning_rate": 3.592444930430693e-06, "log_odds_chosen": 0.5300126075744629, "log_odds_ratio": -0.5818522572517395, "logits/chosen": -0.03600483015179634, "logits/rejected": 0.10826855897903442, "logps/chosen": -0.8172906041145325, "logps/rejected": -1.1227980852127075, "loss": 1.313, "nll_loss": 1.1631724834442139, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08172906935214996, "rewards/margins": 0.030550751835107803, "rewards/rejected": -0.11227981001138687, "step": 1495 }, { "epoch": 0.42191125799873425, "grad_norm": 0.33984375, "learning_rate": 3.581390169783633e-06, "log_odds_chosen": 0.5500032305717468, "log_odds_ratio": -0.531952977180481, "logits/chosen": 0.2329874336719513, "logits/rejected": -0.09679488837718964, "logps/chosen": -0.7500187158584595, "logps/rejected": -1.0893311500549316, "loss": 1.3202, "nll_loss": 1.3443794250488281, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07500188052654266, "rewards/margins": 0.03393123671412468, "rewards/rejected": -0.10893311351537704, "step": 1500 }, { "epoch": 0.42331762885873003, "grad_norm": 0.859375, "learning_rate": 3.570309336027667e-06, "log_odds_chosen": 0.20320725440979004, "log_odds_ratio": -0.7183516621589661, "logits/chosen": -0.14446063339710236, "logits/rejected": 0.05480308085680008, "logps/chosen": -0.967627227306366, "logps/rejected": -1.0267921686172485, "loss": 1.2588, "nll_loss": 1.4239879846572876, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09676271677017212, "rewards/margins": 0.005916501395404339, "rewards/rejected": -0.10267921537160873, "step": 1505 }, { "epoch": 0.4247239997187258, "grad_norm": 0.5625, "learning_rate": 3.559202696329812e-06, "log_odds_chosen": 0.3492463529109955, "log_odds_ratio": -0.6220179796218872, "logits/chosen": 0.3201816976070404, "logits/rejected": 0.06840449571609497, "logps/chosen": -0.9060875177383423, "logps/rejected": -1.1536937952041626, "loss": 1.3733, "nll_loss": 1.1420310735702515, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09060876071453094, "rewards/margins": 0.024760618805885315, "rewards/rejected": -0.11536937952041626, "step": 1510 }, { "epoch": 0.4261303705787216, "grad_norm": 0.494140625, "learning_rate": 3.548070518479285e-06, "log_odds_chosen": 0.14108441770076752, "log_odds_ratio": -0.698050856590271, "logits/chosen": 0.09484975039958954, "logits/rejected": -0.1475522667169571, "logps/chosen": -0.8964487910270691, "logps/rejected": -0.9797111749649048, "loss": 1.4352, "nll_loss": 1.370657205581665, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08964487910270691, "rewards/margins": 0.008326229639351368, "rewards/rejected": -0.097971111536026, "step": 1515 }, { "epoch": 0.4275367414387174, "grad_norm": 1.2421875, "learning_rate": 3.5369130708810457e-06, "log_odds_chosen": 0.20812788605690002, "log_odds_ratio": -0.7099069356918335, "logits/chosen": 0.06524882465600967, "logits/rejected": 0.09684231877326965, "logps/chosen": -1.0159488916397095, "logps/rejected": -1.0735489130020142, "loss": 1.3004, "nll_loss": 1.3176088333129883, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10159488767385483, "rewards/margins": 0.005759999621659517, "rewards/rejected": -0.10735489428043365, "step": 1520 }, { "epoch": 0.4289431122987132, "grad_norm": 1.15625, "learning_rate": 3.525730622549327e-06, "log_odds_chosen": 0.3095996081829071, "log_odds_ratio": -0.656899631023407, "logits/chosen": 0.13213399052619934, "logits/rejected": 0.025996968150138855, "logps/chosen": -0.9443826675415039, "logps/rejected": -1.1343662738800049, "loss": 1.3466, "nll_loss": 1.3322721719741821, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09443826973438263, "rewards/margins": 0.01899835839867592, "rewards/rejected": -0.11343662440776825, "step": 1525 }, { "epoch": 0.43034948315870897, "grad_norm": 0.76171875, "learning_rate": 3.5145234431011455e-06, "log_odds_chosen": 0.1597270667552948, "log_odds_ratio": -0.6989853978157043, "logits/chosen": 0.27805274724960327, "logits/rejected": 0.14623470604419708, "logps/chosen": -0.9531451463699341, "logps/rejected": -1.0427472591400146, "loss": 1.3183, "nll_loss": 1.1004104614257812, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09531451761722565, "rewards/margins": 0.00896020419895649, "rewards/rejected": -0.10427472740411758, "step": 1530 }, { "epoch": 0.43175585401870475, "grad_norm": 0.59375, "learning_rate": 3.503291802749807e-06, "log_odds_chosen": -0.03711827099323273, "log_odds_ratio": -0.8125853538513184, "logits/chosen": 0.1721441000699997, "logits/rejected": -0.18884584307670593, "logps/chosen": -1.061238169670105, "logps/rejected": -1.0077035427093506, "loss": 1.3304, "nll_loss": 1.3840948343276978, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10612382739782333, "rewards/margins": -0.0053534661419689655, "rewards/rejected": -0.10077036917209625, "step": 1535 }, { "epoch": 0.4331622248787005, "grad_norm": 0.94921875, "learning_rate": 3.492035972298384e-06, "log_odds_chosen": 0.5267351269721985, "log_odds_ratio": -0.5846163034439087, "logits/chosen": 0.30322569608688354, "logits/rejected": 0.09838562458753586, "logps/chosen": -0.8233901858329773, "logps/rejected": -1.1212366819381714, "loss": 1.2286, "nll_loss": 1.1432218551635742, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08233902603387833, "rewards/margins": 0.02978464961051941, "rewards/rejected": -0.11212366819381714, "step": 1540 }, { "epoch": 0.4345685957386963, "grad_norm": 0.56640625, "learning_rate": 3.480756223133192e-06, "log_odds_chosen": 0.5206764340400696, "log_odds_ratio": -0.5795222520828247, "logits/chosen": 0.22843270003795624, "logits/rejected": -0.002892266260460019, "logps/chosen": -0.8150238990783691, "logps/rejected": -1.0866426229476929, "loss": 1.3497, "nll_loss": 1.199294924736023, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08150239288806915, "rewards/margins": 0.027161872014403343, "rewards/rejected": -0.10866427421569824, "step": 1545 }, { "epoch": 0.43597496659869206, "grad_norm": 0.4765625, "learning_rate": 3.469452827217244e-06, "log_odds_chosen": 0.31150805950164795, "log_odds_ratio": -0.6585651636123657, "logits/chosen": 0.022850574925541878, "logits/rejected": -0.079190194606781, "logps/chosen": -0.8741966485977173, "logps/rejected": -1.0799192190170288, "loss": 1.3772, "nll_loss": 1.3223216533660889, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08741967380046844, "rewards/margins": 0.02057226002216339, "rewards/rejected": -0.10799191892147064, "step": 1550 }, { "epoch": 0.43738133745868785, "grad_norm": 0.51171875, "learning_rate": 3.4581260570836923e-06, "log_odds_chosen": 0.3546622693538666, "log_odds_ratio": -0.5984280705451965, "logits/chosen": 0.21791966259479523, "logits/rejected": 0.12619267404079437, "logps/chosen": -0.8225027322769165, "logps/rejected": -1.0330629348754883, "loss": 1.3622, "nll_loss": 1.3160654306411743, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08225028216838837, "rewards/margins": 0.02105601504445076, "rewards/rejected": -0.10330629348754883, "step": 1555 }, { "epoch": 0.43878770831868363, "grad_norm": 0.91796875, "learning_rate": 3.4467761858292597e-06, "log_odds_chosen": 0.2265872061252594, "log_odds_ratio": -0.7681325078010559, "logits/chosen": 0.1044432520866394, "logits/rejected": 0.03168236091732979, "logps/chosen": -1.058218240737915, "logps/rejected": -1.1245912313461304, "loss": 1.3188, "nll_loss": 1.2893956899642944, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10582182556390762, "rewards/margins": 0.006637311074882746, "rewards/rejected": -0.11245913803577423, "step": 1560 }, { "epoch": 0.4401940791786794, "grad_norm": 0.6875, "learning_rate": 3.4354034871076535e-06, "log_odds_chosen": 0.17266540229320526, "log_odds_ratio": -0.6633031368255615, "logits/chosen": 0.2871550917625427, "logits/rejected": 0.03216805309057236, "logps/chosen": -0.852996826171875, "logps/rejected": -0.9725733995437622, "loss": 1.2719, "nll_loss": 1.1104490756988525, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08529968559741974, "rewards/margins": 0.011957659386098385, "rewards/rejected": -0.0972573384642601, "step": 1565 }, { "epoch": 0.4416004500386752, "grad_norm": 0.5625, "learning_rate": 3.4240082351229698e-06, "log_odds_chosen": 0.9087265729904175, "log_odds_ratio": -0.4688630998134613, "logits/chosen": 0.3752935528755188, "logits/rejected": -0.13310836255550385, "logps/chosen": -0.7691707015037537, "logps/rejected": -1.356413722038269, "loss": 1.3106, "nll_loss": 1.174346923828125, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07691706717014313, "rewards/margins": 0.05872431397438049, "rewards/rejected": -0.13564138114452362, "step": 1570 }, { "epoch": 0.443006820898671, "grad_norm": 0.765625, "learning_rate": 3.4125907046230765e-06, "log_odds_chosen": 0.3072708249092102, "log_odds_ratio": -0.6539907455444336, "logits/chosen": 0.16183142364025116, "logits/rejected": -0.0026418864727020264, "logps/chosen": -1.0090245008468628, "logps/rejected": -1.166789174079895, "loss": 1.2752, "nll_loss": 1.4169988632202148, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10090246051549911, "rewards/margins": 0.015776459127664566, "rewards/rejected": -0.11667891591787338, "step": 1575 }, { "epoch": 0.4444131917586668, "grad_norm": 1.0234375, "learning_rate": 3.4011511708929966e-06, "log_odds_chosen": 0.28264811635017395, "log_odds_ratio": -0.6940265893936157, "logits/chosen": 0.16960462927818298, "logits/rejected": -0.06453105062246323, "logps/chosen": -0.9375308752059937, "logps/rejected": -1.095520257949829, "loss": 1.2216, "nll_loss": 1.189095377922058, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09375307708978653, "rewards/margins": 0.015798933804035187, "rewards/rejected": -0.10955201089382172, "step": 1580 }, { "epoch": 0.44581956261866257, "grad_norm": 0.93359375, "learning_rate": 3.389689909748265e-06, "log_odds_chosen": 0.2239263951778412, "log_odds_ratio": -0.6509321331977844, "logits/chosen": 0.04326072335243225, "logits/rejected": 0.18090423941612244, "logps/chosen": -0.9276517033576965, "logps/rejected": -1.0240963697433472, "loss": 1.2754, "nll_loss": 1.1569693088531494, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09276516735553741, "rewards/margins": 0.009644483216106892, "rewards/rejected": -0.10240964591503143, "step": 1585 }, { "epoch": 0.4472259334786583, "grad_norm": 0.4453125, "learning_rate": 3.378207197528282e-06, "log_odds_chosen": 0.42619529366493225, "log_odds_ratio": -0.6441267132759094, "logits/chosen": 0.13767486810684204, "logits/rejected": -0.05010326951742172, "logps/chosen": -0.9565455317497253, "logps/rejected": -1.2347350120544434, "loss": 1.3051, "nll_loss": 1.1900882720947266, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09565454721450806, "rewards/margins": 0.027818959206342697, "rewards/rejected": -0.12347351014614105, "step": 1590 }, { "epoch": 0.4486323043386541, "grad_norm": 1.8828125, "learning_rate": 3.3667033110896476e-06, "log_odds_chosen": 0.4609476923942566, "log_odds_ratio": -0.642728328704834, "logits/chosen": 0.09757024794816971, "logits/rejected": 0.0014503851998597383, "logps/chosen": -0.936115562915802, "logps/rejected": -1.218464970588684, "loss": 1.4283, "nll_loss": 1.3003675937652588, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09361156076192856, "rewards/margins": 0.028234923258423805, "rewards/rejected": -0.12184648215770721, "step": 1595 }, { "epoch": 0.4500386751986499, "grad_norm": 0.3359375, "learning_rate": 3.355178527799487e-06, "log_odds_chosen": 0.04692380875349045, "log_odds_ratio": -0.7183758616447449, "logits/chosen": 0.22514934837818146, "logits/rejected": -0.017583077773451805, "logps/chosen": -0.9809813499450684, "logps/rejected": -1.0266735553741455, "loss": 1.2577, "nll_loss": 1.283029556274414, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09809814393520355, "rewards/margins": 0.004569205921143293, "rewards/rejected": -0.10266734659671783, "step": 1600 }, { "epoch": 0.45144504605864566, "grad_norm": 1.046875, "learning_rate": 3.343633125528766e-06, "log_odds_chosen": 0.7795692682266235, "log_odds_ratio": -0.4920505881309509, "logits/chosen": -0.039743535220623016, "logits/rejected": -0.005208232905715704, "logps/chosen": -0.7143467664718628, "logps/rejected": -1.1701852083206177, "loss": 1.3277, "nll_loss": 1.2369157075881958, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07143466919660568, "rewards/margins": 0.045583855360746384, "rewards/rejected": -0.11701853573322296, "step": 1605 }, { "epoch": 0.45285141691864145, "grad_norm": 1.3671875, "learning_rate": 3.3320673826455898e-06, "log_odds_chosen": 0.4412851929664612, "log_odds_ratio": -0.587189793586731, "logits/chosen": 0.08623610436916351, "logits/rejected": -0.03705819323658943, "logps/chosen": -0.8188657760620117, "logps/rejected": -1.0730937719345093, "loss": 1.3171, "nll_loss": 1.1836116313934326, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08188657462596893, "rewards/margins": 0.025422796607017517, "rewards/rejected": -0.10730937868356705, "step": 1610 }, { "epoch": 0.45425778777863723, "grad_norm": 0.578125, "learning_rate": 3.3204815780084853e-06, "log_odds_chosen": 0.28102391958236694, "log_odds_ratio": -0.642076849937439, "logits/chosen": 0.17856454849243164, "logits/rejected": 0.09094846248626709, "logps/chosen": -0.8270488977432251, "logps/rejected": -1.0271437168121338, "loss": 1.3033, "nll_loss": 1.156569242477417, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08270487934350967, "rewards/margins": 0.0200094822794199, "rewards/rejected": -0.10271435976028442, "step": 1615 }, { "epoch": 0.455664158638633, "grad_norm": 0.443359375, "learning_rate": 3.3088759909596906e-06, "log_odds_chosen": -0.021066760644316673, "log_odds_ratio": -0.7931792736053467, "logits/chosen": 0.09776515513658524, "logits/rejected": 0.06629323214292526, "logps/chosen": -1.0177059173583984, "logps/rejected": -0.9952551126480103, "loss": 1.3109, "nll_loss": 1.226104497909546, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10177057981491089, "rewards/margins": -0.0022450797259807587, "rewards/rejected": -0.09952551126480103, "step": 1620 }, { "epoch": 0.4570705294986288, "grad_norm": 0.703125, "learning_rate": 3.2972509013184063e-06, "log_odds_chosen": 0.3991120457649231, "log_odds_ratio": -0.6175200939178467, "logits/chosen": 0.21924090385437012, "logits/rejected": 0.07826922088861465, "logps/chosen": -0.8282210230827332, "logps/rejected": -1.032022476196289, "loss": 1.2812, "nll_loss": 1.088661789894104, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08282209932804108, "rewards/margins": 0.020380137488245964, "rewards/rejected": -0.10320223867893219, "step": 1625 }, { "epoch": 0.4584769003586246, "grad_norm": 0.55859375, "learning_rate": 3.285606589374056e-06, "log_odds_chosen": 0.10021786391735077, "log_odds_ratio": -0.7633699178695679, "logits/chosen": 0.1093636304140091, "logits/rejected": -0.02713550068438053, "logps/chosen": -0.9839357137680054, "logps/rejected": -1.1006211042404175, "loss": 1.4217, "nll_loss": 1.5614440441131592, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09839358180761337, "rewards/margins": 0.011668531224131584, "rewards/rejected": -0.11006210744380951, "step": 1630 }, { "epoch": 0.4598832712186203, "grad_norm": 0.7578125, "learning_rate": 3.27394333587953e-06, "log_odds_chosen": 0.3297358751296997, "log_odds_ratio": -0.6110000014305115, "logits/chosen": 0.1353745013475418, "logits/rejected": -0.1594073474407196, "logps/chosen": -0.9906272888183594, "logps/rejected": -1.278929352760315, "loss": 1.3627, "nll_loss": 1.3165563344955444, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0990627333521843, "rewards/margins": 0.028830209746956825, "rewards/rejected": -0.12789292633533478, "step": 1635 }, { "epoch": 0.4612896420786161, "grad_norm": 1.046875, "learning_rate": 3.2622614220444105e-06, "log_odds_chosen": 0.19407084584236145, "log_odds_ratio": -0.7150810956954956, "logits/chosen": -0.019503358751535416, "logits/rejected": -0.02048128843307495, "logps/chosen": -0.9893431663513184, "logps/rejected": -1.1059573888778687, "loss": 1.2613, "nll_loss": 1.1831978559494019, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09893431514501572, "rewards/margins": 0.011661411263048649, "rewards/rejected": -0.11059572547674179, "step": 1640 }, { "epoch": 0.4626960129386119, "grad_norm": 0.7890625, "learning_rate": 3.2505611295281934e-06, "log_odds_chosen": 0.2686554193496704, "log_odds_ratio": -0.6652511358261108, "logits/chosen": -0.005538326688110828, "logits/rejected": -0.2434784471988678, "logps/chosen": -0.9460929036140442, "logps/rejected": -1.1476470232009888, "loss": 1.3183, "nll_loss": 1.2766331434249878, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09460929781198502, "rewards/margins": 0.0201554112136364, "rewards/rejected": -0.11476470530033112, "step": 1645 }, { "epoch": 0.4641023837986077, "grad_norm": 1.609375, "learning_rate": 3.2388427404335016e-06, "log_odds_chosen": 0.13716399669647217, "log_odds_ratio": -0.7483528256416321, "logits/chosen": -0.04212506115436554, "logits/rejected": 0.11257772147655487, "logps/chosen": -0.9056754112243652, "logps/rejected": -0.9881712198257446, "loss": 1.4144, "nll_loss": 1.4437960386276245, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09056752175092697, "rewards/margins": 0.008249588310718536, "rewards/rejected": -0.0988171249628067, "step": 1650 }, { "epoch": 0.4655087546586035, "grad_norm": 1.1875, "learning_rate": 3.2271065372992765e-06, "log_odds_chosen": 0.7073981165885925, "log_odds_ratio": -0.5036030411720276, "logits/chosen": 0.25857871770858765, "logits/rejected": 0.1317531317472458, "logps/chosen": -0.7869008779525757, "logps/rejected": -1.2054097652435303, "loss": 1.243, "nll_loss": 1.020970106124878, "rewards/accuracies": 0.75, "rewards/chosen": -0.07869009673595428, "rewards/margins": 0.04185087978839874, "rewards/rejected": -0.12054097652435303, "step": 1655 }, { "epoch": 0.46691512551859926, "grad_norm": 0.341796875, "learning_rate": 3.2153528030939696e-06, "log_odds_chosen": 0.3224171996116638, "log_odds_ratio": -0.6465792059898376, "logits/chosen": 0.19630217552185059, "logits/rejected": -0.009671496227383614, "logps/chosen": -0.8761960864067078, "logps/rejected": -1.0852859020233154, "loss": 1.2579, "nll_loss": 1.1838932037353516, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08761961758136749, "rewards/margins": 0.02090897597372532, "rewards/rejected": -0.10852859169244766, "step": 1660 }, { "epoch": 0.46832149637859505, "grad_norm": 0.6328125, "learning_rate": 3.2035818212087218e-06, "log_odds_chosen": 0.029603172093629837, "log_odds_ratio": -0.7813437581062317, "logits/chosen": 0.3448614478111267, "logits/rejected": -0.03233183175325394, "logps/chosen": -0.8988531827926636, "logps/rejected": -0.8917310833930969, "loss": 1.3023, "nll_loss": 1.211477518081665, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.08988531678915024, "rewards/margins": -0.0007122076931409538, "rewards/rejected": -0.08917311578989029, "step": 1665 }, { "epoch": 0.46972786723859083, "grad_norm": 0.81640625, "learning_rate": 3.191793875450524e-06, "log_odds_chosen": 0.14861974120140076, "log_odds_ratio": -0.7764616012573242, "logits/chosen": 0.09097392857074738, "logits/rejected": 0.15764659643173218, "logps/chosen": -1.060884714126587, "logps/rejected": -1.1452229022979736, "loss": 1.3467, "nll_loss": 1.055648684501648, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10608847439289093, "rewards/margins": 0.008433830924332142, "rewards/rejected": -0.1145222932100296, "step": 1670 }, { "epoch": 0.4711342380985866, "grad_norm": 0.515625, "learning_rate": 3.1799892500353825e-06, "log_odds_chosen": -0.0818730816245079, "log_odds_ratio": -0.8223946690559387, "logits/chosen": 0.04592124745249748, "logits/rejected": 0.04530264809727669, "logps/chosen": -0.8977168202400208, "logps/rejected": -0.8727294206619263, "loss": 1.2926, "nll_loss": 1.2204254865646362, "rewards/accuracies": 0.5, "rewards/chosen": -0.08977167308330536, "rewards/margins": -0.0024987380020320415, "rewards/rejected": -0.08727294206619263, "step": 1675 }, { "epoch": 0.47254060895858235, "grad_norm": 0.5, "learning_rate": 3.168168229581461e-06, "log_odds_chosen": 0.3223528265953064, "log_odds_ratio": -0.6745055317878723, "logits/chosen": 0.09610681235790253, "logits/rejected": -0.14661376178264618, "logps/chosen": -0.868693470954895, "logps/rejected": -1.0475997924804688, "loss": 1.2964, "nll_loss": 1.3960098028182983, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08686934411525726, "rewards/margins": 0.017890626564621925, "rewards/rejected": -0.10475997626781464, "step": 1680 }, { "epoch": 0.47394697981857814, "grad_norm": 0.5546875, "learning_rate": 3.1563310991022183e-06, "log_odds_chosen": 0.42393559217453003, "log_odds_ratio": -0.5872426629066467, "logits/chosen": 0.14853203296661377, "logits/rejected": -0.09115082025527954, "logps/chosen": -0.8752552270889282, "logps/rejected": -1.1131998300552368, "loss": 1.2221, "nll_loss": 1.189212679862976, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08752551674842834, "rewards/margins": 0.023794464766979218, "rewards/rejected": -0.11131997406482697, "step": 1685 }, { "epoch": 0.4753533506785739, "grad_norm": 0.4765625, "learning_rate": 3.144478143999539e-06, "log_odds_chosen": -0.04508579522371292, "log_odds_ratio": -0.7579798102378845, "logits/chosen": 0.27330082654953003, "logits/rejected": -0.18080435693264008, "logps/chosen": -1.079253911972046, "logps/rejected": -1.0695364475250244, "loss": 1.3606, "nll_loss": 1.4030883312225342, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.1079254001379013, "rewards/margins": -0.0009717432549223304, "rewards/rejected": -0.10695364326238632, "step": 1690 }, { "epoch": 0.4767597215385697, "grad_norm": 0.498046875, "learning_rate": 3.1326096500568502e-06, "log_odds_chosen": -0.2987438440322876, "log_odds_ratio": -0.9051570892333984, "logits/chosen": 0.13019177317619324, "logits/rejected": -0.057271480560302734, "logps/chosen": -1.1571803092956543, "logps/rejected": -0.9558758735656738, "loss": 1.331, "nll_loss": 1.4521785974502563, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.1157180443406105, "rewards/margins": -0.020130449905991554, "rewards/rejected": -0.0955875962972641, "step": 1695 }, { "epoch": 0.4781660923985655, "grad_norm": 0.71875, "learning_rate": 3.1207259034322325e-06, "log_odds_chosen": 0.10301417112350464, "log_odds_ratio": -0.7123724222183228, "logits/chosen": 0.12717892229557037, "logits/rejected": 0.19477501511573792, "logps/chosen": -0.9269098043441772, "logps/rejected": -0.9850351214408875, "loss": 1.2283, "nll_loss": 1.0985163450241089, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09269097447395325, "rewards/margins": 0.005812531802803278, "rewards/rejected": -0.09850350767374039, "step": 1700 }, { "epoch": 0.4795724632585613, "grad_norm": 0.89453125, "learning_rate": 3.1088271906515203e-06, "log_odds_chosen": 0.022561922669410706, "log_odds_ratio": -0.7353700399398804, "logits/chosen": 0.40177297592163086, "logits/rejected": 0.4013861119747162, "logps/chosen": -0.8993846774101257, "logps/rejected": -0.8993097543716431, "loss": 1.3072, "nll_loss": 1.1129878759384155, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0899384617805481, "rewards/margins": -7.492490112781525e-06, "rewards/rejected": -0.08993097394704819, "step": 1705 }, { "epoch": 0.4809788341185571, "grad_norm": 0.95703125, "learning_rate": 3.096913798601392e-06, "log_odds_chosen": -0.01520991325378418, "log_odds_ratio": -0.7819222211837769, "logits/chosen": 0.08972098678350449, "logits/rejected": 0.14325182139873505, "logps/chosen": -1.2126904726028442, "logps/rejected": -1.2241413593292236, "loss": 1.3372, "nll_loss": 1.2252757549285889, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.12126903235912323, "rewards/margins": 0.001145093934610486, "rewards/rejected": -0.12241413444280624, "step": 1710 }, { "epoch": 0.48238520497855286, "grad_norm": 0.4296875, "learning_rate": 3.0849860145224537e-06, "log_odds_chosen": -0.0876084640622139, "log_odds_ratio": -0.8532189130783081, "logits/chosen": 0.057985819876194, "logits/rejected": 0.012759095057845116, "logps/chosen": -0.9567869305610657, "logps/rejected": -0.895396888256073, "loss": 1.2678, "nll_loss": 1.1957978010177612, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09567869454622269, "rewards/margins": -0.006138999946415424, "rewards/rejected": -0.08953969180583954, "step": 1715 }, { "epoch": 0.48379157583854865, "grad_norm": 0.546875, "learning_rate": 3.0730441260023148e-06, "log_odds_chosen": 0.5008511543273926, "log_odds_ratio": -0.6241706609725952, "logits/chosen": 0.2945128381252289, "logits/rejected": -0.06201595067977905, "logps/chosen": -0.7394557595252991, "logps/rejected": -0.9900891184806824, "loss": 1.3866, "nll_loss": 1.2984784841537476, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07394556701183319, "rewards/margins": 0.02506333589553833, "rewards/rejected": -0.09900891780853271, "step": 1720 }, { "epoch": 0.4851979466985444, "grad_norm": 0.37890625, "learning_rate": 3.061088420968652e-06, "log_odds_chosen": 0.37631872296333313, "log_odds_ratio": -0.6500317454338074, "logits/chosen": 0.17834916710853577, "logits/rejected": -0.08376047015190125, "logps/chosen": -0.8821493983268738, "logps/rejected": -1.156684160232544, "loss": 1.2124, "nll_loss": 1.1348316669464111, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0882149413228035, "rewards/margins": 0.027453461661934853, "rewards/rejected": -0.1156684011220932, "step": 1725 }, { "epoch": 0.48660431755854017, "grad_norm": 0.58203125, "learning_rate": 3.04911918768227e-06, "log_odds_chosen": 0.01393374614417553, "log_odds_ratio": -0.7638979554176331, "logits/chosen": 0.21452713012695312, "logits/rejected": -0.010724795050919056, "logps/chosen": -0.9365432858467102, "logps/rejected": -1.0033760070800781, "loss": 1.2342, "nll_loss": 1.1810895204544067, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0936543270945549, "rewards/margins": 0.00668326998129487, "rewards/rejected": -0.10033760219812393, "step": 1730 }, { "epoch": 0.48801068841853595, "grad_norm": 0.6875, "learning_rate": 3.037136714730148e-06, "log_odds_chosen": 0.06743361800909042, "log_odds_ratio": -0.718464195728302, "logits/chosen": 0.08722179383039474, "logits/rejected": 0.22729694843292236, "logps/chosen": -0.9022086262702942, "logps/rejected": -0.9189395904541016, "loss": 1.2611, "nll_loss": 1.176918387413025, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0902208760380745, "rewards/margins": 0.0016730849165469408, "rewards/rejected": -0.09189395606517792, "step": 1735 }, { "epoch": 0.48941705927853174, "grad_norm": 0.416015625, "learning_rate": 3.025141291018484e-06, "log_odds_chosen": 0.46886926889419556, "log_odds_ratio": -0.6271840929985046, "logits/chosen": 0.31561678647994995, "logits/rejected": 0.21411773562431335, "logps/chosen": -0.8544023633003235, "logps/rejected": -1.1380481719970703, "loss": 1.2311, "nll_loss": 1.1394670009613037, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08544023334980011, "rewards/margins": 0.02836458943784237, "rewards/rejected": -0.11380481719970703, "step": 1740 }, { "epoch": 0.49082343013852753, "grad_norm": 0.5625, "learning_rate": 3.0131332057657263e-06, "log_odds_chosen": 0.1628924459218979, "log_odds_ratio": -0.7746323943138123, "logits/chosen": 0.04965885728597641, "logits/rejected": 0.08840426057577133, "logps/chosen": -0.8988859057426453, "logps/rejected": -0.8661472201347351, "loss": 1.3893, "nll_loss": 1.2756502628326416, "rewards/accuracies": 0.5, "rewards/chosen": -0.08988858014345169, "rewards/margins": -0.0032738607842475176, "rewards/rejected": -0.08661472052335739, "step": 1745 }, { "epoch": 0.4922298009985233, "grad_norm": 0.6328125, "learning_rate": 3.0011127484956066e-06, "log_odds_chosen": 0.43131130933761597, "log_odds_ratio": -0.5675671100616455, "logits/chosen": 0.06633243709802628, "logits/rejected": 0.10099319368600845, "logps/chosen": -0.8430767059326172, "logps/rejected": -1.0605162382125854, "loss": 1.3615, "nll_loss": 1.4225993156433105, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08430766314268112, "rewards/margins": 0.021743962541222572, "rewards/rejected": -0.10605162382125854, "step": 1750 }, { "epoch": 0.4936361718585191, "grad_norm": 1.046875, "learning_rate": 2.989080209030152e-06, "log_odds_chosen": 0.3503009080886841, "log_odds_ratio": -0.6180037260055542, "logits/chosen": 0.2614450752735138, "logits/rejected": 0.14672674238681793, "logps/chosen": -0.8308509588241577, "logps/rejected": -1.072124719619751, "loss": 1.1843, "nll_loss": 1.1179145574569702, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08308509737253189, "rewards/margins": 0.02412736788392067, "rewards/rejected": -0.10721246898174286, "step": 1755 }, { "epoch": 0.4950425427185149, "grad_norm": 0.875, "learning_rate": 2.977035877482698e-06, "log_odds_chosen": 0.11333318799734116, "log_odds_ratio": -0.7067681550979614, "logits/chosen": 0.08780858665704727, "logits/rejected": 0.16933095455169678, "logps/chosen": -1.0028111934661865, "logps/rejected": -1.0639703273773193, "loss": 1.2054, "nll_loss": 1.2186434268951416, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10028110444545746, "rewards/margins": 0.006115921773016453, "rewards/rejected": -0.10639703273773193, "step": 1760 }, { "epoch": 0.4964489135785107, "grad_norm": 0.45703125, "learning_rate": 2.9649800442509013e-06, "log_odds_chosen": 0.19351093471050262, "log_odds_ratio": -0.6989894509315491, "logits/chosen": -0.018921542912721634, "logits/rejected": 0.05963977426290512, "logps/chosen": -0.9325786828994751, "logps/rejected": -1.1182286739349365, "loss": 1.2193, "nll_loss": 1.1260807514190674, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0932578593492508, "rewards/margins": 0.018564995378255844, "rewards/rejected": -0.11182286590337753, "step": 1765 }, { "epoch": 0.4978552844385064, "grad_norm": 0.83984375, "learning_rate": 2.952913000009729e-06, "log_odds_chosen": 0.38330164551734924, "log_odds_ratio": -0.5583044290542603, "logits/chosen": 0.20868602395057678, "logits/rejected": -0.043395109474658966, "logps/chosen": -0.8025444746017456, "logps/rejected": -1.0090991258621216, "loss": 1.3287, "nll_loss": 1.2745373249053955, "rewards/accuracies": 0.75, "rewards/chosen": -0.0802544504404068, "rewards/margins": 0.020655466243624687, "rewards/rejected": -0.10090991109609604, "step": 1770 }, { "epoch": 0.4992616552985022, "grad_norm": 1.5625, "learning_rate": 2.9408350357044527e-06, "log_odds_chosen": 0.3873779773712158, "log_odds_ratio": -0.6013652086257935, "logits/chosen": -0.011114698834717274, "logits/rejected": 0.006222672760486603, "logps/chosen": -0.8589563369750977, "logps/rejected": -1.0658544301986694, "loss": 1.2814, "nll_loss": 1.2002887725830078, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08589563518762589, "rewards/margins": 0.020689817145466805, "rewards/rejected": -0.10658544301986694, "step": 1775 }, { "epoch": 0.500668026158498, "grad_norm": 0.69140625, "learning_rate": 2.9287464425436386e-06, "log_odds_chosen": 0.22454440593719482, "log_odds_ratio": -0.6838093996047974, "logits/chosen": -0.025767764076590538, "logits/rejected": 0.0712432935833931, "logps/chosen": -1.0069491863250732, "logps/rejected": -1.1783020496368408, "loss": 1.2376, "nll_loss": 1.1458652019500732, "rewards/accuracies": 0.5, "rewards/chosen": -0.10069490969181061, "rewards/margins": 0.01713528484106064, "rewards/rejected": -0.11783019453287125, "step": 1780 }, { "epoch": 0.5020743970184938, "grad_norm": 0.94140625, "learning_rate": 2.9166475119921206e-06, "log_odds_chosen": 0.09982554614543915, "log_odds_ratio": -0.7170512676239014, "logits/chosen": 0.27315324544906616, "logits/rejected": 0.05909186601638794, "logps/chosen": -1.0025029182434082, "logps/rejected": -1.0902785062789917, "loss": 1.2826, "nll_loss": 1.3132749795913696, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10025028884410858, "rewards/margins": 0.008777563460171223, "rewards/rejected": -0.10902786254882812, "step": 1785 }, { "epoch": 0.5034807678784896, "grad_norm": 0.5546875, "learning_rate": 2.904538535763973e-06, "log_odds_chosen": 0.6338373422622681, "log_odds_ratio": -0.48895391821861267, "logits/chosen": 0.29887187480926514, "logits/rejected": 0.14295390248298645, "logps/chosen": -0.7918098568916321, "logps/rejected": -1.1807332038879395, "loss": 1.1814, "nll_loss": 1.0374513864517212, "rewards/accuracies": 0.75, "rewards/chosen": -0.07918097823858261, "rewards/margins": 0.03889235109090805, "rewards/rejected": -0.11807332932949066, "step": 1790 }, { "epoch": 0.5048871387384853, "grad_norm": 0.5625, "learning_rate": 2.8924198058154807e-06, "log_odds_chosen": 0.3318602740764618, "log_odds_ratio": -0.6486313343048096, "logits/chosen": 0.18483847379684448, "logits/rejected": 0.06718714535236359, "logps/chosen": -0.9118415713310242, "logps/rejected": -1.1547482013702393, "loss": 1.2743, "nll_loss": 1.1991218328475952, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09118416905403137, "rewards/margins": 0.024290654808282852, "rewards/rejected": -0.11547482013702393, "step": 1795 }, { "epoch": 0.5062935095984811, "grad_norm": 0.9375, "learning_rate": 2.8802916143380983e-06, "log_odds_chosen": 0.14648711681365967, "log_odds_ratio": -0.667282223701477, "logits/chosen": 0.21301324665546417, "logits/rejected": 0.15577247738838196, "logps/chosen": -0.8973161578178406, "logps/rejected": -0.9705324172973633, "loss": 1.2663, "nll_loss": 1.1156691312789917, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0897316187620163, "rewards/margins": 0.007321618497371674, "rewards/rejected": -0.09705324470996857, "step": 1800 }, { "epoch": 0.5076998804584769, "grad_norm": 0.302734375, "learning_rate": 2.8681542537514024e-06, "log_odds_chosen": 0.34126919507980347, "log_odds_ratio": -0.5945046544075012, "logits/chosen": 0.30521661043167114, "logits/rejected": -0.009005474857985973, "logps/chosen": -0.899261474609375, "logps/rejected": -1.116114616394043, "loss": 1.3684, "nll_loss": 1.3031537532806396, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08992613852024078, "rewards/margins": 0.021685311570763588, "rewards/rejected": -0.11161146312952042, "step": 1805 }, { "epoch": 0.5091062513184726, "grad_norm": 0.54296875, "learning_rate": 2.8560080166960465e-06, "log_odds_chosen": 0.05003465339541435, "log_odds_ratio": -0.8127752542495728, "logits/chosen": 0.15410101413726807, "logits/rejected": -0.05961986631155014, "logps/chosen": -1.2021180391311646, "logps/rejected": -1.244368314743042, "loss": 1.2782, "nll_loss": 1.2843389511108398, "rewards/accuracies": 0.5, "rewards/chosen": -0.12021180242300034, "rewards/margins": 0.004225029144436121, "rewards/rejected": -0.12443683296442032, "step": 1810 }, { "epoch": 0.5105126221784685, "grad_norm": 0.5, "learning_rate": 2.8438531960267e-06, "log_odds_chosen": 0.14014047384262085, "log_odds_ratio": -0.746768593788147, "logits/chosen": 0.007824910804629326, "logits/rejected": 0.04268079251050949, "logps/chosen": -0.9318583607673645, "logps/rejected": -1.0280802249908447, "loss": 1.2935, "nll_loss": 1.2723755836486816, "rewards/accuracies": 0.5, "rewards/chosen": -0.09318584203720093, "rewards/margins": 0.009622195735573769, "rewards/rejected": -0.10280803591012955, "step": 1815 }, { "epoch": 0.5119189930384642, "grad_norm": 0.6953125, "learning_rate": 2.8316900848049896e-06, "log_odds_chosen": 0.46041789650917053, "log_odds_ratio": -0.545207142829895, "logits/chosen": -0.025069892406463623, "logits/rejected": 0.0031093836296349764, "logps/chosen": -0.9471213221549988, "logps/rejected": -1.2298479080200195, "loss": 1.2804, "nll_loss": 0.9852622747421265, "rewards/accuracies": 0.75, "rewards/chosen": -0.09471213817596436, "rewards/margins": 0.02827264927327633, "rewards/rejected": -0.12298478931188583, "step": 1820 }, { "epoch": 0.5133253638984601, "grad_norm": 0.462890625, "learning_rate": 2.8195189762924357e-06, "log_odds_chosen": 0.38171496987342834, "log_odds_ratio": -0.5993026494979858, "logits/chosen": 0.13395783305168152, "logits/rejected": -0.014965623617172241, "logps/chosen": -0.8572479486465454, "logps/rejected": -1.1530402898788452, "loss": 1.3398, "nll_loss": 1.2653952836990356, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08572478592395782, "rewards/margins": 0.02957923151552677, "rewards/rejected": -0.11530401557683945, "step": 1825 }, { "epoch": 0.5147317347584558, "grad_norm": 0.5546875, "learning_rate": 2.807340163943377e-06, "log_odds_chosen": 0.9017633199691772, "log_odds_ratio": -0.528324544429779, "logits/chosen": 0.2883888781070709, "logits/rejected": 0.047523729503154755, "logps/chosen": -0.8029147386550903, "logps/rejected": -1.508462905883789, "loss": 1.3291, "nll_loss": 1.1781851053237915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08029146492481232, "rewards/margins": 0.07055483758449554, "rewards/rejected": -0.15084628760814667, "step": 1830 }, { "epoch": 0.5161381056184516, "grad_norm": 0.443359375, "learning_rate": 2.7951539413978967e-06, "log_odds_chosen": -0.15000049769878387, "log_odds_ratio": -0.9076086282730103, "logits/chosen": 0.026946574449539185, "logits/rejected": 0.008309757336974144, "logps/chosen": -1.0551016330718994, "logps/rejected": -0.962007999420166, "loss": 1.3199, "nll_loss": 1.2718522548675537, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.1055101752281189, "rewards/margins": -0.009309363551437855, "rewards/rejected": -0.09620080888271332, "step": 1835 }, { "epoch": 0.5175444764784474, "grad_norm": 0.6484375, "learning_rate": 2.7829606024747458e-06, "log_odds_chosen": 0.3771592974662781, "log_odds_ratio": -0.6184414625167847, "logits/chosen": 0.08473803848028183, "logits/rejected": -0.06116770580410957, "logps/chosen": -0.8525704145431519, "logps/rejected": -1.120895504951477, "loss": 1.2459, "nll_loss": 1.1650941371917725, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08525704592466354, "rewards/margins": 0.026832515373826027, "rewards/rejected": -0.11208955198526382, "step": 1840 }, { "epoch": 0.5189508473384431, "grad_norm": 0.46484375, "learning_rate": 2.7707604411642547e-06, "log_odds_chosen": 0.26341018080711365, "log_odds_ratio": -0.6081176996231079, "logits/chosen": 0.264903724193573, "logits/rejected": -0.026007074862718582, "logps/chosen": -0.9291330575942993, "logps/rejected": -1.0887218713760376, "loss": 1.3753, "nll_loss": 1.2891864776611328, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09291330724954605, "rewards/margins": 0.015958871692419052, "rewards/rejected": -0.1088721975684166, "step": 1845 }, { "epoch": 0.5203572181984389, "grad_norm": 0.408203125, "learning_rate": 2.7585537516212468e-06, "log_odds_chosen": 0.26861482858657837, "log_odds_ratio": -0.7222877740859985, "logits/chosen": -0.026812052354216576, "logits/rejected": -0.068088099360466, "logps/chosen": -1.0210070610046387, "logps/rejected": -1.2642821073532104, "loss": 1.3171, "nll_loss": 1.4248225688934326, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.10210070759057999, "rewards/margins": 0.02432749792933464, "rewards/rejected": -0.12642820179462433, "step": 1850 }, { "epoch": 0.5217635890584347, "grad_norm": 0.625, "learning_rate": 2.7463408281579455e-06, "log_odds_chosen": 0.18715333938598633, "log_odds_ratio": -0.7116204500198364, "logits/chosen": 0.18907713890075684, "logits/rejected": -0.004186171106994152, "logps/chosen": -0.8570513725280762, "logps/rejected": -0.9989348649978638, "loss": 1.3202, "nll_loss": 1.1158322095870972, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08570513874292374, "rewards/margins": 0.014188344590365887, "rewards/rejected": -0.0998934805393219, "step": 1855 }, { "epoch": 0.5231699599184305, "grad_norm": 0.322265625, "learning_rate": 2.73412196523688e-06, "log_odds_chosen": 0.3805992603302002, "log_odds_ratio": -0.5736201405525208, "logits/chosen": 0.1680772304534912, "logits/rejected": 0.03141466900706291, "logps/chosen": -0.8502508997917175, "logps/rejected": -1.1017472743988037, "loss": 1.287, "nll_loss": 1.1758310794830322, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08502508699893951, "rewards/margins": 0.025149637833237648, "rewards/rejected": -0.11017473042011261, "step": 1860 }, { "epoch": 0.5245763307784262, "grad_norm": 1.1953125, "learning_rate": 2.7218974574637837e-06, "log_odds_chosen": 0.6645687222480774, "log_odds_ratio": -0.6347068548202515, "logits/chosen": 0.04413944110274315, "logits/rejected": 0.04097120463848114, "logps/chosen": -0.911517322063446, "logps/rejected": -1.2816895246505737, "loss": 1.3929, "nll_loss": 1.3630738258361816, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09115172922611237, "rewards/margins": 0.03701721876859665, "rewards/rejected": -0.12816892564296722, "step": 1865 }, { "epoch": 0.5259827016384221, "grad_norm": 0.38671875, "learning_rate": 2.70966759958049e-06, "log_odds_chosen": 0.28335127234458923, "log_odds_ratio": -0.6399809122085571, "logits/chosen": 0.17220312356948853, "logits/rejected": 0.18077576160430908, "logps/chosen": -0.9338391423225403, "logps/rejected": -1.128045916557312, "loss": 1.231, "nll_loss": 1.2164467573165894, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09338392317295074, "rewards/margins": 0.0194206852465868, "rewards/rejected": -0.11280461400747299, "step": 1870 }, { "epoch": 0.5273890724984178, "grad_norm": 0.5390625, "learning_rate": 2.697432686457828e-06, "log_odds_chosen": 0.4342438280582428, "log_odds_ratio": -0.6491691470146179, "logits/chosen": 0.17093636095523834, "logits/rejected": 0.05285681039094925, "logps/chosen": -0.8747994303703308, "logps/rejected": -1.1325397491455078, "loss": 1.3414, "nll_loss": 1.2444498538970947, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08747994154691696, "rewards/margins": 0.025774037465453148, "rewards/rejected": -0.11325398832559586, "step": 1875 }, { "epoch": 0.5287954433584137, "grad_norm": 0.62109375, "learning_rate": 2.685193013088515e-06, "log_odds_chosen": 0.42729368805885315, "log_odds_ratio": -0.6372612714767456, "logits/chosen": 0.1625080108642578, "logits/rejected": 0.06369408220052719, "logps/chosen": -0.9214962720870972, "logps/rejected": -1.2057900428771973, "loss": 1.3388, "nll_loss": 1.317048192024231, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09214963763952255, "rewards/margins": 0.02842937409877777, "rewards/rejected": -0.12057900428771973, "step": 1880 }, { "epoch": 0.5302018142184094, "grad_norm": 1.6953125, "learning_rate": 2.6729488745800375e-06, "log_odds_chosen": 0.21069876849651337, "log_odds_ratio": -0.6181350946426392, "logits/chosen": 0.09169472008943558, "logits/rejected": -0.15534797310829163, "logps/chosen": -1.0708431005477905, "logps/rejected": -1.2090436220169067, "loss": 1.2751, "nll_loss": 1.3398125171661377, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10708429664373398, "rewards/margins": 0.0138200419023633, "rewards/rejected": -0.1209043487906456, "step": 1885 }, { "epoch": 0.5316081850784051, "grad_norm": 0.703125, "learning_rate": 2.6607005661475412e-06, "log_odds_chosen": -0.015950357541441917, "log_odds_ratio": -0.9686321020126343, "logits/chosen": 0.17628921568393707, "logits/rejected": 0.04031284898519516, "logps/chosen": -1.1431938409805298, "logps/rejected": -1.177972435951233, "loss": 1.3656, "nll_loss": 1.365552306175232, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11431938409805298, "rewards/margins": 0.0034778628032654524, "rewards/rejected": -0.11779724061489105, "step": 1890 }, { "epoch": 0.533014555938401, "grad_norm": 0.380859375, "learning_rate": 2.6484483831067132e-06, "log_odds_chosen": 0.6062152981758118, "log_odds_ratio": -0.5288558602333069, "logits/chosen": 0.04212791472673416, "logits/rejected": -0.21651284396648407, "logps/chosen": -0.9404972791671753, "logps/rejected": -1.3659436702728271, "loss": 1.3344, "nll_loss": 1.4181785583496094, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09404972940683365, "rewards/margins": 0.0425446555018425, "rewards/rejected": -0.13659438490867615, "step": 1895 }, { "epoch": 0.5344209267983967, "grad_norm": 0.498046875, "learning_rate": 2.6361926208666585e-06, "log_odds_chosen": 0.2467803657054901, "log_odds_ratio": -0.6717809438705444, "logits/chosen": 0.306289941072464, "logits/rejected": -0.03955193608999252, "logps/chosen": -0.9068489074707031, "logps/rejected": -1.087083101272583, "loss": 1.2124, "nll_loss": 1.149648904800415, "rewards/accuracies": 0.5, "rewards/chosen": -0.09068489074707031, "rewards/margins": 0.01802341639995575, "rewards/rejected": -0.10870830714702606, "step": 1900 }, { "epoch": 0.5358272976583925, "grad_norm": 0.357421875, "learning_rate": 2.623933574922779e-06, "log_odds_chosen": 0.3376588523387909, "log_odds_ratio": -0.6184042096138, "logits/chosen": 0.07933865487575531, "logits/rejected": 0.1608869433403015, "logps/chosen": -0.850027859210968, "logps/rejected": -1.0102084875106812, "loss": 1.2721, "nll_loss": 1.1246836185455322, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08500279486179352, "rewards/margins": 0.01601807400584221, "rewards/rejected": -0.10102085769176483, "step": 1905 }, { "epoch": 0.5372336685183883, "grad_norm": 0.490234375, "learning_rate": 2.611671540849651e-06, "log_odds_chosen": 0.28319352865219116, "log_odds_ratio": -0.6058458685874939, "logits/chosen": 0.3010689616203308, "logits/rejected": 0.015681147575378418, "logps/chosen": -0.9729520082473755, "logps/rejected": -1.1751806735992432, "loss": 1.3129, "nll_loss": 1.2195885181427002, "rewards/accuracies": 0.75, "rewards/chosen": -0.09729520231485367, "rewards/margins": 0.0202228631824255, "rewards/rejected": -0.11751806735992432, "step": 1910 }, { "epoch": 0.5386400393783841, "grad_norm": 0.4140625, "learning_rate": 2.599406814293895e-06, "log_odds_chosen": 0.31516438722610474, "log_odds_ratio": -0.6291471719741821, "logits/chosen": 0.12986072897911072, "logits/rejected": 0.03120257332921028, "logps/chosen": -0.9961276054382324, "logps/rejected": -1.1284226179122925, "loss": 1.311, "nll_loss": 1.3952885866165161, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.099612757563591, "rewards/margins": 0.01322950143367052, "rewards/rejected": -0.11284227669239044, "step": 1915 }, { "epoch": 0.5400464102383798, "grad_norm": 1.5390625, "learning_rate": 2.5871396909670494e-06, "log_odds_chosen": 0.723926842212677, "log_odds_ratio": -0.5813840627670288, "logits/chosen": 0.10541262477636337, "logits/rejected": 0.2073705941438675, "logps/chosen": -0.8834552764892578, "logps/rejected": -1.36771559715271, "loss": 1.345, "nll_loss": 1.0873137712478638, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08834554255008698, "rewards/margins": 0.048426032066345215, "rewards/rejected": -0.1367715746164322, "step": 1920 }, { "epoch": 0.5414527810983757, "grad_norm": 0.67578125, "learning_rate": 2.5748704666384417e-06, "log_odds_chosen": 0.41179290413856506, "log_odds_ratio": -0.6550354957580566, "logits/chosen": 0.15152336657047272, "logits/rejected": -0.05778312683105469, "logps/chosen": -0.9851999282836914, "logps/rejected": -1.2101080417633057, "loss": 1.3186, "nll_loss": 1.2828853130340576, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09852000325918198, "rewards/margins": 0.022490810602903366, "rewards/rejected": -0.12101080268621445, "step": 1925 }, { "epoch": 0.5428591519583714, "grad_norm": 0.3828125, "learning_rate": 2.562599437128055e-06, "log_odds_chosen": 0.377454936504364, "log_odds_ratio": -0.6358648538589478, "logits/chosen": 0.0949888676404953, "logits/rejected": -0.10224437713623047, "logps/chosen": -0.8374663591384888, "logps/rejected": -1.0614335536956787, "loss": 1.3362, "nll_loss": 1.259254813194275, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08374662697315216, "rewards/margins": 0.02239672839641571, "rewards/rejected": -0.10614337027072906, "step": 1930 }, { "epoch": 0.5442655228183672, "grad_norm": 0.5625, "learning_rate": 2.550326898299397e-06, "log_odds_chosen": 0.3798829913139343, "log_odds_ratio": -0.6570533514022827, "logits/chosen": 0.20587129890918732, "logits/rejected": -0.19827596843242645, "logps/chosen": -0.8702229261398315, "logps/rejected": -1.0879228115081787, "loss": 1.2534, "nll_loss": 1.336097240447998, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08702228963375092, "rewards/margins": 0.021769985556602478, "rewards/rejected": -0.1087922677397728, "step": 1935 }, { "epoch": 0.545671893678363, "grad_norm": 0.50390625, "learning_rate": 2.538053146052366e-06, "log_odds_chosen": 0.42701345682144165, "log_odds_ratio": -0.597525417804718, "logits/chosen": 0.3210323452949524, "logits/rejected": 0.06907093524932861, "logps/chosen": -0.782520592212677, "logps/rejected": -1.081879734992981, "loss": 1.2815, "nll_loss": 1.3015620708465576, "rewards/accuracies": 0.75, "rewards/chosen": -0.07825206220149994, "rewards/margins": 0.029935915023088455, "rewards/rejected": -0.1081879585981369, "step": 1940 }, { "epoch": 0.5470782645383587, "grad_norm": 0.4453125, "learning_rate": 2.5257784763161177e-06, "log_odds_chosen": -0.26187849044799805, "log_odds_ratio": -0.9886520504951477, "logits/chosen": -0.0548955500125885, "logits/rejected": -0.041044097393751144, "logps/chosen": -1.1638720035552979, "logps/rejected": -1.041886329650879, "loss": 1.4207, "nll_loss": 1.4691832065582275, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.11638720333576202, "rewards/margins": -0.012198579497635365, "rewards/rejected": -0.10418863594532013, "step": 1945 }, { "epoch": 0.5484846353983546, "grad_norm": 1.21875, "learning_rate": 2.5135031850419266e-06, "log_odds_chosen": 0.38276079297065735, "log_odds_ratio": -0.5948246717453003, "logits/chosen": -0.045088671147823334, "logits/rejected": -0.12301528453826904, "logps/chosen": -0.9737148284912109, "logps/rejected": -1.1692750453948975, "loss": 1.307, "nll_loss": 1.3369234800338745, "rewards/accuracies": 0.75, "rewards/chosen": -0.09737147390842438, "rewards/margins": 0.019556032493710518, "rewards/rejected": -0.11692751944065094, "step": 1950 }, { "epoch": 0.5498910062583503, "grad_norm": 0.921875, "learning_rate": 2.5012275681960563e-06, "log_odds_chosen": 0.1337728202342987, "log_odds_ratio": -0.7448712587356567, "logits/chosen": 0.07559056580066681, "logits/rejected": -0.15480338037014008, "logps/chosen": -0.8971524238586426, "logps/rejected": -1.0162702798843384, "loss": 1.259, "nll_loss": 1.3991035223007202, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08971523493528366, "rewards/margins": 0.011911772191524506, "rewards/rejected": -0.10162701457738876, "step": 1955 }, { "epoch": 0.5512973771183461, "grad_norm": 0.65234375, "learning_rate": 2.4889519217526178e-06, "log_odds_chosen": 0.22945475578308105, "log_odds_ratio": -0.680932343006134, "logits/chosen": 0.22960948944091797, "logits/rejected": 0.17516712844371796, "logps/chosen": -0.827686607837677, "logps/rejected": -0.9752469062805176, "loss": 1.3716, "nll_loss": 1.041610598564148, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08276865631341934, "rewards/margins": 0.014756026677787304, "rewards/rejected": -0.09752468764781952, "step": 1960 }, { "epoch": 0.5527037479783419, "grad_norm": 0.51953125, "learning_rate": 2.4766765416864358e-06, "log_odds_chosen": 0.4295072555541992, "log_odds_ratio": -0.5632562637329102, "logits/chosen": 0.3465437591075897, "logits/rejected": 0.19048206508159637, "logps/chosen": -0.7735083103179932, "logps/rejected": -0.9938994646072388, "loss": 1.2924, "nll_loss": 1.0761052370071411, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07735083997249603, "rewards/margins": 0.02203909493982792, "rewards/rejected": -0.0993899330496788, "step": 1965 }, { "epoch": 0.5541101188383377, "grad_norm": 0.7421875, "learning_rate": 2.4644017239659145e-06, "log_odds_chosen": 0.20891109108924866, "log_odds_ratio": -0.6594338417053223, "logits/chosen": 0.2855902314186096, "logits/rejected": -0.0511033833026886, "logps/chosen": -0.9585043787956238, "logps/rejected": -1.067773699760437, "loss": 1.2839, "nll_loss": 1.1800204515457153, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09585044533014297, "rewards/margins": 0.010926928371191025, "rewards/rejected": -0.1067773699760437, "step": 1970 }, { "epoch": 0.5555164896983334, "grad_norm": 0.361328125, "learning_rate": 2.4521277645458968e-06, "log_odds_chosen": 0.209771990776062, "log_odds_ratio": -0.6410363912582397, "logits/chosen": 0.16523006558418274, "logits/rejected": 0.0723339170217514, "logps/chosen": -0.9708080291748047, "logps/rejected": -1.0834459066390991, "loss": 1.2706, "nll_loss": 1.1527389287948608, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09708081185817719, "rewards/margins": 0.011263787746429443, "rewards/rejected": -0.10834459215402603, "step": 1975 }, { "epoch": 0.5569228605583292, "grad_norm": 0.5234375, "learning_rate": 2.4398549593605336e-06, "log_odds_chosen": 0.30742546916007996, "log_odds_ratio": -0.7039249539375305, "logits/chosen": 0.04328065365552902, "logits/rejected": -0.12980946898460388, "logps/chosen": -0.9298011064529419, "logps/rejected": -1.132953405380249, "loss": 1.3455, "nll_loss": 1.2900826930999756, "rewards/accuracies": 0.5, "rewards/chosen": -0.09298010915517807, "rewards/margins": 0.020315242931246758, "rewards/rejected": -0.11329533904790878, "step": 1980 }, { "epoch": 0.558329231418325, "grad_norm": 0.578125, "learning_rate": 2.427583604316145e-06, "log_odds_chosen": 0.32450270652770996, "log_odds_ratio": -0.6246632933616638, "logits/chosen": 0.2687751054763794, "logits/rejected": 0.002015703823417425, "logps/chosen": -0.8845337629318237, "logps/rejected": -1.1134403944015503, "loss": 1.2423, "nll_loss": 1.1597281694412231, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08845336735248566, "rewards/margins": 0.022890685126185417, "rewards/rejected": -0.11134406179189682, "step": 1985 }, { "epoch": 0.5597356022783208, "grad_norm": 0.93359375, "learning_rate": 2.4153139952840873e-06, "log_odds_chosen": -0.2574736475944519, "log_odds_ratio": -0.9290412068367004, "logits/chosen": 0.17939214408397675, "logits/rejected": -0.03628557547926903, "logps/chosen": -1.1249353885650635, "logps/rejected": -0.925279438495636, "loss": 1.4328, "nll_loss": 1.4516746997833252, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.11249355226755142, "rewards/margins": -0.019965607672929764, "rewards/rejected": -0.09252794086933136, "step": 1990 }, { "epoch": 0.5611419731383166, "grad_norm": 0.98046875, "learning_rate": 2.4030464280936196e-06, "log_odds_chosen": -0.13905853033065796, "log_odds_ratio": -0.9099873304367065, "logits/chosen": 0.19021955132484436, "logits/rejected": 0.1553717404603958, "logps/chosen": -1.0612506866455078, "logps/rejected": -1.012810230255127, "loss": 1.3182, "nll_loss": 1.1444755792617798, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10612507164478302, "rewards/margins": -0.004844049923121929, "rewards/rejected": -0.10128102451562881, "step": 1995 }, { "epoch": 0.5625483439983123, "grad_norm": 1.4453125, "learning_rate": 2.390781198524771e-06, "log_odds_chosen": 0.12198109924793243, "log_odds_ratio": -0.8283858299255371, "logits/chosen": 0.011166423559188843, "logits/rejected": 0.008707046508789062, "logps/chosen": -1.1139929294586182, "logps/rejected": -1.1516040563583374, "loss": 1.3065, "nll_loss": 1.2652587890625, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11139927804470062, "rewards/margins": 0.003761128056794405, "rewards/rejected": -0.11516042053699493, "step": 2000 }, { "epoch": 0.5639547148583082, "grad_norm": 1.1484375, "learning_rate": 2.378518602301207e-06, "log_odds_chosen": 0.3775716722011566, "log_odds_ratio": -0.660047709941864, "logits/chosen": 0.018313337117433548, "logits/rejected": -0.12297528982162476, "logps/chosen": -0.871324360370636, "logps/rejected": -1.077490210533142, "loss": 1.3086, "nll_loss": 1.2721842527389526, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08713243901729584, "rewards/margins": 0.02061658538877964, "rewards/rejected": -0.10774902254343033, "step": 2005 }, { "epoch": 0.5653610857183039, "grad_norm": 0.359375, "learning_rate": 2.366258935083104e-06, "log_odds_chosen": 0.35200756788253784, "log_odds_ratio": -0.6445342898368835, "logits/chosen": 0.18469831347465515, "logits/rejected": 0.046655140817165375, "logps/chosen": -0.8012509346008301, "logps/rejected": -1.037642240524292, "loss": 1.2667, "nll_loss": 1.1248598098754883, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08012509346008301, "rewards/margins": 0.02363913133740425, "rewards/rejected": -0.10376423597335815, "step": 2010 }, { "epoch": 0.5667674565782997, "grad_norm": 0.53125, "learning_rate": 2.354002492460015e-06, "log_odds_chosen": 0.29196059703826904, "log_odds_ratio": -0.6961022615432739, "logits/chosen": 0.07946896553039551, "logits/rejected": 0.20001347362995148, "logps/chosen": -0.7962630987167358, "logps/rejected": -0.9276267886161804, "loss": 1.2736, "nll_loss": 1.0803678035736084, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07962630689144135, "rewards/margins": 0.013136359862983227, "rewards/rejected": -0.09276267141103745, "step": 2015 }, { "epoch": 0.5681738274382955, "grad_norm": 0.65234375, "learning_rate": 2.3417495699437494e-06, "log_odds_chosen": 0.0876246765255928, "log_odds_ratio": -0.8300532102584839, "logits/chosen": -0.09331229329109192, "logits/rejected": 0.013139751739799976, "logps/chosen": -1.0107604265213013, "logps/rejected": -1.016236662864685, "loss": 1.3041, "nll_loss": 1.3678447008132935, "rewards/accuracies": 0.75, "rewards/chosen": -0.10107602179050446, "rewards/margins": 0.0005476403748616576, "rewards/rejected": -0.10162366926670074, "step": 2020 }, { "epoch": 0.5695801982982912, "grad_norm": 0.64453125, "learning_rate": 2.32950046296124e-06, "log_odds_chosen": 0.22191683948040009, "log_odds_ratio": -0.6643384695053101, "logits/chosen": 0.31556597352027893, "logits/rejected": -0.18868893384933472, "logps/chosen": -0.8896587491035461, "logps/rejected": -1.0823167562484741, "loss": 1.2252, "nll_loss": 1.3100733757019043, "rewards/accuracies": 0.5, "rewards/chosen": -0.08896587044000626, "rewards/margins": 0.019265811890363693, "rewards/rejected": -0.10823168605566025, "step": 2025 }, { "epoch": 0.570986569158287, "grad_norm": 0.40625, "learning_rate": 2.317255466847428e-06, "log_odds_chosen": 0.20406213402748108, "log_odds_ratio": -0.6933099031448364, "logits/chosen": -0.07382262498140335, "logits/rejected": 0.03679852560162544, "logps/chosen": -0.8334574699401855, "logps/rejected": -0.9437686204910278, "loss": 1.2915, "nll_loss": 1.2893542051315308, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08334574103355408, "rewards/margins": 0.011031119152903557, "rewards/rejected": -0.09437686204910278, "step": 2030 }, { "epoch": 0.5723929400182828, "grad_norm": 0.765625, "learning_rate": 2.3050148768381346e-06, "log_odds_chosen": 0.1456795036792755, "log_odds_ratio": -0.7081176042556763, "logits/chosen": 0.11449067294597626, "logits/rejected": -0.00850601214915514, "logps/chosen": -1.0489826202392578, "logps/rejected": -1.1977183818817139, "loss": 1.3718, "nll_loss": 1.2525540590286255, "rewards/accuracies": 0.5, "rewards/chosen": -0.10489825904369354, "rewards/margins": 0.014873594045639038, "rewards/rejected": -0.11977185308933258, "step": 2035 }, { "epoch": 0.5737993108782786, "grad_norm": 0.84375, "learning_rate": 2.2927789880629505e-06, "log_odds_chosen": -0.17272424697875977, "log_odds_ratio": -0.8494987487792969, "logits/chosen": 0.0225498266518116, "logits/rejected": 0.16063106060028076, "logps/chosen": -0.9505000114440918, "logps/rejected": -0.9065941572189331, "loss": 1.2537, "nll_loss": 1.1457884311676025, "rewards/accuracies": 0.5, "rewards/chosen": -0.09504999965429306, "rewards/margins": -0.004390590824186802, "rewards/rejected": -0.09065941721200943, "step": 2040 }, { "epoch": 0.5752056817382744, "grad_norm": 0.640625, "learning_rate": 2.2805480955381146e-06, "log_odds_chosen": 0.10740852355957031, "log_odds_ratio": -0.7454892992973328, "logits/chosen": 0.041121773421764374, "logits/rejected": -0.10364113003015518, "logps/chosen": -1.0231186151504517, "logps/rejected": -1.064650297164917, "loss": 1.4006, "nll_loss": 1.347855806350708, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1023118644952774, "rewards/margins": 0.004153153393417597, "rewards/rejected": -0.10646501928567886, "step": 2045 }, { "epoch": 0.5766120525982702, "grad_norm": 2.65625, "learning_rate": 2.268322494159401e-06, "log_odds_chosen": 0.4567478597164154, "log_odds_ratio": -0.5950134992599487, "logits/chosen": 0.1594325602054596, "logits/rejected": -0.06351219117641449, "logps/chosen": -0.9101318120956421, "logps/rejected": -1.2298481464385986, "loss": 1.3683, "nll_loss": 1.3389989137649536, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09101317077875137, "rewards/margins": 0.03197162598371506, "rewards/rejected": -0.12298478931188583, "step": 2050 }, { "epoch": 0.5780184234582659, "grad_norm": 0.6484375, "learning_rate": 2.256102478695013e-06, "log_odds_chosen": 0.5688678622245789, "log_odds_ratio": -0.5590689182281494, "logits/chosen": 0.3047412037849426, "logits/rejected": 0.02218955010175705, "logps/chosen": -0.8990691304206848, "logps/rejected": -1.2767914533615112, "loss": 1.2824, "nll_loss": 1.1349036693572998, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08990690857172012, "rewards/margins": 0.03777223452925682, "rewards/rejected": -0.12767915427684784, "step": 2055 }, { "epoch": 0.5794247943182618, "grad_norm": 0.8359375, "learning_rate": 2.2438883437784724e-06, "log_odds_chosen": 0.32400697469711304, "log_odds_ratio": -0.6653164625167847, "logits/chosen": 0.126313254237175, "logits/rejected": -0.08219246566295624, "logps/chosen": -0.8283101320266724, "logps/rejected": -1.045828938484192, "loss": 1.2714, "nll_loss": 1.1874326467514038, "rewards/accuracies": 0.5, "rewards/chosen": -0.082831010222435, "rewards/margins": 0.021751878783106804, "rewards/rejected": -0.10458288341760635, "step": 2060 }, { "epoch": 0.5808311651782575, "grad_norm": 1.078125, "learning_rate": 2.231680383901516e-06, "log_odds_chosen": 0.2799515724182129, "log_odds_ratio": -0.6260203719139099, "logits/chosen": 0.2734755873680115, "logits/rejected": -0.0657648891210556, "logps/chosen": -0.997807502746582, "logps/rejected": -1.1968848705291748, "loss": 1.266, "nll_loss": 1.1785128116607666, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09978075325489044, "rewards/margins": 0.019907724112272263, "rewards/rejected": -0.1196884736418724, "step": 2065 }, { "epoch": 0.5822375360382532, "grad_norm": 0.474609375, "learning_rate": 2.219478893406997e-06, "log_odds_chosen": 0.6603912115097046, "log_odds_ratio": -0.49572378396987915, "logits/chosen": 0.08124915510416031, "logits/rejected": -0.0769030898809433, "logps/chosen": -0.7781981229782104, "logps/rejected": -1.1534805297851562, "loss": 1.3176, "nll_loss": 1.248460054397583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0778198167681694, "rewards/margins": 0.037528228014707565, "rewards/rejected": -0.11534804105758667, "step": 2070 }, { "epoch": 0.5836439068982491, "grad_norm": 0.47265625, "learning_rate": 2.2072841664817855e-06, "log_odds_chosen": 0.19003677368164062, "log_odds_ratio": -0.7124180793762207, "logits/chosen": 0.1900695264339447, "logits/rejected": 0.16181489825248718, "logps/chosen": -0.9449484944343567, "logps/rejected": -1.0786292552947998, "loss": 1.375, "nll_loss": 1.1930427551269531, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09449484944343567, "rewards/margins": 0.013368071988224983, "rewards/rejected": -0.1078629270195961, "step": 2075 }, { "epoch": 0.5850502777582448, "grad_norm": 0.359375, "learning_rate": 2.195096497149679e-06, "log_odds_chosen": 0.6070584058761597, "log_odds_ratio": -0.6306462287902832, "logits/chosen": 0.3088809847831726, "logits/rejected": 0.16098138689994812, "logps/chosen": -1.0000215768814087, "logps/rejected": -1.5181127786636353, "loss": 1.2809, "nll_loss": 1.1490371227264404, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10000214725732803, "rewards/margins": 0.051809124648571014, "rewards/rejected": -0.15181128680706024, "step": 2080 }, { "epoch": 0.5864566486182407, "grad_norm": 0.7265625, "learning_rate": 2.182916179264309e-06, "log_odds_chosen": 0.22232560813426971, "log_odds_ratio": -0.6624404191970825, "logits/chosen": 0.025626610964536667, "logits/rejected": 0.16651883721351624, "logps/chosen": -0.8706231117248535, "logps/rejected": -1.018607497215271, "loss": 1.2903, "nll_loss": 1.2660566568374634, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08706231415271759, "rewards/margins": 0.014798441901803017, "rewards/rejected": -0.10186074674129486, "step": 2085 }, { "epoch": 0.5878630194782364, "grad_norm": 0.5625, "learning_rate": 2.170743506502061e-06, "log_odds_chosen": 0.5465322732925415, "log_odds_ratio": -0.5666033625602722, "logits/chosen": 0.0625384971499443, "logits/rejected": 0.12112072855234146, "logps/chosen": -0.685492217540741, "logps/rejected": -0.931922435760498, "loss": 1.178, "nll_loss": 1.129865288734436, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06854921579360962, "rewards/margins": 0.02464301511645317, "rewards/rejected": -0.09319223463535309, "step": 2090 }, { "epoch": 0.5892693903382322, "grad_norm": 0.431640625, "learning_rate": 2.1585787723549886e-06, "log_odds_chosen": 0.21627631783485413, "log_odds_ratio": -0.6291038393974304, "logits/chosen": 0.3572022616863251, "logits/rejected": 0.17766115069389343, "logps/chosen": -0.924892783164978, "logps/rejected": -1.0687482357025146, "loss": 1.3015, "nll_loss": 1.1631147861480713, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09248928725719452, "rewards/margins": 0.014385545626282692, "rewards/rejected": -0.10687483847141266, "step": 2095 }, { "epoch": 0.590675761198228, "grad_norm": 0.55859375, "learning_rate": 2.146422270123741e-06, "log_odds_chosen": 0.5283633470535278, "log_odds_ratio": -0.6173766255378723, "logits/chosen": -0.03145185858011246, "logits/rejected": 0.03247164934873581, "logps/chosen": -0.8153474926948547, "logps/rejected": -1.142411470413208, "loss": 1.2569, "nll_loss": 1.1404359340667725, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08153475821018219, "rewards/margins": 0.03270639851689339, "rewards/rejected": -0.11424114555120468, "step": 2100 }, { "epoch": 0.5920821320582238, "grad_norm": 0.431640625, "learning_rate": 2.134274292910489e-06, "log_odds_chosen": 0.44718852639198303, "log_odds_ratio": -0.6034306287765503, "logits/chosen": 0.19990482926368713, "logits/rejected": 0.05019756406545639, "logps/chosen": -0.8512241244316101, "logps/rejected": -1.1330512762069702, "loss": 1.1902, "nll_loss": 1.1092191934585571, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08512241393327713, "rewards/margins": 0.028182705864310265, "rewards/rejected": -0.11330513656139374, "step": 2105 }, { "epoch": 0.5934885029182195, "grad_norm": 1.015625, "learning_rate": 2.1221351336118587e-06, "log_odds_chosen": -0.05416733771562576, "log_odds_ratio": -0.8569121360778809, "logits/chosen": 0.12580278515815735, "logits/rejected": -0.2401326447725296, "logps/chosen": -1.075391411781311, "logps/rejected": -1.1087602376937866, "loss": 1.302, "nll_loss": 1.4039140939712524, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10753913968801498, "rewards/margins": 0.003336886642500758, "rewards/rejected": -0.11087602376937866, "step": 2110 }, { "epoch": 0.5948948737782153, "grad_norm": 0.74609375, "learning_rate": 2.1100050849118716e-06, "log_odds_chosen": 0.3060021996498108, "log_odds_ratio": -0.6356474757194519, "logits/chosen": 0.17181582748889923, "logits/rejected": 0.06386371701955795, "logps/chosen": -0.9602164030075073, "logps/rejected": -1.1083872318267822, "loss": 1.2469, "nll_loss": 1.1497198343276978, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09602165222167969, "rewards/margins": 0.014817061834037304, "rewards/rejected": -0.11083869636058807, "step": 2115 }, { "epoch": 0.5963012446382111, "grad_norm": 0.58984375, "learning_rate": 2.097884439274883e-06, "log_odds_chosen": 0.6273894906044006, "log_odds_ratio": -0.5269737839698792, "logits/chosen": 0.21485364437103271, "logits/rejected": -0.256959468126297, "logps/chosen": -0.7991748452186584, "logps/rejected": -1.1641663312911987, "loss": 1.3441, "nll_loss": 1.1461737155914307, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07991747558116913, "rewards/margins": 0.03649916127324104, "rewards/rejected": -0.11641664803028107, "step": 2120 }, { "epoch": 0.5977076154982068, "grad_norm": 0.78515625, "learning_rate": 2.0857734889385375e-06, "log_odds_chosen": 0.4728359282016754, "log_odds_ratio": -0.5811837911605835, "logits/chosen": 0.0014868139987811446, "logits/rejected": 0.14086367189884186, "logps/chosen": -0.8561526536941528, "logps/rejected": -1.0794785022735596, "loss": 1.248, "nll_loss": 1.1451321840286255, "rewards/accuracies": 0.75, "rewards/chosen": -0.08561527729034424, "rewards/margins": 0.022332582622766495, "rewards/rejected": -0.10794784873723984, "step": 2125 }, { "epoch": 0.5991139863582027, "grad_norm": 0.57421875, "learning_rate": 2.0736725259067152e-06, "log_odds_chosen": -0.012895092368125916, "log_odds_ratio": -0.7812901735305786, "logits/chosen": -0.04183458536863327, "logits/rejected": -0.05972647666931152, "logps/chosen": -0.9985347986221313, "logps/rejected": -1.0221898555755615, "loss": 1.3272, "nll_loss": 1.202670693397522, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09985347837209702, "rewards/margins": 0.0023655148688703775, "rewards/rejected": -0.10221900045871735, "step": 2130 }, { "epoch": 0.6005203572181984, "grad_norm": 0.6171875, "learning_rate": 2.0615818419424965e-06, "log_odds_chosen": 0.5170741677284241, "log_odds_ratio": -0.5714846253395081, "logits/chosen": -0.12221293151378632, "logits/rejected": 0.11729947477579117, "logps/chosen": -0.950890839099884, "logps/rejected": -1.2947622537612915, "loss": 1.2321, "nll_loss": 1.2039806842803955, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09508909285068512, "rewards/margins": 0.034387145191431046, "rewards/rejected": -0.12947621941566467, "step": 2135 }, { "epoch": 0.6019267280781943, "grad_norm": 0.87109375, "learning_rate": 2.0495017285611267e-06, "log_odds_chosen": 0.5369144082069397, "log_odds_ratio": -0.5662406086921692, "logits/chosen": 0.021474510431289673, "logits/rejected": -0.13889047503471375, "logps/chosen": -0.9632217288017273, "logps/rejected": -1.2753288745880127, "loss": 1.3178, "nll_loss": 1.243088722229004, "rewards/accuracies": 0.75, "rewards/chosen": -0.09632216393947601, "rewards/margins": 0.031210720539093018, "rewards/rejected": -0.12753288447856903, "step": 2140 }, { "epoch": 0.60333309893819, "grad_norm": 0.46484375, "learning_rate": 2.0374324770229852e-06, "log_odds_chosen": 0.44575804471969604, "log_odds_ratio": -0.5990105271339417, "logits/chosen": 0.16773398220539093, "logits/rejected": -0.01770983263850212, "logps/chosen": -0.8211402893066406, "logps/rejected": -1.067427396774292, "loss": 1.428, "nll_loss": 1.1296110153198242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08211404085159302, "rewards/margins": 0.024628710001707077, "rewards/rejected": -0.1067427545785904, "step": 2145 }, { "epoch": 0.6047394697981858, "grad_norm": 0.7421875, "learning_rate": 2.0253743783265644e-06, "log_odds_chosen": 0.043892212212085724, "log_odds_ratio": -0.7687390446662903, "logits/chosen": 0.11299238353967667, "logits/rejected": 0.19744233787059784, "logps/chosen": -0.8225703239440918, "logps/rejected": -0.8660001754760742, "loss": 1.2762, "nll_loss": 1.1397249698638916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08225702494382858, "rewards/margins": 0.00434298487380147, "rewards/rejected": -0.08660002052783966, "step": 2150 }, { "epoch": 0.6061458406581816, "grad_norm": 0.8515625, "learning_rate": 2.013327723201456e-06, "log_odds_chosen": 0.8239778280258179, "log_odds_ratio": -0.5448077917098999, "logits/chosen": 0.11527810990810394, "logits/rejected": -0.11592914909124374, "logps/chosen": -0.9209944009780884, "logps/rejected": -1.4681470394134521, "loss": 1.426, "nll_loss": 1.3432397842407227, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09209943562746048, "rewards/margins": 0.05471527576446533, "rewards/rejected": -0.14681470394134521, "step": 2155 }, { "epoch": 0.6075522115181774, "grad_norm": 0.87890625, "learning_rate": 2.001292802101334e-06, "log_odds_chosen": 0.45466384291648865, "log_odds_ratio": -0.6190831661224365, "logits/chosen": -0.057558946311473846, "logits/rejected": 0.02829205058515072, "logps/chosen": -1.1019726991653442, "logps/rejected": -1.4023187160491943, "loss": 1.4079, "nll_loss": 1.5371198654174805, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11019726097583771, "rewards/margins": 0.030034605413675308, "rewards/rejected": -0.1402318775653839, "step": 2160 }, { "epoch": 0.6089585823781731, "grad_norm": 0.494140625, "learning_rate": 1.989269905196962e-06, "log_odds_chosen": 0.5837582349777222, "log_odds_ratio": -0.4719756245613098, "logits/chosen": 0.23967739939689636, "logits/rejected": -0.15985237061977386, "logps/chosen": -0.7570546865463257, "logps/rejected": -1.098534345626831, "loss": 1.3862, "nll_loss": 1.4102153778076172, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07570547610521317, "rewards/margins": 0.034147970378398895, "rewards/rejected": -0.10985343158245087, "step": 2165 }, { "epoch": 0.6103649532381689, "grad_norm": 0.70703125, "learning_rate": 1.9772593223691884e-06, "log_odds_chosen": 0.049016643315553665, "log_odds_ratio": -0.8077160120010376, "logits/chosen": 0.2120121270418167, "logits/rejected": 0.15697090327739716, "logps/chosen": -1.0200127363204956, "logps/rejected": -0.9740360379219055, "loss": 1.2599, "nll_loss": 1.2477294206619263, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.10200126469135284, "rewards/margins": -0.004597645718604326, "rewards/rejected": -0.0974036157131195, "step": 2170 }, { "epoch": 0.6117713240981647, "grad_norm": 0.396484375, "learning_rate": 1.9652613432019603e-06, "log_odds_chosen": 0.439436674118042, "log_odds_ratio": -0.5775164365768433, "logits/chosen": 0.33784905076026917, "logits/rejected": -0.16036701202392578, "logps/chosen": -0.7958052754402161, "logps/rejected": -1.0919268131256104, "loss": 1.3179, "nll_loss": 1.0887128114700317, "rewards/accuracies": 0.75, "rewards/chosen": -0.07958053052425385, "rewards/margins": 0.029612144455313683, "rewards/rejected": -0.10919268429279327, "step": 2175 }, { "epoch": 0.6131776949581604, "grad_norm": 1.34375, "learning_rate": 1.9532762569753413e-06, "log_odds_chosen": 0.46795496344566345, "log_odds_ratio": -0.571441650390625, "logits/chosen": 0.1472143828868866, "logits/rejected": -0.10492970049381256, "logps/chosen": -0.9131848216056824, "logps/rejected": -1.2060058116912842, "loss": 1.3282, "nll_loss": 1.2877761125564575, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09131848812103271, "rewards/margins": 0.02928210236132145, "rewards/rejected": -0.12060059607028961, "step": 2180 }, { "epoch": 0.6145840658181563, "grad_norm": 0.83203125, "learning_rate": 1.9413043526585377e-06, "log_odds_chosen": 0.18188393115997314, "log_odds_ratio": -0.6771284937858582, "logits/chosen": 0.16518446803092957, "logits/rejected": -0.010451188310980797, "logps/chosen": -0.8314681053161621, "logps/rejected": -0.9089493751525879, "loss": 1.1942, "nll_loss": 1.155928611755371, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08314681798219681, "rewards/margins": 0.0077481335029006, "rewards/rejected": -0.09089495241641998, "step": 2185 }, { "epoch": 0.615990436678152, "grad_norm": 0.353515625, "learning_rate": 1.9293459189029297e-06, "log_odds_chosen": 0.17067180573940277, "log_odds_ratio": -0.6768472194671631, "logits/chosen": 0.3366175591945648, "logits/rejected": -0.07003750652074814, "logps/chosen": -0.8593961596488953, "logps/rejected": -1.0146676301956177, "loss": 1.2568, "nll_loss": 1.1882942914962769, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08593961596488953, "rewards/margins": 0.015527153387665749, "rewards/rejected": -0.10146676003932953, "step": 2190 }, { "epoch": 0.6173968075381479, "grad_norm": 0.60546875, "learning_rate": 1.9174012440351115e-06, "log_odds_chosen": 0.05603064224123955, "log_odds_ratio": -0.770088791847229, "logits/chosen": 0.12057554721832275, "logits/rejected": 0.2600278854370117, "logps/chosen": -0.886670708656311, "logps/rejected": -0.8796000480651855, "loss": 1.3116, "nll_loss": 1.1579667329788208, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.08866707235574722, "rewards/margins": -0.0007070727879181504, "rewards/rejected": -0.08796000480651855, "step": 2195 }, { "epoch": 0.6188031783981436, "grad_norm": 0.5859375, "learning_rate": 1.9054706160499425e-06, "log_odds_chosen": 0.9510795474052429, "log_odds_ratio": -0.497814804315567, "logits/chosen": -0.029894907027482986, "logits/rejected": -0.0075707389041781425, "logps/chosen": -0.7396495938301086, "logps/rejected": -1.2674949169158936, "loss": 1.208, "nll_loss": 1.152024745941162, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07396496832370758, "rewards/margins": 0.05278454348444939, "rewards/rejected": -0.12674950063228607, "step": 2200 }, { "epoch": 0.6202095492581394, "grad_norm": 0.38671875, "learning_rate": 1.8935543226035991e-06, "log_odds_chosen": 0.3846930265426636, "log_odds_ratio": -0.6503673791885376, "logits/chosen": 0.04987801983952522, "logits/rejected": -0.14280924201011658, "logps/chosen": -1.050396203994751, "logps/rejected": -1.3851416110992432, "loss": 1.2002, "nll_loss": 1.2732374668121338, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10503961890935898, "rewards/margins": 0.03347453102469444, "rewards/rejected": -0.13851414620876312, "step": 2205 }, { "epoch": 0.6216159201181352, "grad_norm": 0.5234375, "learning_rate": 1.8816526510066443e-06, "log_odds_chosen": 0.016715114936232567, "log_odds_ratio": -0.7636454701423645, "logits/chosen": 0.19401657581329346, "logits/rejected": 0.11138969659805298, "logps/chosen": -0.9060971140861511, "logps/rejected": -0.8897374868392944, "loss": 1.3381, "nll_loss": 1.1524732112884521, "rewards/accuracies": 0.5, "rewards/chosen": -0.09060971438884735, "rewards/margins": -0.0016359605360776186, "rewards/rejected": -0.0889737457036972, "step": 2210 }, { "epoch": 0.6230222909781309, "grad_norm": 0.451171875, "learning_rate": 1.869765888217095e-06, "log_odds_chosen": 0.35817837715148926, "log_odds_ratio": -0.608710765838623, "logits/chosen": 0.34807461500167847, "logits/rejected": 0.004034848418086767, "logps/chosen": -0.9350178837776184, "logps/rejected": -1.145564079284668, "loss": 1.2227, "nll_loss": 1.1987204551696777, "rewards/accuracies": 0.5, "rewards/chosen": -0.09350178390741348, "rewards/margins": 0.02105463482439518, "rewards/rejected": -0.11455640941858292, "step": 2215 }, { "epoch": 0.6244286618381267, "grad_norm": 1.1328125, "learning_rate": 1.8578943208335064e-06, "log_odds_chosen": 0.5045295357704163, "log_odds_ratio": -0.5768045783042908, "logits/chosen": 0.19994623959064484, "logits/rejected": 0.03926212340593338, "logps/chosen": -0.9680612683296204, "logps/rejected": -1.2390127182006836, "loss": 1.2678, "nll_loss": 1.165244460105896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0968061313033104, "rewards/margins": 0.0270951297134161, "rewards/rejected": -0.12390126287937164, "step": 2220 }, { "epoch": 0.6258350326981225, "grad_norm": 0.66796875, "learning_rate": 1.8460382350880631e-06, "log_odds_chosen": 0.49432697892189026, "log_odds_ratio": -0.5883679986000061, "logits/chosen": 0.4599657952785492, "logits/rejected": -0.22167901694774628, "logps/chosen": -0.9144092798233032, "logps/rejected": -1.2420196533203125, "loss": 1.2947, "nll_loss": 1.199195384979248, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09144093096256256, "rewards/margins": 0.03276105225086212, "rewards/rejected": -0.12420199066400528, "step": 2225 }, { "epoch": 0.6272414035581183, "grad_norm": 0.396484375, "learning_rate": 1.8341979168396729e-06, "log_odds_chosen": 0.2862274944782257, "log_odds_ratio": -0.6693924069404602, "logits/chosen": 0.08865300565958023, "logits/rejected": -0.13743454217910767, "logps/chosen": -1.0582154989242554, "logps/rejected": -1.2691776752471924, "loss": 1.349, "nll_loss": 1.4249746799468994, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10582154989242554, "rewards/margins": 0.021096205338835716, "rewards/rejected": -0.126917764544487, "step": 2230 }, { "epoch": 0.628647774418114, "grad_norm": 0.890625, "learning_rate": 1.8223736515670815e-06, "log_odds_chosen": -0.037134379148483276, "log_odds_ratio": -0.8214631080627441, "logits/chosen": -0.0823502168059349, "logits/rejected": 0.12497730553150177, "logps/chosen": -0.932249903678894, "logps/rejected": -0.8945218920707703, "loss": 1.3146, "nll_loss": 1.2705105543136597, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09322498738765717, "rewards/margins": -0.0037727851886302233, "rewards/rejected": -0.08945219963788986, "step": 2235 }, { "epoch": 0.6300541452781099, "grad_norm": 0.5234375, "learning_rate": 1.8105657243619823e-06, "log_odds_chosen": 0.34684863686561584, "log_odds_ratio": -0.7218656539916992, "logits/chosen": 0.14807887375354767, "logits/rejected": 0.03153051808476448, "logps/chosen": -0.8757486343383789, "logps/rejected": -1.0104224681854248, "loss": 1.3259, "nll_loss": 1.1627438068389893, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08757485449314117, "rewards/margins": 0.013467395678162575, "rewards/rejected": -0.1010422483086586, "step": 2240 }, { "epoch": 0.6314605161381056, "grad_norm": 0.8671875, "learning_rate": 1.7987744199221486e-06, "log_odds_chosen": 0.6574544906616211, "log_odds_ratio": -0.5802866220474243, "logits/chosen": 0.1322064995765686, "logits/rejected": -0.11798272281885147, "logps/chosen": -0.8747881650924683, "logps/rejected": -1.334978699684143, "loss": 1.263, "nll_loss": 1.0923330783843994, "rewards/accuracies": 0.75, "rewards/chosen": -0.08747883141040802, "rewards/margins": 0.04601903632283211, "rewards/rejected": -0.13349786400794983, "step": 2245 }, { "epoch": 0.6328668869981015, "grad_norm": 0.5234375, "learning_rate": 1.787000022544564e-06, "log_odds_chosen": 0.5828297138214111, "log_odds_ratio": -0.5941449403762817, "logits/chosen": 0.22428825497627258, "logits/rejected": 0.07940506190061569, "logps/chosen": -0.733045756816864, "logps/rejected": -1.0111888647079468, "loss": 1.2972, "nll_loss": 1.1566951274871826, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07330457866191864, "rewards/margins": 0.027814310044050217, "rewards/rejected": -0.10111889988183975, "step": 2250 }, { "epoch": 0.6342732578580972, "grad_norm": 0.60546875, "learning_rate": 1.7752428161185722e-06, "log_odds_chosen": 0.31006038188934326, "log_odds_ratio": -0.6792932748794556, "logits/chosen": 0.16227427124977112, "logits/rejected": -0.025093629956245422, "logps/chosen": -0.9472533464431763, "logps/rejected": -1.1567457914352417, "loss": 1.2279, "nll_loss": 1.1718425750732422, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09472532570362091, "rewards/margins": 0.020949259400367737, "rewards/rejected": -0.11567459255456924, "step": 2255 }, { "epoch": 0.6356796287180929, "grad_norm": 0.7109375, "learning_rate": 1.7635030841190305e-06, "log_odds_chosen": 0.37945660948753357, "log_odds_ratio": -0.6706798076629639, "logits/chosen": 0.12936149537563324, "logits/rejected": 0.19631770253181458, "logps/chosen": -0.9205350875854492, "logps/rejected": -1.1315011978149414, "loss": 1.1826, "nll_loss": 1.1098390817642212, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09205349534749985, "rewards/margins": 0.021096620708703995, "rewards/rejected": -0.11315013468265533, "step": 2260 }, { "epoch": 0.6370859995780888, "grad_norm": 0.53125, "learning_rate": 1.7517811095994735e-06, "log_odds_chosen": 0.6533330678939819, "log_odds_ratio": -0.49430108070373535, "logits/chosen": 0.004287800285965204, "logits/rejected": -0.12239503860473633, "logps/chosen": -0.9396077990531921, "logps/rejected": -1.3777822256088257, "loss": 1.4278, "nll_loss": 1.3003541231155396, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09396077692508698, "rewards/margins": 0.043817438185214996, "rewards/rejected": -0.13777822256088257, "step": 2265 }, { "epoch": 0.6384923704380845, "grad_norm": 0.765625, "learning_rate": 1.7400771751852918e-06, "log_odds_chosen": 0.3625580668449402, "log_odds_ratio": -0.6069513559341431, "logits/chosen": 0.1046602874994278, "logits/rejected": -0.14799770712852478, "logps/chosen": -0.9565946459770203, "logps/rejected": -1.1671000719070435, "loss": 1.2987, "nll_loss": 1.3088912963867188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09565945714712143, "rewards/margins": 0.021050548180937767, "rewards/rejected": -0.11671002209186554, "step": 2270 }, { "epoch": 0.6398987412980803, "grad_norm": 1.1484375, "learning_rate": 1.7283915630669152e-06, "log_odds_chosen": 0.6291302442550659, "log_odds_ratio": -0.5847497582435608, "logits/chosen": 0.27099600434303284, "logits/rejected": -0.10813238471746445, "logps/chosen": -0.8863940238952637, "logps/rejected": -1.34452486038208, "loss": 1.35, "nll_loss": 1.2117236852645874, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08863941580057144, "rewards/margins": 0.04581306502223015, "rewards/rejected": -0.1344524621963501, "step": 2275 }, { "epoch": 0.6413051121580761, "grad_norm": 0.52734375, "learning_rate": 1.7167245549930084e-06, "log_odds_chosen": 0.4684177041053772, "log_odds_ratio": -0.6205426454544067, "logits/chosen": 0.19276770949363708, "logits/rejected": -0.0946609154343605, "logps/chosen": -1.003852367401123, "logps/rejected": -1.2560100555419922, "loss": 1.3595, "nll_loss": 1.323639988899231, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10038523375988007, "rewards/margins": 0.025215759873390198, "rewards/rejected": -0.12560100853443146, "step": 2280 }, { "epoch": 0.6427114830180719, "grad_norm": 0.82421875, "learning_rate": 1.705076432263681e-06, "log_odds_chosen": 0.18357697129249573, "log_odds_ratio": -0.685191810131073, "logits/chosen": 0.3408302664756775, "logits/rejected": -0.02346021868288517, "logps/chosen": -0.8450328707695007, "logps/rejected": -0.9442952871322632, "loss": 1.3166, "nll_loss": 1.247656226158142, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08450329303741455, "rewards/margins": 0.009926247410476208, "rewards/rejected": -0.09442953020334244, "step": 2285 }, { "epoch": 0.6441178538780676, "grad_norm": 0.70703125, "learning_rate": 1.693447475723701e-06, "log_odds_chosen": 0.4198075234889984, "log_odds_ratio": -0.6046397686004639, "logits/chosen": 0.02919054962694645, "logits/rejected": -0.09719662368297577, "logps/chosen": -0.8987553715705872, "logps/rejected": -1.1064974069595337, "loss": 1.3942, "nll_loss": 1.2800304889678955, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08987553417682648, "rewards/margins": 0.020774196833372116, "rewards/rejected": -0.1106497272849083, "step": 2290 }, { "epoch": 0.6455242247380635, "grad_norm": 0.82421875, "learning_rate": 1.6818379657557276e-06, "log_odds_chosen": 0.2639097571372986, "log_odds_ratio": -0.6733454465866089, "logits/chosen": 0.13979147374629974, "logits/rejected": -0.036435022950172424, "logps/chosen": -0.8567923307418823, "logps/rejected": -1.0064163208007812, "loss": 1.2637, "nll_loss": 1.1874176263809204, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08567923307418823, "rewards/margins": 0.014962397515773773, "rewards/rejected": -0.100641630589962, "step": 2295 }, { "epoch": 0.6469305955980592, "grad_norm": 0.69140625, "learning_rate": 1.6702481822735463e-06, "log_odds_chosen": 0.5099008679389954, "log_odds_ratio": -0.6696144342422485, "logits/chosen": 0.1339375525712967, "logits/rejected": -0.07086416333913803, "logps/chosen": -0.6614585518836975, "logps/rejected": -1.0182112455368042, "loss": 1.2939, "nll_loss": 1.266301155090332, "rewards/accuracies": 0.75, "rewards/chosen": -0.06614585220813751, "rewards/margins": 0.0356752835214138, "rewards/rejected": -0.10182113945484161, "step": 2300 }, { "epoch": 0.6483369664580549, "grad_norm": 0.84765625, "learning_rate": 1.6586784047153264e-06, "log_odds_chosen": 0.1889144480228424, "log_odds_ratio": -0.7223377227783203, "logits/chosen": 0.18328048288822174, "logits/rejected": 0.0038080899976193905, "logps/chosen": -1.0151089429855347, "logps/rejected": -1.1970748901367188, "loss": 1.476, "nll_loss": 1.4434733390808105, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1015109196305275, "rewards/margins": 0.018196579068899155, "rewards/rejected": -0.11970750242471695, "step": 2305 }, { "epoch": 0.6497433373180508, "grad_norm": 1.2265625, "learning_rate": 1.6471289120368755e-06, "log_odds_chosen": 0.061086464673280716, "log_odds_ratio": -0.806367039680481, "logits/chosen": 0.060416080057621, "logits/rejected": 0.13000234961509705, "logps/chosen": -1.0920124053955078, "logps/rejected": -1.1408212184906006, "loss": 1.3669, "nll_loss": 1.3251636028289795, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10920125246047974, "rewards/margins": 0.004880874417722225, "rewards/rejected": -0.11408212035894394, "step": 2310 }, { "epoch": 0.6511497081780465, "grad_norm": 0.81640625, "learning_rate": 1.6355999827049214e-06, "log_odds_chosen": -0.14868974685668945, "log_odds_ratio": -0.8572866320610046, "logits/chosen": 0.33928146958351135, "logits/rejected": 0.13666602969169617, "logps/chosen": -0.9500266909599304, "logps/rejected": -0.9167013168334961, "loss": 1.3243, "nll_loss": 1.296018362045288, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0950026735663414, "rewards/margins": -0.003332542721182108, "rewards/rejected": -0.09167014062404633, "step": 2315 }, { "epoch": 0.6525560790380424, "grad_norm": 0.8046875, "learning_rate": 1.6240918946903923e-06, "log_odds_chosen": 0.08933033049106598, "log_odds_ratio": -0.8013785481452942, "logits/chosen": 0.3539492189884186, "logits/rejected": 0.07136426866054535, "logps/chosen": -1.0159389972686768, "logps/rejected": -1.0767545700073242, "loss": 1.306, "nll_loss": 1.2898885011672974, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10159389674663544, "rewards/margins": 0.00608155969530344, "rewards/rejected": -0.1076754555106163, "step": 2320 }, { "epoch": 0.6539624498980381, "grad_norm": 0.91015625, "learning_rate": 1.612604925461717e-06, "log_odds_chosen": 0.3929768204689026, "log_odds_ratio": -0.5996061563491821, "logits/chosen": 0.21692538261413574, "logits/rejected": 0.08881232887506485, "logps/chosen": -0.7952896356582642, "logps/rejected": -1.0285656452178955, "loss": 1.2243, "nll_loss": 1.0526376962661743, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07952895760536194, "rewards/margins": 0.023327605798840523, "rewards/rejected": -0.10285656154155731, "step": 2325 }, { "epoch": 0.6553688207580339, "grad_norm": 0.49609375, "learning_rate": 1.6011393519781373e-06, "log_odds_chosen": 0.46675533056259155, "log_odds_ratio": -0.5259859561920166, "logits/chosen": 0.25100797414779663, "logits/rejected": 0.05385111644864082, "logps/chosen": -0.8101531267166138, "logps/rejected": -1.090301275253296, "loss": 1.2458, "nll_loss": 1.068930983543396, "rewards/accuracies": 0.75, "rewards/chosen": -0.08101530373096466, "rewards/margins": 0.02801482379436493, "rewards/rejected": -0.1090301126241684, "step": 2330 }, { "epoch": 0.6567751916180297, "grad_norm": 0.68359375, "learning_rate": 1.5896954506830251e-06, "log_odds_chosen": 0.08317549526691437, "log_odds_ratio": -0.7241812348365784, "logits/chosen": 0.018165847286581993, "logits/rejected": -0.045239925384521484, "logps/chosen": -1.012702465057373, "logps/rejected": -1.0038830041885376, "loss": 1.3008, "nll_loss": 1.1472804546356201, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10127024352550507, "rewards/margins": -0.0008819475769996643, "rewards/rejected": -0.1003882884979248, "step": 2335 }, { "epoch": 0.6581815624780255, "grad_norm": 0.86328125, "learning_rate": 1.5782734974972207e-06, "log_odds_chosen": 0.2770829200744629, "log_odds_ratio": -0.6474322080612183, "logits/chosen": 0.3198090195655823, "logits/rejected": 0.0875503420829773, "logps/chosen": -0.8086474537849426, "logps/rejected": -0.9755656123161316, "loss": 1.2639, "nll_loss": 1.0375709533691406, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08086474239826202, "rewards/margins": 0.016691816970705986, "rewards/rejected": -0.09755655378103256, "step": 2340 }, { "epoch": 0.6595879333380212, "grad_norm": 0.62109375, "learning_rate": 1.5668737678123808e-06, "log_odds_chosen": 0.36177942156791687, "log_odds_ratio": -0.6418853402137756, "logits/chosen": 0.32350581884384155, "logits/rejected": -0.033881280571222305, "logps/chosen": -0.9356054067611694, "logps/rejected": -1.2087011337280273, "loss": 1.4322, "nll_loss": 1.203917384147644, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09356053918600082, "rewards/margins": 0.027309581637382507, "rewards/rejected": -0.12087012827396393, "step": 2345 }, { "epoch": 0.660994304198017, "grad_norm": 0.59765625, "learning_rate": 1.5554965364843355e-06, "log_odds_chosen": 0.636290967464447, "log_odds_ratio": -0.582929790019989, "logits/chosen": 0.13164618611335754, "logits/rejected": 0.20954278111457825, "logps/chosen": -0.7973164916038513, "logps/rejected": -1.2046148777008057, "loss": 1.3067, "nll_loss": 1.1008788347244263, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07973165065050125, "rewards/margins": 0.04072984308004379, "rewards/rejected": -0.12046150118112564, "step": 2350 }, { "epoch": 0.6624006750580128, "grad_norm": 0.9140625, "learning_rate": 1.5441420778264647e-06, "log_odds_chosen": 0.40660586953163147, "log_odds_ratio": -0.5770841836929321, "logits/chosen": -0.06428630650043488, "logits/rejected": 0.038785386830568314, "logps/chosen": -0.8069353103637695, "logps/rejected": -1.0121805667877197, "loss": 1.284, "nll_loss": 1.1668837070465088, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08069352805614471, "rewards/margins": 0.0205245278775692, "rewards/rejected": -0.10121805965900421, "step": 2355 }, { "epoch": 0.6638070459180085, "grad_norm": 0.60546875, "learning_rate": 1.5328106656030805e-06, "log_odds_chosen": 0.2510248124599457, "log_odds_ratio": -0.6650518178939819, "logits/chosen": 0.2300342619419098, "logits/rejected": 0.23015658557415009, "logps/chosen": -0.9045982360839844, "logps/rejected": -1.0975348949432373, "loss": 1.2664, "nll_loss": 1.1747139692306519, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09045982360839844, "rewards/margins": 0.01929367706179619, "rewards/rejected": -0.10975348949432373, "step": 2360 }, { "epoch": 0.6652134167780044, "grad_norm": 0.58984375, "learning_rate": 1.52150257302283e-06, "log_odds_chosen": -0.03923415392637253, "log_odds_ratio": -0.8279203176498413, "logits/chosen": 0.12241461127996445, "logits/rejected": 0.2429099977016449, "logps/chosen": -1.0360397100448608, "logps/rejected": -1.0254569053649902, "loss": 1.2996, "nll_loss": 1.235386848449707, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10360397398471832, "rewards/margins": -0.0010582676623016596, "rewards/rejected": -0.10254569351673126, "step": 2365 }, { "epoch": 0.6666197876380001, "grad_norm": 1.046875, "learning_rate": 1.510218072732107e-06, "log_odds_chosen": 0.1947515308856964, "log_odds_ratio": -0.7813352346420288, "logits/chosen": 0.027283471077680588, "logits/rejected": -0.05581042915582657, "logps/chosen": -0.8156864047050476, "logps/rejected": -0.9339845776557922, "loss": 1.3315, "nll_loss": 1.197085976600647, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0815686509013176, "rewards/margins": 0.01182980090379715, "rewards/rejected": -0.09339844435453415, "step": 2370 }, { "epoch": 0.668026158497996, "grad_norm": 1.2109375, "learning_rate": 1.4989574368084757e-06, "log_odds_chosen": 0.5522528886795044, "log_odds_ratio": -0.6361822485923767, "logits/chosen": -0.045281365513801575, "logits/rejected": -0.08735646307468414, "logps/chosen": -0.8376988172531128, "logps/rejected": -1.1683425903320312, "loss": 1.287, "nll_loss": 1.156860589981079, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08376988023519516, "rewards/margins": 0.03306437283754349, "rewards/rejected": -0.11683426052331924, "step": 2375 }, { "epoch": 0.6694325293579917, "grad_norm": 0.4453125, "learning_rate": 1.4877209367541167e-06, "log_odds_chosen": 0.46589261293411255, "log_odds_ratio": -0.6088491678237915, "logits/chosen": 0.18501324951648712, "logits/rejected": -0.07747994363307953, "logps/chosen": -0.8727632761001587, "logps/rejected": -1.181078553199768, "loss": 1.2855, "nll_loss": 1.294697880744934, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08727632462978363, "rewards/margins": 0.030831540003418922, "rewards/rejected": -0.11810784041881561, "step": 2380 }, { "epoch": 0.6708389002179875, "grad_norm": 1.4453125, "learning_rate": 1.4765088434892735e-06, "log_odds_chosen": 0.20517206192016602, "log_odds_ratio": -0.7064529657363892, "logits/chosen": 0.04695446044206619, "logits/rejected": 0.20691752433776855, "logps/chosen": -0.8503645062446594, "logps/rejected": -1.0062562227249146, "loss": 1.3207, "nll_loss": 1.0638062953948975, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08503645658493042, "rewards/margins": 0.015589162707328796, "rewards/rejected": -0.10062561929225922, "step": 2385 }, { "epoch": 0.6722452710779833, "grad_norm": 0.98046875, "learning_rate": 1.4653214273457261e-06, "log_odds_chosen": 0.3600631654262543, "log_odds_ratio": -0.6305667161941528, "logits/chosen": 0.2736007571220398, "logits/rejected": -0.09294568002223969, "logps/chosen": -1.028140902519226, "logps/rejected": -1.2551156282424927, "loss": 1.2326, "nll_loss": 1.334746241569519, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10281410068273544, "rewards/margins": 0.022697459906339645, "rewards/rejected": -0.12551157176494598, "step": 2390 }, { "epoch": 0.673651641937979, "grad_norm": 1.1640625, "learning_rate": 1.4541589580602691e-06, "log_odds_chosen": 0.21781405806541443, "log_odds_ratio": -0.6903058886528015, "logits/chosen": 0.32889285683631897, "logits/rejected": 0.20784631371498108, "logps/chosen": -0.9776535034179688, "logps/rejected": -1.1374032497406006, "loss": 1.2584, "nll_loss": 1.2378979921340942, "rewards/accuracies": 0.5, "rewards/chosen": -0.09776534140110016, "rewards/margins": 0.0159749872982502, "rewards/rejected": -0.11374033987522125, "step": 2395 }, { "epoch": 0.6750580127979748, "grad_norm": 0.498046875, "learning_rate": 1.4430217047682133e-06, "log_odds_chosen": 0.3385079503059387, "log_odds_ratio": -0.6288328766822815, "logits/chosen": 0.01931552030146122, "logits/rejected": -0.039456650614738464, "logps/chosen": -0.926923394203186, "logps/rejected": -1.1492844820022583, "loss": 1.3098, "nll_loss": 1.1470708847045898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09269233047962189, "rewards/margins": 0.022236105054616928, "rewards/rejected": -0.11492843925952911, "step": 2400 }, { "epoch": 0.6764643836579706, "grad_norm": 0.48046875, "learning_rate": 1.4319099359968897e-06, "log_odds_chosen": 0.4113641381263733, "log_odds_ratio": -0.6631403565406799, "logits/chosen": 0.20192308723926544, "logits/rejected": -0.16459718346595764, "logps/chosen": -0.9309433698654175, "logps/rejected": -1.1940377950668335, "loss": 1.3175, "nll_loss": 1.2504770755767822, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0930943414568901, "rewards/margins": 0.026309454813599586, "rewards/rejected": -0.11940377950668335, "step": 2405 }, { "epoch": 0.6778707545179664, "grad_norm": 0.84375, "learning_rate": 1.4208239196591816e-06, "log_odds_chosen": 0.6248758435249329, "log_odds_ratio": -0.5380848050117493, "logits/chosen": 0.2532563805580139, "logits/rejected": 0.03574008494615555, "logps/chosen": -0.7717488408088684, "logps/rejected": -1.1881312131881714, "loss": 1.2321, "nll_loss": 1.0419811010360718, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07717488706111908, "rewards/margins": 0.04163823276758194, "rewards/rejected": -0.11881311237812042, "step": 2410 }, { "epoch": 0.6792771253779621, "grad_norm": 0.8125, "learning_rate": 1.4097639230470602e-06, "log_odds_chosen": 0.16117160022258759, "log_odds_ratio": -0.6795519590377808, "logits/chosen": -0.0329584926366806, "logits/rejected": 0.09744864702224731, "logps/chosen": -1.0823699235916138, "logps/rejected": -1.2119646072387695, "loss": 1.2633, "nll_loss": 1.1455273628234863, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10823698341846466, "rewards/margins": 0.012959480285644531, "rewards/rejected": -0.12119647115468979, "step": 2415 }, { "epoch": 0.680683496237958, "grad_norm": 0.56640625, "learning_rate": 1.3987302128251451e-06, "log_odds_chosen": 0.5146900415420532, "log_odds_ratio": -0.5096299052238464, "logits/chosen": 0.21035465598106384, "logits/rejected": 0.3026772737503052, "logps/chosen": -0.7643210291862488, "logps/rejected": -1.0609276294708252, "loss": 1.3398, "nll_loss": 0.982632040977478, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07643209397792816, "rewards/margins": 0.02966066636145115, "rewards/rejected": -0.10609277337789536, "step": 2420 }, { "epoch": 0.6820898670979537, "grad_norm": 0.94140625, "learning_rate": 1.3877230550242685e-06, "log_odds_chosen": 0.12729986011981964, "log_odds_ratio": -0.7055837512016296, "logits/chosen": 0.1564406454563141, "logits/rejected": 0.018828097730875015, "logps/chosen": -1.0019980669021606, "logps/rejected": -1.11031973361969, "loss": 1.3547, "nll_loss": 1.2092901468276978, "rewards/accuracies": 0.5, "rewards/chosen": -0.10019980370998383, "rewards/margins": 0.010832170024514198, "rewards/rejected": -0.11103197187185287, "step": 2425 }, { "epoch": 0.6834962379579496, "grad_norm": 0.6875, "learning_rate": 1.376742715035066e-06, "log_odds_chosen": 0.3790433704853058, "log_odds_ratio": -0.6022090911865234, "logits/chosen": 0.07409689575433731, "logits/rejected": 0.1763194501399994, "logps/chosen": -0.931224524974823, "logps/rejected": -1.217162847518921, "loss": 1.2583, "nll_loss": 1.1742773056030273, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0931224375963211, "rewards/margins": 0.02859382890164852, "rewards/rejected": -0.12171627581119537, "step": 2430 }, { "epoch": 0.6849026088179453, "grad_norm": 0.44140625, "learning_rate": 1.3657894576015752e-06, "log_odds_chosen": 0.3091423213481903, "log_odds_ratio": -0.6466763615608215, "logits/chosen": 0.1856229156255722, "logits/rejected": -0.09812851250171661, "logps/chosen": -0.8701708912849426, "logps/rejected": -1.0862239599227905, "loss": 1.2811, "nll_loss": 1.1256446838378906, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.08701709657907486, "rewards/margins": 0.021605292335152626, "rewards/rejected": -0.10862239450216293, "step": 2435 }, { "epoch": 0.686308979677941, "grad_norm": 0.5390625, "learning_rate": 1.3548635468148546e-06, "log_odds_chosen": -0.15600749850273132, "log_odds_ratio": -0.8607271313667297, "logits/chosen": 0.039382584393024445, "logits/rejected": -0.024478310719132423, "logps/chosen": -1.0003107786178589, "logps/rejected": -0.936125636100769, "loss": 1.3303, "nll_loss": 1.2574559450149536, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10003107786178589, "rewards/margins": -0.006418503820896149, "rewards/rejected": -0.09361256659030914, "step": 2440 }, { "epoch": 0.6877153505379369, "grad_norm": 0.474609375, "learning_rate": 1.3439652461066132e-06, "log_odds_chosen": 0.9599248766899109, "log_odds_ratio": -0.5301603078842163, "logits/chosen": 0.11211545765399933, "logits/rejected": -0.01433412916958332, "logps/chosen": -0.9639075994491577, "logps/rejected": -1.5113232135772705, "loss": 1.2619, "nll_loss": 1.2481439113616943, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09639076888561249, "rewards/margins": 0.05474156141281128, "rewards/rejected": -0.15113233029842377, "step": 2445 }, { "epoch": 0.6891217213979326, "grad_norm": 1.1640625, "learning_rate": 1.3330948182428632e-06, "log_odds_chosen": 0.3813667893409729, "log_odds_ratio": -0.6987672448158264, "logits/chosen": 0.2353462278842926, "logits/rejected": -0.04703383892774582, "logps/chosen": -0.9156500697135925, "logps/rejected": -1.2335669994354248, "loss": 1.3956, "nll_loss": 1.2824862003326416, "rewards/accuracies": 0.5, "rewards/chosen": -0.09156500548124313, "rewards/margins": 0.03179169446229935, "rewards/rejected": -0.12335671484470367, "step": 2450 }, { "epoch": 0.6905280922579284, "grad_norm": 1.0703125, "learning_rate": 1.3222525253175817e-06, "log_odds_chosen": 0.38882437348365784, "log_odds_ratio": -0.6351332068443298, "logits/chosen": 0.06713583320379257, "logits/rejected": -0.06536121666431427, "logps/chosen": -0.7927005887031555, "logps/rejected": -1.057539701461792, "loss": 1.3135, "nll_loss": 1.363416314125061, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07927004992961884, "rewards/margins": 0.026483912020921707, "rewards/rejected": -0.10575397312641144, "step": 2455 }, { "epoch": 0.6919344631179242, "grad_norm": 0.515625, "learning_rate": 1.3114386287463895e-06, "log_odds_chosen": 0.3855310380458832, "log_odds_ratio": -0.6534914970397949, "logits/chosen": 0.3311907947063446, "logits/rejected": -0.03206902742385864, "logps/chosen": -0.9295966029167175, "logps/rejected": -1.1973559856414795, "loss": 1.2819, "nll_loss": 1.1859447956085205, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09295966476202011, "rewards/margins": 0.02677593193948269, "rewards/rejected": -0.11973558366298676, "step": 2460 }, { "epoch": 0.69334083397792, "grad_norm": 0.921875, "learning_rate": 1.3006533892602529e-06, "log_odds_chosen": 0.5109578967094421, "log_odds_ratio": -0.5499760508537292, "logits/chosen": 0.18025276064872742, "logits/rejected": -0.0963296964764595, "logps/chosen": -0.8864691853523254, "logps/rejected": -1.241557002067566, "loss": 1.2567, "nll_loss": 1.2526640892028809, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08864691853523254, "rewards/margins": 0.035508789122104645, "rewards/rejected": -0.12415570020675659, "step": 2465 }, { "epoch": 0.6947472048379157, "grad_norm": 0.80078125, "learning_rate": 1.289897066899194e-06, "log_odds_chosen": 0.2278120517730713, "log_odds_ratio": -0.6966596841812134, "logits/chosen": 0.2671460211277008, "logits/rejected": -0.14307644963264465, "logps/chosen": -0.9270893335342407, "logps/rejected": -1.1367871761322021, "loss": 1.343, "nll_loss": 1.2511600255966187, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09270893782377243, "rewards/margins": 0.020969776436686516, "rewards/rejected": -0.1136787086725235, "step": 2470 }, { "epoch": 0.6961535756979116, "grad_norm": 0.81640625, "learning_rate": 1.27916992100602e-06, "log_odds_chosen": 0.07840847969055176, "log_odds_ratio": -0.7949457168579102, "logits/chosen": -0.019716311246156693, "logits/rejected": 0.07323477417230606, "logps/chosen": -1.0711171627044678, "logps/rejected": -1.1677472591400146, "loss": 1.2643, "nll_loss": 1.206630825996399, "rewards/accuracies": 0.5, "rewards/chosen": -0.10711170732975006, "rewards/margins": 0.009663019329309464, "rewards/rejected": -0.11677472293376923, "step": 2475 }, { "epoch": 0.6975599465579073, "grad_norm": 0.52734375, "learning_rate": 1.268472210220077e-06, "log_odds_chosen": -0.06773873418569565, "log_odds_ratio": -0.8004165887832642, "logits/chosen": -0.054945267736911774, "logits/rejected": 0.06799677759408951, "logps/chosen": -0.886520266532898, "logps/rejected": -0.8365623354911804, "loss": 1.3579, "nll_loss": 1.3515230417251587, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.08865202963352203, "rewards/margins": -0.004995786584913731, "rewards/rejected": -0.08365623652935028, "step": 2480 }, { "epoch": 0.698966317417903, "grad_norm": 1.421875, "learning_rate": 1.2578041924710048e-06, "log_odds_chosen": 0.026949768885970116, "log_odds_ratio": -0.7602518796920776, "logits/chosen": 0.08551601320505142, "logits/rejected": -0.11867004632949829, "logps/chosen": -0.9919036626815796, "logps/rejected": -1.0153735876083374, "loss": 1.3524, "nll_loss": 1.4168071746826172, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0991903692483902, "rewards/margins": 0.002346993423998356, "rewards/rejected": -0.10153736919164658, "step": 2485 }, { "epoch": 0.7003726882778989, "grad_norm": 1.578125, "learning_rate": 1.247166124972523e-06, "log_odds_chosen": 0.27566736936569214, "log_odds_ratio": -0.631436824798584, "logits/chosen": 0.10302142798900604, "logits/rejected": 0.0398896150290966, "logps/chosen": -0.9392396211624146, "logps/rejected": -1.1210951805114746, "loss": 1.3061, "nll_loss": 1.1322752237319946, "rewards/accuracies": 0.75, "rewards/chosen": -0.09392396360635757, "rewards/margins": 0.018185561522841454, "rewards/rejected": -0.11210951954126358, "step": 2490 }, { "epoch": 0.7017790591378946, "grad_norm": 0.57421875, "learning_rate": 1.2365582642162303e-06, "log_odds_chosen": -0.050146959722042084, "log_odds_ratio": -0.7798740267753601, "logits/chosen": 0.04944353178143501, "logits/rejected": 0.006843870971351862, "logps/chosen": -1.0934979915618896, "logps/rejected": -1.067382574081421, "loss": 1.3223, "nll_loss": 1.3140615224838257, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.1093498095870018, "rewards/margins": -0.0026115558575838804, "rewards/rejected": -0.10673825442790985, "step": 2495 }, { "epoch": 0.7031854299978905, "grad_norm": 0.322265625, "learning_rate": 1.225980865965416e-06, "log_odds_chosen": 0.16867448389530182, "log_odds_ratio": -0.7877094149589539, "logits/chosen": 0.15207277238368988, "logits/rejected": 0.07655763626098633, "logps/chosen": -1.1009851694107056, "logps/rejected": -1.1948564052581787, "loss": 1.2842, "nll_loss": 1.3291409015655518, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11009852588176727, "rewards/margins": 0.009387115947902203, "rewards/rejected": -0.11948563903570175, "step": 2500 }, { "epoch": 0.7045918008578862, "grad_norm": 0.92578125, "learning_rate": 1.2154341852489e-06, "log_odds_chosen": 0.19464707374572754, "log_odds_ratio": -0.6840943098068237, "logits/chosen": -0.10291782766580582, "logits/rejected": 0.017472196370363235, "logps/chosen": -0.9715100526809692, "logps/rejected": -1.0822222232818604, "loss": 1.3588, "nll_loss": 1.2510693073272705, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0971510112285614, "rewards/margins": 0.0110712219029665, "rewards/rejected": -0.10822223126888275, "step": 2505 }, { "epoch": 0.705998171717882, "grad_norm": 1.234375, "learning_rate": 1.2049184763548772e-06, "log_odds_chosen": 0.4920802712440491, "log_odds_ratio": -0.5853989124298096, "logits/chosen": -0.08663634210824966, "logits/rejected": -0.034693799912929535, "logps/chosen": -0.902317225933075, "logps/rejected": -1.2154607772827148, "loss": 1.2472, "nll_loss": 1.23630690574646, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09023171663284302, "rewards/margins": 0.03131437301635742, "rewards/rejected": -0.12154610455036163, "step": 2510 }, { "epoch": 0.7074045425778778, "grad_norm": 0.6015625, "learning_rate": 1.1944339928247886e-06, "log_odds_chosen": 0.16076788306236267, "log_odds_ratio": -0.7225486040115356, "logits/chosen": 0.30270877480506897, "logits/rejected": 0.1148032397031784, "logps/chosen": -1.0007215738296509, "logps/rejected": -1.1605135202407837, "loss": 1.2335, "nll_loss": 1.1324108839035034, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10007216036319733, "rewards/margins": 0.01597919873893261, "rewards/rejected": -0.11605136096477509, "step": 2515 }, { "epoch": 0.7088109134378736, "grad_norm": 1.0546875, "learning_rate": 1.1839809874472116e-06, "log_odds_chosen": 0.32421764731407166, "log_odds_ratio": -0.6674357652664185, "logits/chosen": 0.18027594685554504, "logits/rejected": 0.10635361820459366, "logps/chosen": -0.9377398490905762, "logps/rejected": -1.1755977869033813, "loss": 1.3138, "nll_loss": 1.1462560892105103, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09377399832010269, "rewards/margins": 0.0237857848405838, "rewards/rejected": -0.1175597757101059, "step": 2520 }, { "epoch": 0.7102172842978693, "grad_norm": 0.5703125, "learning_rate": 1.1735597122517603e-06, "log_odds_chosen": 0.33940380811691284, "log_odds_ratio": -0.5675379037857056, "logits/chosen": 0.22716209292411804, "logits/rejected": 0.09732247143983841, "logps/chosen": -0.8749046325683594, "logps/rejected": -1.0648473501205444, "loss": 1.2041, "nll_loss": 1.094160795211792, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08749047666788101, "rewards/margins": 0.01899426057934761, "rewards/rejected": -0.10648472607135773, "step": 2525 }, { "epoch": 0.7116236551578651, "grad_norm": 0.4765625, "learning_rate": 1.1631704185030107e-06, "log_odds_chosen": 0.004075491335242987, "log_odds_ratio": -0.7541936635971069, "logits/chosen": 0.27269211411476135, "logits/rejected": 0.01029108464717865, "logps/chosen": -0.8757359385490417, "logps/rejected": -0.8719568252563477, "loss": 1.2674, "nll_loss": 1.2918345928192139, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0875735953450203, "rewards/margins": -0.00037791504291817546, "rewards/rejected": -0.08719567954540253, "step": 2530 }, { "epoch": 0.7130300260178609, "grad_norm": 0.88671875, "learning_rate": 1.1528133566944427e-06, "log_odds_chosen": -0.1133066862821579, "log_odds_ratio": -0.8648877143859863, "logits/chosen": -0.10340137779712677, "logits/rejected": 0.04538872092962265, "logps/chosen": -1.0196564197540283, "logps/rejected": -0.8988596200942993, "loss": 1.2808, "nll_loss": 1.2842795848846436, "rewards/accuracies": 0.5, "rewards/chosen": -0.10196565091609955, "rewards/margins": -0.01207968033850193, "rewards/rejected": -0.08988596498966217, "step": 2535 }, { "epoch": 0.7144363968778566, "grad_norm": 0.75390625, "learning_rate": 1.1424887765424029e-06, "log_odds_chosen": 0.18615292012691498, "log_odds_ratio": -0.7488057017326355, "logits/chosen": 0.0731426477432251, "logits/rejected": -0.023691270500421524, "logps/chosen": -1.0201672315597534, "logps/rejected": -1.0216002464294434, "loss": 1.4175, "nll_loss": 1.341402292251587, "rewards/accuracies": 0.5, "rewards/chosen": -0.10201673209667206, "rewards/margins": 0.00014328323595691472, "rewards/rejected": -0.1021600142121315, "step": 2540 }, { "epoch": 0.7158427677378525, "grad_norm": 0.74609375, "learning_rate": 1.1321969269800783e-06, "log_odds_chosen": -0.12177006155252457, "log_odds_ratio": -0.8551080822944641, "logits/chosen": 0.060602523386478424, "logits/rejected": 0.08475537598133087, "logps/chosen": -1.0529420375823975, "logps/rejected": -0.9692171216011047, "loss": 1.2211, "nll_loss": 1.0461233854293823, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.10529420524835587, "rewards/margins": -0.008372487500309944, "rewards/rejected": -0.09692171216011047, "step": 2545 }, { "epoch": 0.7172491385978482, "grad_norm": 0.7578125, "learning_rate": 1.1219380561514992e-06, "log_odds_chosen": 0.6954945921897888, "log_odds_ratio": -0.580727756023407, "logits/chosen": 0.25962942838668823, "logits/rejected": -0.02940789982676506, "logps/chosen": -0.884055495262146, "logps/rejected": -1.4140456914901733, "loss": 1.2898, "nll_loss": 1.118775486946106, "rewards/accuracies": 0.75, "rewards/chosen": -0.0884055569767952, "rewards/margins": 0.05299902707338333, "rewards/rejected": -0.14140458405017853, "step": 2550 }, { "epoch": 0.7186555094578441, "grad_norm": 0.86328125, "learning_rate": 1.1117124114055533e-06, "log_odds_chosen": 0.3898099362850189, "log_odds_ratio": -0.6564149260520935, "logits/chosen": 0.13655290007591248, "logits/rejected": -0.14647407829761505, "logps/chosen": -0.924484133720398, "logps/rejected": -1.103148102760315, "loss": 1.3297, "nll_loss": 1.439805507659912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09244842082262039, "rewards/margins": 0.017866378650069237, "rewards/rejected": -0.11031480133533478, "step": 2555 }, { "epoch": 0.7200618803178398, "grad_norm": 0.73046875, "learning_rate": 1.1015202392900234e-06, "log_odds_chosen": 0.24838736653327942, "log_odds_ratio": -0.7105676531791687, "logits/chosen": 0.13131016492843628, "logits/rejected": -0.09939908236265182, "logps/chosen": -0.8603301048278809, "logps/rejected": -1.0633742809295654, "loss": 1.355, "nll_loss": 1.2121269702911377, "rewards/accuracies": 0.75, "rewards/chosen": -0.08603300899267197, "rewards/margins": 0.02030441164970398, "rewards/rejected": -0.10633742809295654, "step": 2560 }, { "epoch": 0.7214682511778356, "grad_norm": 0.921875, "learning_rate": 1.0913617855456449e-06, "log_odds_chosen": 0.07735253125429153, "log_odds_ratio": -0.7111676931381226, "logits/chosen": 0.3953332304954529, "logits/rejected": -0.05915503576397896, "logps/chosen": -1.0024585723876953, "logps/rejected": -1.083264708518982, "loss": 1.1852, "nll_loss": 1.2101662158966064, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10024585574865341, "rewards/margins": 0.008080625906586647, "rewards/rejected": -0.10832647979259491, "step": 2565 }, { "epoch": 0.7228746220378314, "grad_norm": 0.59765625, "learning_rate": 1.0812372951001752e-06, "log_odds_chosen": -0.04463967680931091, "log_odds_ratio": -0.8388767242431641, "logits/chosen": 0.02681068144738674, "logits/rejected": -0.018642084673047066, "logps/chosen": -1.0836817026138306, "logps/rejected": -1.0096104145050049, "loss": 1.353, "nll_loss": 1.344254493713379, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1083681732416153, "rewards/margins": -0.007407122757285833, "rewards/rejected": -0.10096104443073273, "step": 2570 }, { "epoch": 0.7242809928978271, "grad_norm": 0.63671875, "learning_rate": 1.0711470120624937e-06, "log_odds_chosen": 0.3836483061313629, "log_odds_ratio": -0.5960331559181213, "logits/chosen": 0.2320297658443451, "logits/rejected": -0.0060660927556455135, "logps/chosen": -0.9361212849617004, "logps/rejected": -1.170351266860962, "loss": 1.3233, "nll_loss": 1.1359012126922607, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09361211955547333, "rewards/margins": 0.023423010483384132, "rewards/rejected": -0.11703513562679291, "step": 2575 }, { "epoch": 0.725687363757823, "grad_norm": 0.61328125, "learning_rate": 1.0610911797167133e-06, "log_odds_chosen": 0.0792519822716713, "log_odds_ratio": -0.7163349390029907, "logits/chosen": 0.24216756224632263, "logits/rejected": 0.06887772679328918, "logps/chosen": -0.8495702743530273, "logps/rejected": -0.881389319896698, "loss": 1.2753, "nll_loss": 1.0806999206542969, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.08495702594518661, "rewards/margins": 0.003181902226060629, "rewards/rejected": -0.08813893049955368, "step": 2580 }, { "epoch": 0.7270937346178187, "grad_norm": 0.82421875, "learning_rate": 1.0510700405163152e-06, "log_odds_chosen": 0.3315682113170624, "log_odds_ratio": -0.5999222993850708, "logits/chosen": 0.313637912273407, "logits/rejected": -0.13951337337493896, "logps/chosen": -0.9159483909606934, "logps/rejected": -1.1227281093597412, "loss": 1.2945, "nll_loss": 1.2091586589813232, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09159483760595322, "rewards/margins": 0.020677978172898293, "rewards/rejected": -0.11227281391620636, "step": 2585 }, { "epoch": 0.7285001054778145, "grad_norm": 0.7265625, "learning_rate": 1.0410838360783058e-06, "log_odds_chosen": 0.3959486782550812, "log_odds_ratio": -0.617482602596283, "logits/chosen": 0.40600308775901794, "logits/rejected": -0.3985676169395447, "logps/chosen": -1.0334031581878662, "logps/rejected": -1.3553186655044556, "loss": 1.355, "nll_loss": 1.2000818252563477, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10334031283855438, "rewards/margins": 0.032191552221775055, "rewards/rejected": -0.13553187251091003, "step": 2590 }, { "epoch": 0.7299064763378103, "grad_norm": 0.4609375, "learning_rate": 1.031132807177385e-06, "log_odds_chosen": 0.4226677417755127, "log_odds_ratio": -0.6021221876144409, "logits/chosen": -0.010524725541472435, "logits/rejected": -0.04317883029580116, "logps/chosen": -0.860185444355011, "logps/rejected": -1.1054656505584717, "loss": 1.2373, "nll_loss": 1.1730222702026367, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08601854741573334, "rewards/margins": 0.024528011679649353, "rewards/rejected": -0.11054656654596329, "step": 2595 }, { "epoch": 0.7313128471978061, "grad_norm": 0.71484375, "learning_rate": 1.0212171937401493e-06, "log_odds_chosen": 0.555956244468689, "log_odds_ratio": -0.5494065284729004, "logits/chosen": 0.1120821014046669, "logits/rejected": -0.11111991107463837, "logps/chosen": -0.9098693132400513, "logps/rejected": -1.2724263668060303, "loss": 1.3911, "nll_loss": 1.4419333934783936, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09098692983388901, "rewards/margins": 0.03625571355223656, "rewards/rejected": -0.12724265456199646, "step": 2600 }, { "epoch": 0.7327192180578018, "grad_norm": 0.5546875, "learning_rate": 1.0113372348392988e-06, "log_odds_chosen": 0.33087357878685, "log_odds_ratio": -0.6583686470985413, "logits/chosen": 0.20928256213665009, "logits/rejected": 0.009285476990044117, "logps/chosen": -0.9686871767044067, "logps/rejected": -1.1702792644500732, "loss": 1.2857, "nll_loss": 1.3147389888763428, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09686873108148575, "rewards/margins": 0.02015921100974083, "rewards/rejected": -0.11702793836593628, "step": 2605 }, { "epoch": 0.7341255889177977, "grad_norm": 0.9296875, "learning_rate": 1.0014931686878776e-06, "log_odds_chosen": 0.43109697103500366, "log_odds_ratio": -0.5490614175796509, "logits/chosen": -0.0007611617329530418, "logits/rejected": 0.2633914649486542, "logps/chosen": -0.8575743436813354, "logps/rejected": -1.1433038711547852, "loss": 1.2056, "nll_loss": 1.0657745599746704, "rewards/accuracies": 0.75, "rewards/chosen": -0.08575743436813354, "rewards/margins": 0.02857295610010624, "rewards/rejected": -0.11433041095733643, "step": 2610 }, { "epoch": 0.7355319597777934, "grad_norm": 0.68359375, "learning_rate": 9.916852326335294e-07, "log_odds_chosen": 0.37479788064956665, "log_odds_ratio": -0.5814806222915649, "logits/chosen": 0.19862744212150574, "logits/rejected": 0.21426455676555634, "logps/chosen": -0.9342344999313354, "logps/rejected": -1.1362876892089844, "loss": 1.3389, "nll_loss": 1.1891016960144043, "rewards/accuracies": 0.75, "rewards/chosen": -0.09342344850301743, "rewards/margins": 0.020205311477184296, "rewards/rejected": -0.11362876743078232, "step": 2615 }, { "epoch": 0.7369383306377891, "grad_norm": 0.9609375, "learning_rate": 9.819136631527742e-07, "log_odds_chosen": -0.01976107433438301, "log_odds_ratio": -0.7440468072891235, "logits/chosen": 0.2642653286457062, "logits/rejected": -0.05234430357813835, "logps/chosen": -0.9733647108078003, "logps/rejected": -0.9724742770195007, "loss": 1.2947, "nll_loss": 1.2140226364135742, "rewards/accuracies": 0.5, "rewards/chosen": -0.09733647853136063, "rewards/margins": -8.90508308657445e-05, "rewards/rejected": -0.09724743664264679, "step": 2620 }, { "epoch": 0.738344701497785, "grad_norm": 1.6328125, "learning_rate": 9.7217869584531e-07, "log_odds_chosen": 0.542760968208313, "log_odds_ratio": -0.6224207878112793, "logits/chosen": 0.03186125308275223, "logits/rejected": -0.09850295633077621, "logps/chosen": -0.9357168078422546, "logps/rejected": -1.185787320137024, "loss": 1.3398, "nll_loss": 1.4264706373214722, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09357168525457382, "rewards/margins": 0.025007059797644615, "rewards/rejected": -0.11857875436544418, "step": 2625 }, { "epoch": 0.7397510723577807, "grad_norm": 0.4765625, "learning_rate": 9.624805654283264e-07, "log_odds_chosen": 0.4654063284397125, "log_odds_ratio": -0.5991908311843872, "logits/chosen": 0.08382277935743332, "logits/rejected": -0.10615154355764389, "logps/chosen": -0.9338623285293579, "logps/rejected": -1.217292308807373, "loss": 1.2077, "nll_loss": 1.33759605884552, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0933862254023552, "rewards/margins": 0.02834300324320793, "rewards/rejected": -0.12172923237085342, "step": 2630 }, { "epoch": 0.7411574432177765, "grad_norm": 0.75390625, "learning_rate": 9.528195057308498e-07, "log_odds_chosen": 0.13208118081092834, "log_odds_ratio": -0.7440918684005737, "logits/chosen": 0.12219689041376114, "logits/rejected": 0.05499458312988281, "logps/chosen": -0.9848679304122925, "logps/rejected": -1.0994460582733154, "loss": 1.2732, "nll_loss": 1.143159031867981, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09848680347204208, "rewards/margins": 0.011457815766334534, "rewards/rejected": -0.10994460433721542, "step": 2635 }, { "epoch": 0.7425638140777723, "grad_norm": 1.09375, "learning_rate": 9.431957496881044e-07, "log_odds_chosen": -0.24765324592590332, "log_odds_ratio": -0.91820228099823, "logits/chosen": 0.13052822649478912, "logits/rejected": -0.09291420876979828, "logps/chosen": -1.1655653715133667, "logps/rejected": -1.0791189670562744, "loss": 1.4344, "nll_loss": 1.45156991481781, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.11655654013156891, "rewards/margins": -0.008644639514386654, "rewards/rejected": -0.10791189968585968, "step": 2640 }, { "epoch": 0.7439701849377681, "grad_norm": 0.490234375, "learning_rate": 9.336095293358955e-07, "log_odds_chosen": 0.18489600718021393, "log_odds_ratio": -0.7089937329292297, "logits/chosen": 0.22435322403907776, "logits/rejected": -0.04987093061208725, "logps/chosen": -0.8399380445480347, "logps/rejected": -0.9723587036132812, "loss": 1.2625, "nll_loss": 1.1622884273529053, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0839938074350357, "rewards/margins": 0.013242078013718128, "rewards/rejected": -0.09723588079214096, "step": 2645 }, { "epoch": 0.7453765557977639, "grad_norm": 0.9296875, "learning_rate": 9.240610758050167e-07, "log_odds_chosen": 0.16607101261615753, "log_odds_ratio": -0.6715102791786194, "logits/chosen": 0.4128045439720154, "logits/rejected": -0.09653332084417343, "logps/chosen": -0.8086966276168823, "logps/rejected": -0.9036060571670532, "loss": 1.3191, "nll_loss": 1.1381139755249023, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.080869659781456, "rewards/margins": 0.009490938857197762, "rewards/rejected": -0.0903606042265892, "step": 2650 }, { "epoch": 0.7467829266577597, "grad_norm": 0.84375, "learning_rate": 9.14550619315675e-07, "log_odds_chosen": 0.41601577401161194, "log_odds_ratio": -0.6104211211204529, "logits/chosen": 0.2691023349761963, "logits/rejected": -0.30555975437164307, "logps/chosen": -0.9007304906845093, "logps/rejected": -1.229494333267212, "loss": 1.2178, "nll_loss": 1.2668547630310059, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09007303416728973, "rewards/margins": 0.0328763946890831, "rewards/rejected": -0.12294943630695343, "step": 2655 }, { "epoch": 0.7481892975177554, "grad_norm": 0.6796875, "learning_rate": 9.050783891719397e-07, "log_odds_chosen": 0.16551217436790466, "log_odds_ratio": -0.7445172071456909, "logits/chosen": 0.025711068883538246, "logits/rejected": -0.060128070414066315, "logps/chosen": -1.0086417198181152, "logps/rejected": -1.1160506010055542, "loss": 1.2587, "nll_loss": 1.2568762302398682, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10086417198181152, "rewards/margins": 0.010740896686911583, "rewards/rejected": -0.11160507053136826, "step": 2660 }, { "epoch": 0.7495956683777513, "grad_norm": 0.466796875, "learning_rate": 8.956446137562183e-07, "log_odds_chosen": 0.16542503237724304, "log_odds_ratio": -0.7191334962844849, "logits/chosen": 0.11821790039539337, "logits/rejected": -0.20049512386322021, "logps/chosen": -0.9817320704460144, "logps/rejected": -1.1040692329406738, "loss": 1.2025, "nll_loss": 1.22897469997406, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09817320853471756, "rewards/margins": 0.012233722023665905, "rewards/rejected": -0.11040693521499634, "step": 2665 }, { "epoch": 0.751002039237747, "grad_norm": 1.15625, "learning_rate": 8.862495205237432e-07, "log_odds_chosen": 0.3671686351299286, "log_odds_ratio": -0.5621435046195984, "logits/chosen": 0.1925962269306183, "logits/rejected": -0.02321244589984417, "logps/chosen": -0.8481559753417969, "logps/rejected": -1.0866224765777588, "loss": 1.236, "nll_loss": 1.1403025388717651, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0848156064748764, "rewards/margins": 0.02384665608406067, "rewards/rejected": -0.10866224765777588, "step": 2670 }, { "epoch": 0.7524084100977427, "grad_norm": 1.140625, "learning_rate": 8.768933359970924e-07, "log_odds_chosen": 0.514217734336853, "log_odds_ratio": -0.5560160279273987, "logits/chosen": 0.12720641493797302, "logits/rejected": -0.23077580332756042, "logps/chosen": -0.987766444683075, "logps/rejected": -1.3353379964828491, "loss": 1.3173, "nll_loss": 1.2551425695419312, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09877664595842361, "rewards/margins": 0.03475714102387428, "rewards/rejected": -0.1335338056087494, "step": 2675 }, { "epoch": 0.7538147809577386, "grad_norm": 0.6875, "learning_rate": 8.675762857607256e-07, "log_odds_chosen": 0.19895534217357635, "log_odds_ratio": -0.7668569684028625, "logits/chosen": 0.3156585395336151, "logits/rejected": -0.07820574939250946, "logps/chosen": -0.902428150177002, "logps/rejected": -1.12700355052948, "loss": 1.2807, "nll_loss": 1.1560865640640259, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09024281799793243, "rewards/margins": 0.02245754934847355, "rewards/rejected": -0.11270036548376083, "step": 2680 }, { "epoch": 0.7552211518177343, "grad_norm": 0.4765625, "learning_rate": 8.582985944555488e-07, "log_odds_chosen": 0.5820831656455994, "log_odds_ratio": -0.5177468061447144, "logits/chosen": 0.30657750368118286, "logits/rejected": -0.03154323250055313, "logps/chosen": -0.8904935121536255, "logps/rejected": -1.2868787050247192, "loss": 1.2957, "nll_loss": 1.1450371742248535, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08904936164617538, "rewards/margins": 0.03963851183652878, "rewards/rejected": -0.12868787348270416, "step": 2685 }, { "epoch": 0.7566275226777301, "grad_norm": 0.68359375, "learning_rate": 8.49060485773493e-07, "log_odds_chosen": 0.780549943447113, "log_odds_ratio": -0.49319782853126526, "logits/chosen": 0.043089210987091064, "logits/rejected": -0.19765236973762512, "logps/chosen": -0.9323430061340332, "logps/rejected": -1.4097062349319458, "loss": 1.3579, "nll_loss": 1.2788646221160889, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09323429316282272, "rewards/margins": 0.047736309468746185, "rewards/rejected": -0.1409706175327301, "step": 2690 }, { "epoch": 0.7580338935377259, "grad_norm": 0.462890625, "learning_rate": 8.39862182452123e-07, "log_odds_chosen": 0.36220189929008484, "log_odds_ratio": -0.6064194440841675, "logits/chosen": 0.006648133508861065, "logits/rejected": 0.20794352889060974, "logps/chosen": -0.9459856748580933, "logps/rejected": -1.1608078479766846, "loss": 1.3569, "nll_loss": 1.14859938621521, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09459856897592545, "rewards/margins": 0.021482214331626892, "rewards/rejected": -0.11608078330755234, "step": 2695 }, { "epoch": 0.7594402643977217, "grad_norm": 0.3984375, "learning_rate": 8.307039062692682e-07, "log_odds_chosen": 0.06786508858203888, "log_odds_ratio": -0.7138899564743042, "logits/chosen": 0.28591951727867126, "logits/rejected": 0.158804252743721, "logps/chosen": -0.9865355491638184, "logps/rejected": -1.0179736614227295, "loss": 1.358, "nll_loss": 1.275681972503662, "rewards/accuracies": 0.5, "rewards/chosen": -0.09865355491638184, "rewards/margins": 0.003143805544823408, "rewards/rejected": -0.10179736465215683, "step": 2700 }, { "epoch": 0.7608466352577175, "grad_norm": 0.77734375, "learning_rate": 8.215858780376732e-07, "log_odds_chosen": 0.16411906480789185, "log_odds_ratio": -0.6866927146911621, "logits/chosen": 0.16503454744815826, "logits/rejected": 0.040900081396102905, "logps/chosen": -1.0014139413833618, "logps/rejected": -1.0946948528289795, "loss": 1.3739, "nll_loss": 1.2816741466522217, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10014139115810394, "rewards/margins": 0.009328092448413372, "rewards/rejected": -0.10946948826313019, "step": 2705 }, { "epoch": 0.7622530061177133, "grad_norm": 0.5859375, "learning_rate": 8.125083175996776e-07, "log_odds_chosen": 0.25532618165016174, "log_odds_ratio": -0.7476253509521484, "logits/chosen": -0.02141258493065834, "logits/rejected": -0.10258176177740097, "logps/chosen": -0.931313693523407, "logps/rejected": -1.1693487167358398, "loss": 1.2248, "nll_loss": 1.2920305728912354, "rewards/accuracies": 0.5, "rewards/chosen": -0.09313137829303741, "rewards/margins": 0.023803498595952988, "rewards/rejected": -0.1169348731637001, "step": 2710 }, { "epoch": 0.763659376977709, "grad_norm": 1.015625, "learning_rate": 8.034714438219101e-07, "log_odds_chosen": 0.7236061692237854, "log_odds_ratio": -0.5326360464096069, "logits/chosen": 0.06193612143397331, "logits/rejected": -0.04839233309030533, "logps/chosen": -0.89410001039505, "logps/rejected": -1.2422478199005127, "loss": 1.3183, "nll_loss": 1.3468520641326904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08941000699996948, "rewards/margins": 0.034814778715372086, "rewards/rejected": -0.12422479689121246, "step": 2715 }, { "epoch": 0.7650657478377048, "grad_norm": 0.435546875, "learning_rate": 7.944754745900158e-07, "log_odds_chosen": 0.3403502404689789, "log_odds_ratio": -0.7160285711288452, "logits/chosen": 0.03555140644311905, "logits/rejected": -0.06346184015274048, "logps/chosen": -0.917829692363739, "logps/rejected": -1.2131367921829224, "loss": 1.3901, "nll_loss": 1.3876527547836304, "rewards/accuracies": 0.5, "rewards/chosen": -0.09178297221660614, "rewards/margins": 0.029530709609389305, "rewards/rejected": -0.12131367623806, "step": 2720 }, { "epoch": 0.7664721186977006, "grad_norm": 0.5546875, "learning_rate": 7.855206268034e-07, "log_odds_chosen": 0.3166981041431427, "log_odds_ratio": -0.6263189911842346, "logits/chosen": 0.32282960414886475, "logits/rejected": 0.1895475536584854, "logps/chosen": -0.9686748385429382, "logps/rejected": -1.2145028114318848, "loss": 1.3271, "nll_loss": 1.1234979629516602, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09686748683452606, "rewards/margins": 0.02458280883729458, "rewards/rejected": -0.12145029008388519, "step": 2725 }, { "epoch": 0.7678784895576963, "grad_norm": 0.9765625, "learning_rate": 7.766071163700004e-07, "log_odds_chosen": 0.24529030919075012, "log_odds_ratio": -0.6629317402839661, "logits/chosen": 0.00040346087189391255, "logits/rejected": -0.030096957460045815, "logps/chosen": -0.9108067750930786, "logps/rejected": -1.0234143733978271, "loss": 1.3121, "nll_loss": 1.280273199081421, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0910806730389595, "rewards/margins": 0.011260779574513435, "rewards/rejected": -0.10234145075082779, "step": 2730 }, { "epoch": 0.7692848604176922, "grad_norm": 0.7890625, "learning_rate": 7.677351582010811e-07, "log_odds_chosen": 0.5884172320365906, "log_odds_ratio": -0.5643038749694824, "logits/chosen": -0.007936659269034863, "logits/rejected": 0.024150729179382324, "logps/chosen": -0.8941663503646851, "logps/rejected": -1.3198270797729492, "loss": 1.2896, "nll_loss": 1.2319035530090332, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08941663801670074, "rewards/margins": 0.042566053569316864, "rewards/rejected": -0.1319826990365982, "step": 2735 }, { "epoch": 0.7706912312776879, "grad_norm": 0.40625, "learning_rate": 7.589049662060518e-07, "log_odds_chosen": 0.40013399720191956, "log_odds_ratio": -0.5915107131004333, "logits/chosen": 0.39384156465530396, "logits/rejected": 0.017804330214858055, "logps/chosen": -0.9199845194816589, "logps/rejected": -1.1359189748764038, "loss": 1.3512, "nll_loss": 1.1475433111190796, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09199845045804977, "rewards/margins": 0.02159346267580986, "rewards/rejected": -0.11359190940856934, "step": 2740 }, { "epoch": 0.7720976021376837, "grad_norm": 0.73828125, "learning_rate": 7.501167532873075e-07, "log_odds_chosen": 0.07918643206357956, "log_odds_ratio": -0.7816256284713745, "logits/chosen": 0.016884874552488327, "logits/rejected": 0.1366664171218872, "logps/chosen": -1.0064961910247803, "logps/rejected": -0.9999880790710449, "loss": 1.2442, "nll_loss": 1.2048060894012451, "rewards/accuracies": 0.75, "rewards/chosen": -0.1006496325135231, "rewards/margins": -0.0006508378428407013, "rewards/rejected": -0.09999879449605942, "step": 2745 }, { "epoch": 0.7735039729976795, "grad_norm": 0.58203125, "learning_rate": 7.413707313350965e-07, "log_odds_chosen": 0.47310739755630493, "log_odds_ratio": -0.6083270907402039, "logits/chosen": 0.1051364317536354, "logits/rejected": -0.04713956639170647, "logps/chosen": -0.9754480123519897, "logps/rejected": -1.2068026065826416, "loss": 1.3705, "nll_loss": 1.2637088298797607, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09754480421543121, "rewards/margins": 0.02313544601202011, "rewards/rejected": -0.12068025767803192, "step": 2750 }, { "epoch": 0.7749103438576753, "grad_norm": 0.5625, "learning_rate": 7.326671112224135e-07, "log_odds_chosen": 0.4301450848579407, "log_odds_ratio": -0.6069377660751343, "logits/chosen": 0.003236403688788414, "logits/rejected": 0.0592556893825531, "logps/chosen": -0.8992059826850891, "logps/rejected": -1.2177690267562866, "loss": 1.263, "nll_loss": 1.3265444040298462, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08992060273885727, "rewards/margins": 0.031856290996074677, "rewards/rejected": -0.12177689373493195, "step": 2755 }, { "epoch": 0.776316714717671, "grad_norm": 0.64453125, "learning_rate": 7.240061027999129e-07, "log_odds_chosen": -0.005230224225670099, "log_odds_ratio": -0.7652040123939514, "logits/chosen": 0.13336391746997833, "logits/rejected": 0.040630996227264404, "logps/chosen": -1.029039740562439, "logps/rejected": -0.9913640022277832, "loss": 1.3398, "nll_loss": 1.3457856178283691, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10290397703647614, "rewards/margins": -0.0037675693165510893, "rewards/rejected": -0.09913641214370728, "step": 2760 }, { "epoch": 0.7777230855776668, "grad_norm": 0.6328125, "learning_rate": 7.153879148908496e-07, "log_odds_chosen": 0.44602876901626587, "log_odds_ratio": -0.5923662185668945, "logits/chosen": 0.3108251988887787, "logits/rejected": -0.019554242491722107, "logps/chosen": -0.8534099459648132, "logps/rejected": -1.0774658918380737, "loss": 1.2261, "nll_loss": 1.1150697469711304, "rewards/accuracies": 0.75, "rewards/chosen": -0.08534099906682968, "rewards/margins": 0.022405600175261497, "rewards/rejected": -0.10774660110473633, "step": 2765 }, { "epoch": 0.7791294564376626, "grad_norm": 0.89453125, "learning_rate": 7.068127552860468e-07, "log_odds_chosen": 0.15509608387947083, "log_odds_ratio": -0.6707223057746887, "logits/chosen": 0.23539385199546814, "logits/rejected": 0.3137028217315674, "logps/chosen": -0.9319781064987183, "logps/rejected": -1.0136935710906982, "loss": 1.2378, "nll_loss": 1.2062842845916748, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09319781512022018, "rewards/margins": 0.00817155558615923, "rewards/rejected": -0.10136936604976654, "step": 2770 }, { "epoch": 0.7805358272976584, "grad_norm": 0.361328125, "learning_rate": 6.982808307388813e-07, "log_odds_chosen": 0.3228221535682678, "log_odds_ratio": -0.6157253384590149, "logits/chosen": 0.14408710598945618, "logits/rejected": -0.12160065025091171, "logps/chosen": -0.8962388038635254, "logps/rejected": -1.1015846729278564, "loss": 1.3435, "nll_loss": 1.1823790073394775, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08962388336658478, "rewards/margins": 0.020534580573439598, "rewards/rejected": -0.11015846580266953, "step": 2775 }, { "epoch": 0.7819421981576542, "grad_norm": 0.42578125, "learning_rate": 6.897923469603023e-07, "log_odds_chosen": 0.4124717116355896, "log_odds_ratio": -0.6217266321182251, "logits/chosen": 0.3927221894264221, "logits/rejected": -0.1088181883096695, "logps/chosen": -0.9175729751586914, "logps/rejected": -1.187263011932373, "loss": 1.2468, "nll_loss": 1.0678209066390991, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09175729751586914, "rewards/margins": 0.026969006285071373, "rewards/rejected": -0.11872629821300507, "step": 2780 }, { "epoch": 0.7833485690176499, "grad_norm": 1.015625, "learning_rate": 6.813475086138699e-07, "log_odds_chosen": 0.4346896708011627, "log_odds_ratio": -0.5942043662071228, "logits/chosen": 0.33520328998565674, "logits/rejected": 0.08043156564235687, "logps/chosen": -0.7721613645553589, "logps/rejected": -1.0392404794692993, "loss": 1.2665, "nll_loss": 1.1436734199523926, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07721613347530365, "rewards/margins": 0.026707911863923073, "rewards/rejected": -0.10392403602600098, "step": 2785 }, { "epoch": 0.7847549398776458, "grad_norm": 0.37890625, "learning_rate": 6.729465193108195e-07, "log_odds_chosen": 0.487898051738739, "log_odds_ratio": -0.6025739908218384, "logits/chosen": 0.24001376330852509, "logits/rejected": -0.13463373482227325, "logps/chosen": -0.946478545665741, "logps/rejected": -1.3150343894958496, "loss": 1.3489, "nll_loss": 1.4121153354644775, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09464786946773529, "rewards/margins": 0.03685557097196579, "rewards/rejected": -0.13150344789028168, "step": 2790 }, { "epoch": 0.7861613107376415, "grad_norm": 1.2109375, "learning_rate": 6.645895816051576e-07, "log_odds_chosen": 0.06646132469177246, "log_odds_ratio": -0.7336984276771545, "logits/chosen": 0.13854220509529114, "logits/rejected": 0.14925724267959595, "logps/chosen": -0.866500973701477, "logps/rejected": -0.8997198343276978, "loss": 1.3252, "nll_loss": 1.2038604021072388, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.08665008842945099, "rewards/margins": 0.0033218897879123688, "rewards/rejected": -0.08997198194265366, "step": 2795 }, { "epoch": 0.7875676815976373, "grad_norm": 0.5390625, "learning_rate": 6.562768969887706e-07, "log_odds_chosen": -0.08361145108938217, "log_odds_ratio": -0.8131535649299622, "logits/chosen": 0.00587810855358839, "logits/rejected": -0.00947192870080471, "logps/chosen": -1.0220993757247925, "logps/rejected": -0.9503192901611328, "loss": 1.3987, "nll_loss": 1.5176174640655518, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10220994055271149, "rewards/margins": -0.00717801321297884, "rewards/rejected": -0.09503192454576492, "step": 2800 }, { "epoch": 0.7889740524576331, "grad_norm": 0.48828125, "learning_rate": 6.480086658865722e-07, "log_odds_chosen": 0.20292186737060547, "log_odds_ratio": -0.7245144248008728, "logits/chosen": 0.12096218764781952, "logits/rejected": -0.17297106981277466, "logps/chosen": -1.0369324684143066, "logps/rejected": -1.181063175201416, "loss": 1.3157, "nll_loss": 1.3815839290618896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10369324684143066, "rewards/margins": 0.014413063414394855, "rewards/rejected": -0.11810630559921265, "step": 2805 }, { "epoch": 0.7903804233176288, "grad_norm": 0.65234375, "learning_rate": 6.397850876516698e-07, "log_odds_chosen": 0.13107821345329285, "log_odds_ratio": -0.8021856546401978, "logits/chosen": -0.062346749007701874, "logits/rejected": 0.16534826159477234, "logps/chosen": -1.063522219657898, "logps/rejected": -1.1438487768173218, "loss": 1.275, "nll_loss": 1.1316450834274292, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10635224729776382, "rewards/margins": 0.008032634854316711, "rewards/rejected": -0.11438487470149994, "step": 2810 }, { "epoch": 0.7917867941776247, "grad_norm": 0.69140625, "learning_rate": 6.316063605605569e-07, "log_odds_chosen": 0.1849391758441925, "log_odds_ratio": -0.7484263181686401, "logits/chosen": -0.05717191845178604, "logits/rejected": 0.0552591010928154, "logps/chosen": -1.103704810142517, "logps/rejected": -1.2362749576568604, "loss": 1.3111, "nll_loss": 1.2431524991989136, "rewards/accuracies": 0.5, "rewards/chosen": -0.11037049442529678, "rewards/margins": 0.013256999664008617, "rewards/rejected": -0.12362749874591827, "step": 2815 }, { "epoch": 0.7931931650376204, "grad_norm": 0.412109375, "learning_rate": 6.234726818083323e-07, "log_odds_chosen": 0.5808418989181519, "log_odds_ratio": -0.539539635181427, "logits/chosen": 0.15562665462493896, "logits/rejected": 0.16735698282718658, "logps/chosen": -0.7677489519119263, "logps/rejected": -1.1099231243133545, "loss": 1.355, "nll_loss": 1.0738924741744995, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07677489519119263, "rewards/margins": 0.034217409789562225, "rewards/rejected": -0.11099229753017426, "step": 2820 }, { "epoch": 0.7945995358976162, "grad_norm": 1.109375, "learning_rate": 6.153842475039468e-07, "log_odds_chosen": 0.5155315399169922, "log_odds_ratio": -0.5820074081420898, "logits/chosen": 0.03340931981801987, "logits/rejected": -0.009877646341919899, "logps/chosen": -0.8543240427970886, "logps/rejected": -1.2141969203948975, "loss": 1.3416, "nll_loss": 1.3335164785385132, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08543241024017334, "rewards/margins": 0.03598729148507118, "rewards/rejected": -0.12141970545053482, "step": 2825 }, { "epoch": 0.796005906757612, "grad_norm": 0.55859375, "learning_rate": 6.073412526654762e-07, "log_odds_chosen": 0.3356287181377411, "log_odds_ratio": -0.6290286779403687, "logits/chosen": 0.1556534469127655, "logits/rejected": -0.012762689962983131, "logps/chosen": -0.9366539716720581, "logps/rejected": -1.118251085281372, "loss": 1.3256, "nll_loss": 1.3431589603424072, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09366540610790253, "rewards/margins": 0.018159715458750725, "rewards/rejected": -0.11182510852813721, "step": 2830 }, { "epoch": 0.7974122776176078, "grad_norm": 1.171875, "learning_rate": 5.993438912154148e-07, "log_odds_chosen": 0.603886067867279, "log_odds_ratio": -0.5090949535369873, "logits/chosen": 0.09760837256908417, "logits/rejected": -0.09581668674945831, "logps/chosen": -0.8826834559440613, "logps/rejected": -1.224760890007019, "loss": 1.2492, "nll_loss": 1.2926743030548096, "rewards/accuracies": 0.75, "rewards/chosen": -0.08826833963394165, "rewards/margins": 0.034207746386528015, "rewards/rejected": -0.12247607856988907, "step": 2835 }, { "epoch": 0.7988186484776035, "grad_norm": 0.671875, "learning_rate": 5.913923559760043e-07, "log_odds_chosen": 0.42496395111083984, "log_odds_ratio": -0.6716245412826538, "logits/chosen": -0.25178101658821106, "logits/rejected": 0.042255472391843796, "logps/chosen": -0.9133031964302063, "logps/rejected": -1.154044270515442, "loss": 1.3642, "nll_loss": 1.3009847402572632, "rewards/accuracies": 0.5, "rewards/chosen": -0.09133031964302063, "rewards/margins": 0.02407410368323326, "rewards/rejected": -0.11540442705154419, "step": 2840 }, { "epoch": 0.8002250193375994, "grad_norm": 0.640625, "learning_rate": 5.83486838664582e-07, "log_odds_chosen": 0.010806980542838573, "log_odds_ratio": -0.7399574518203735, "logits/chosen": 0.28659483790397644, "logits/rejected": 0.009598970413208008, "logps/chosen": -1.0342603921890259, "logps/rejected": -1.0249049663543701, "loss": 1.336, "nll_loss": 1.3047925233840942, "rewards/accuracies": 0.5, "rewards/chosen": -0.10342603921890259, "rewards/margins": -0.0009355418151244521, "rewards/rejected": -0.10249048471450806, "step": 2845 }, { "epoch": 0.8016313901975951, "grad_norm": 0.6328125, "learning_rate": 5.756275298889588e-07, "log_odds_chosen": 0.41581591963768005, "log_odds_ratio": -0.5534268021583557, "logits/chosen": 0.18089225888252258, "logits/rejected": 0.21019300818443298, "logps/chosen": -0.8669838905334473, "logps/rejected": -1.126821517944336, "loss": 1.2032, "nll_loss": 1.023749589920044, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08669839799404144, "rewards/margins": 0.025983760133385658, "rewards/rejected": -0.11268214881420135, "step": 2850 }, { "epoch": 0.8030377610575908, "grad_norm": 1.03125, "learning_rate": 5.678146191428266e-07, "log_odds_chosen": 0.3920010030269623, "log_odds_ratio": -0.8133651614189148, "logits/chosen": -0.07348822802305222, "logits/rejected": -0.08784066885709763, "logps/chosen": -1.0480382442474365, "logps/rejected": -1.3849751949310303, "loss": 1.2955, "nll_loss": 1.2348756790161133, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10480382293462753, "rewards/margins": 0.03369368985295296, "rewards/rejected": -0.1384975016117096, "step": 2855 }, { "epoch": 0.8044441319175867, "grad_norm": 0.80859375, "learning_rate": 5.600482948011835e-07, "log_odds_chosen": 0.6736562848091125, "log_odds_ratio": -0.5489833354949951, "logits/chosen": 0.32034921646118164, "logits/rejected": 0.19463083148002625, "logps/chosen": -0.7580772638320923, "logps/rejected": -1.1340504884719849, "loss": 1.2874, "nll_loss": 1.0859034061431885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07580773532390594, "rewards/margins": 0.03759731352329254, "rewards/rejected": -0.11340503394603729, "step": 2860 }, { "epoch": 0.8058505027775824, "grad_norm": 0.8515625, "learning_rate": 5.523287441157973e-07, "log_odds_chosen": 0.33141160011291504, "log_odds_ratio": -0.6283584237098694, "logits/chosen": 0.26048731803894043, "logits/rejected": 0.09480077028274536, "logps/chosen": -0.8867843747138977, "logps/rejected": -1.0908777713775635, "loss": 1.2885, "nll_loss": 1.1591079235076904, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08867844939231873, "rewards/margins": 0.020409341901540756, "rewards/rejected": -0.10908778756856918, "step": 2865 }, { "epoch": 0.8072568736375783, "grad_norm": 0.91796875, "learning_rate": 5.446561532106878e-07, "log_odds_chosen": 0.5373131632804871, "log_odds_ratio": -0.5314943790435791, "logits/chosen": 0.07900340110063553, "logits/rejected": -0.17070356011390686, "logps/chosen": -0.8341196775436401, "logps/rejected": -1.1634811162948608, "loss": 1.3394, "nll_loss": 1.2959939241409302, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08341196924448013, "rewards/margins": 0.03293614834547043, "rewards/rejected": -0.11634812504053116, "step": 2870 }, { "epoch": 0.808663244497574, "grad_norm": 0.82421875, "learning_rate": 5.370307070776398e-07, "log_odds_chosen": 0.28469741344451904, "log_odds_ratio": -0.6910279393196106, "logits/chosen": 0.06598956882953644, "logits/rejected": 0.07577238976955414, "logps/chosen": -0.8683170080184937, "logps/rejected": -1.0559046268463135, "loss": 1.3146, "nll_loss": 1.086538553237915, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08683168888092041, "rewards/margins": 0.01875876821577549, "rewards/rejected": -0.10559046268463135, "step": 2875 }, { "epoch": 0.8100696153575698, "grad_norm": 1.1171875, "learning_rate": 5.294525895717447e-07, "log_odds_chosen": 0.15487167239189148, "log_odds_ratio": -0.7134450674057007, "logits/chosen": 0.09006836265325546, "logits/rejected": -0.11367156356573105, "logps/chosen": -0.9337458610534668, "logps/rejected": -1.0194793939590454, "loss": 1.2439, "nll_loss": 1.2626831531524658, "rewards/accuracies": 0.5, "rewards/chosen": -0.0933745726943016, "rewards/margins": 0.00857335515320301, "rewards/rejected": -0.10194794088602066, "step": 2880 }, { "epoch": 0.8114759862175656, "grad_norm": 0.59375, "learning_rate": 5.219219834069655e-07, "log_odds_chosen": -0.030949676409363747, "log_odds_ratio": -0.8828393816947937, "logits/chosen": 0.17199784517288208, "logits/rejected": -0.15780949592590332, "logps/chosen": -0.9767589569091797, "logps/rejected": -1.0622228384017944, "loss": 1.3058, "nll_loss": 1.3363220691680908, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09767589718103409, "rewards/margins": 0.008546384051442146, "rewards/rejected": -0.10622228682041168, "step": 2885 }, { "epoch": 0.8128823570775614, "grad_norm": 0.7734375, "learning_rate": 5.14439070151731e-07, "log_odds_chosen": 0.008938372135162354, "log_odds_ratio": -0.7494646310806274, "logits/chosen": 0.08280684053897858, "logits/rejected": 0.01193913072347641, "logps/chosen": -0.9013395309448242, "logps/rejected": -0.8923047780990601, "loss": 1.3378, "nll_loss": 1.3657349348068237, "rewards/accuracies": 0.5, "rewards/chosen": -0.09013396501541138, "rewards/margins": -0.0009034700924530625, "rewards/rejected": -0.0892305001616478, "step": 2890 }, { "epoch": 0.8142887279375571, "grad_norm": 0.80078125, "learning_rate": 5.070040302245596e-07, "log_odds_chosen": 0.14327339828014374, "log_odds_ratio": -0.7465823292732239, "logits/chosen": 0.07318969070911407, "logits/rejected": -0.08628226816654205, "logps/chosen": -1.0324245691299438, "logps/rejected": -1.1777406930923462, "loss": 1.2994, "nll_loss": 1.3385908603668213, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10324247181415558, "rewards/margins": 0.014531604945659637, "rewards/rejected": -0.11777406930923462, "step": 2895 }, { "epoch": 0.8156950987975529, "grad_norm": 0.94140625, "learning_rate": 4.996170428897087e-07, "log_odds_chosen": 0.1323220580816269, "log_odds_ratio": -0.6626992225646973, "logits/chosen": 0.17111194133758545, "logits/rejected": 0.08679263293743134, "logps/chosen": -0.9184327125549316, "logps/rejected": -0.9902726411819458, "loss": 1.3859, "nll_loss": 1.392698049545288, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09184327721595764, "rewards/margins": 0.0071840002201497555, "rewards/rejected": -0.09902726113796234, "step": 2900 }, { "epoch": 0.8171014696575487, "grad_norm": 0.4296875, "learning_rate": 4.922782862528525e-07, "log_odds_chosen": 0.7320371866226196, "log_odds_ratio": -0.5086044669151306, "logits/chosen": -0.006654873490333557, "logits/rejected": -0.1328873336315155, "logps/chosen": -0.8062270283699036, "logps/rejected": -1.2401305437088013, "loss": 1.3848, "nll_loss": 1.377241611480713, "rewards/accuracies": 0.75, "rewards/chosen": -0.08062271773815155, "rewards/margins": 0.043390341103076935, "rewards/rejected": -0.12401305139064789, "step": 2905 }, { "epoch": 0.8185078405175444, "grad_norm": 0.435546875, "learning_rate": 4.84987937256787e-07, "log_odds_chosen": 0.31368353962898254, "log_odds_ratio": -0.6530981659889221, "logits/chosen": 0.16416539251804352, "logits/rejected": -0.05000491812825203, "logps/chosen": -0.8580842018127441, "logps/rejected": -1.0716028213500977, "loss": 1.2962, "nll_loss": 1.2167668342590332, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0858084186911583, "rewards/margins": 0.021351851522922516, "rewards/rejected": -0.10716027021408081, "step": 2910 }, { "epoch": 0.8199142113775403, "grad_norm": 0.875, "learning_rate": 4.777461716771664e-07, "log_odds_chosen": -0.14726386964321136, "log_odds_ratio": -0.8374557495117188, "logits/chosen": 0.23301705718040466, "logits/rejected": -0.11433436721563339, "logps/chosen": -1.1605294942855835, "logps/rejected": -1.0649001598358154, "loss": 1.3267, "nll_loss": 1.2344844341278076, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.11605296283960342, "rewards/margins": -0.00956294871866703, "rewards/rejected": -0.10649001598358154, "step": 2915 }, { "epoch": 0.821320582237536, "grad_norm": 0.92578125, "learning_rate": 4.7055316411826203e-07, "log_odds_chosen": 0.27676817774772644, "log_odds_ratio": -0.6456387042999268, "logits/chosen": 0.2127613127231598, "logits/rejected": 0.0729435533285141, "logps/chosen": -0.904322624206543, "logps/rejected": -1.0903629064559937, "loss": 1.3024, "nll_loss": 1.3120503425598145, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09043227136135101, "rewards/margins": 0.018604028970003128, "rewards/rejected": -0.10903630405664444, "step": 2920 }, { "epoch": 0.8227269530975319, "grad_norm": 0.8828125, "learning_rate": 4.63409088008753e-07, "log_odds_chosen": 0.023978684097528458, "log_odds_ratio": -0.8245126605033875, "logits/chosen": 0.14065992832183838, "logits/rejected": 0.173757404088974, "logps/chosen": -1.055584192276001, "logps/rejected": -1.0905482769012451, "loss": 1.3143, "nll_loss": 1.3115730285644531, "rewards/accuracies": 0.5, "rewards/chosen": -0.10555841028690338, "rewards/margins": 0.0034964128863066435, "rewards/rejected": -0.10905482620000839, "step": 2925 }, { "epoch": 0.8241333239575276, "grad_norm": 0.44921875, "learning_rate": 4.5631411559754615e-07, "log_odds_chosen": 0.58967524766922, "log_odds_ratio": -0.5444141626358032, "logits/chosen": 0.05109834671020508, "logits/rejected": -0.01770491525530815, "logps/chosen": -0.8372839093208313, "logps/rejected": -1.223162293434143, "loss": 1.2745, "nll_loss": 1.2411905527114868, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08372838795185089, "rewards/margins": 0.03858783096075058, "rewards/rejected": -0.12231622636318207, "step": 2930 }, { "epoch": 0.8255396948175234, "grad_norm": 0.388671875, "learning_rate": 4.4926841794962153e-07, "log_odds_chosen": 0.2589498460292816, "log_odds_ratio": -0.6366890668869019, "logits/chosen": 0.11284098774194717, "logits/rejected": -0.06327764689922333, "logps/chosen": -0.9735992550849915, "logps/rejected": -1.0729389190673828, "loss": 1.2247, "nll_loss": 1.3420193195343018, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09735993295907974, "rewards/margins": 0.00993395783007145, "rewards/rejected": -0.10729388892650604, "step": 2935 }, { "epoch": 0.8269460656775192, "grad_norm": 0.3515625, "learning_rate": 4.422721649419104e-07, "log_odds_chosen": 0.5509729981422424, "log_odds_ratio": -0.5443827509880066, "logits/chosen": 0.19436611235141754, "logits/rejected": -0.043565236032009125, "logps/chosen": -0.7678650617599487, "logps/rejected": -1.031546950340271, "loss": 1.187, "nll_loss": 1.1322847604751587, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07678651064634323, "rewards/margins": 0.026368189603090286, "rewards/rejected": -0.10315469652414322, "step": 2940 }, { "epoch": 0.8283524365375149, "grad_norm": 2.3125, "learning_rate": 4.3532552525919535e-07, "log_odds_chosen": 0.3315231204032898, "log_odds_ratio": -0.6372717618942261, "logits/chosen": 0.028471380472183228, "logits/rejected": 0.030725056305527687, "logps/chosen": -0.8331094980239868, "logps/rejected": -1.1218417882919312, "loss": 1.3841, "nll_loss": 1.3554751873016357, "rewards/accuracies": 0.75, "rewards/chosen": -0.08331094682216644, "rewards/margins": 0.028873246163129807, "rewards/rejected": -0.11218418926000595, "step": 2945 }, { "epoch": 0.8297588073975107, "grad_norm": 0.87109375, "learning_rate": 4.2842866639004555e-07, "log_odds_chosen": 0.61634761095047, "log_odds_ratio": -0.5611122250556946, "logits/chosen": 0.13515019416809082, "logits/rejected": 0.17823290824890137, "logps/chosen": -0.7703655362129211, "logps/rejected": -1.0910954475402832, "loss": 1.326, "nll_loss": 1.0025686025619507, "rewards/accuracies": 0.75, "rewards/chosen": -0.07703655958175659, "rewards/margins": 0.03207298368215561, "rewards/rejected": -0.1091095358133316, "step": 2950 }, { "epoch": 0.8311651782575065, "grad_norm": 0.443359375, "learning_rate": 4.2158175462278045e-07, "log_odds_chosen": 0.32084041833877563, "log_odds_ratio": -0.6876135468482971, "logits/chosen": 0.30791932344436646, "logits/rejected": 0.02039993740618229, "logps/chosen": -0.9735520482063293, "logps/rejected": -1.1728037595748901, "loss": 1.3285, "nll_loss": 1.2136799097061157, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09735521674156189, "rewards/margins": 0.019925158470869064, "rewards/rejected": -0.11728037893772125, "step": 2955 }, { "epoch": 0.8325715491175023, "grad_norm": 0.455078125, "learning_rate": 4.147849550414562e-07, "log_odds_chosen": 0.41564807295799255, "log_odds_ratio": -0.6257321834564209, "logits/chosen": 0.05283625051379204, "logits/rejected": -0.07132598012685776, "logps/chosen": -0.9385086894035339, "logps/rejected": -1.1858062744140625, "loss": 1.2499, "nll_loss": 1.150863528251648, "rewards/accuracies": 0.5, "rewards/chosen": -0.09385088086128235, "rewards/margins": 0.02472977340221405, "rewards/rejected": -0.1185806393623352, "step": 2960 }, { "epoch": 0.833977919977498, "grad_norm": 0.5234375, "learning_rate": 4.0803843152188714e-07, "log_odds_chosen": -0.022001957520842552, "log_odds_ratio": -0.8182210922241211, "logits/chosen": 0.0680101290345192, "logits/rejected": 0.07088492810726166, "logps/chosen": -0.9397522211074829, "logps/rejected": -0.9277949333190918, "loss": 1.3689, "nll_loss": 1.3800146579742432, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09397522360086441, "rewards/margins": -0.0011957198148593307, "rewards/rejected": -0.0927794948220253, "step": 2965 }, { "epoch": 0.8353842908374939, "grad_norm": 0.6328125, "learning_rate": 4.0134234672769723e-07, "log_odds_chosen": 0.5197348594665527, "log_odds_ratio": -0.5607367157936096, "logits/chosen": 0.03629279136657715, "logits/rejected": -0.05779438465833664, "logps/chosen": -0.9035292863845825, "logps/rejected": -1.2461154460906982, "loss": 1.2671, "nll_loss": 1.3226429224014282, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09035293012857437, "rewards/margins": 0.03425862640142441, "rewards/rejected": -0.12461157143115997, "step": 2970 }, { "epoch": 0.8367906616974896, "grad_norm": 0.392578125, "learning_rate": 3.94696862106394e-07, "log_odds_chosen": 0.6360191106796265, "log_odds_ratio": -0.6056066751480103, "logits/chosen": -0.09138171374797821, "logits/rejected": -0.028163975104689598, "logps/chosen": -0.8779658079147339, "logps/rejected": -1.2462159395217896, "loss": 1.2585, "nll_loss": 1.3103601932525635, "rewards/accuracies": 0.75, "rewards/chosen": -0.08779658377170563, "rewards/margins": 0.03682499751448631, "rewards/rejected": -0.12462159246206284, "step": 2975 }, { "epoch": 0.8381970325574855, "grad_norm": 0.84765625, "learning_rate": 3.8810213788547825e-07, "log_odds_chosen": 0.008012396283447742, "log_odds_ratio": -0.8510234951972961, "logits/chosen": -0.051430534571409225, "logits/rejected": 0.029810791835188866, "logps/chosen": -0.9297181963920593, "logps/rejected": -0.963575005531311, "loss": 1.3514, "nll_loss": 1.2485787868499756, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0929718166589737, "rewards/margins": 0.0033856802619993687, "rewards/rejected": -0.09635750204324722, "step": 2980 }, { "epoch": 0.8396034034174812, "grad_norm": 0.84375, "learning_rate": 3.8155833306858015e-07, "log_odds_chosen": 0.1383453756570816, "log_odds_ratio": -0.6804744005203247, "logits/chosen": 0.2983183264732361, "logits/rejected": 0.166207954287529, "logps/chosen": -0.9256361126899719, "logps/rejected": -1.0058834552764893, "loss": 1.3121, "nll_loss": 1.1450464725494385, "rewards/accuracies": 0.5, "rewards/chosen": -0.09256361424922943, "rewards/margins": 0.008024740032851696, "rewards/rejected": -0.1005883440375328, "step": 2985 }, { "epoch": 0.8410097742774769, "grad_norm": 0.60546875, "learning_rate": 3.7506560543162594e-07, "log_odds_chosen": 0.39446815848350525, "log_odds_ratio": -0.6375328302383423, "logits/chosen": 0.15619999170303345, "logits/rejected": 0.021760011091828346, "logps/chosen": -0.9194302558898926, "logps/rejected": -1.1816115379333496, "loss": 1.2654, "nll_loss": 1.205482006072998, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09194303303956985, "rewards/margins": 0.026218125596642494, "rewards/rejected": -0.1181611642241478, "step": 2990 }, { "epoch": 0.8424161451374728, "grad_norm": 0.48046875, "learning_rate": 3.6862411151903284e-07, "log_odds_chosen": 0.922314465045929, "log_odds_ratio": -0.4417967200279236, "logits/chosen": 0.20397333800792694, "logits/rejected": -0.004587986972182989, "logps/chosen": -0.7588263750076294, "logps/rejected": -1.3238674402236938, "loss": 1.2892, "nll_loss": 1.3261582851409912, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07588264346122742, "rewards/margins": 0.05650409311056137, "rewards/rejected": -0.1323867291212082, "step": 2995 }, { "epoch": 0.8438225159974685, "grad_norm": 1.9921875, "learning_rate": 3.6223400663993795e-07, "log_odds_chosen": 0.14348141849040985, "log_odds_ratio": -0.762873113155365, "logits/chosen": 0.18984070420265198, "logits/rejected": -0.2189832180738449, "logps/chosen": -0.8771038055419922, "logps/rejected": -0.961025059223175, "loss": 1.381, "nll_loss": 1.347921371459961, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08771036565303802, "rewards/margins": 0.008392124436795712, "rewards/rejected": -0.0961025133728981, "step": 3000 }, { "epoch": 0.8452288868574643, "grad_norm": 0.87109375, "learning_rate": 3.558954448644483e-07, "log_odds_chosen": -0.012045865878462791, "log_odds_ratio": -0.9381014704704285, "logits/chosen": 0.13290119171142578, "logits/rejected": 0.23536305129528046, "logps/chosen": -1.1245501041412354, "logps/rejected": -1.0270380973815918, "loss": 1.2123, "nll_loss": 1.0796576738357544, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11245502531528473, "rewards/margins": -0.00975120346993208, "rewards/rejected": -0.10270382463932037, "step": 3005 }, { "epoch": 0.8466352577174601, "grad_norm": 0.85546875, "learning_rate": 3.496085790199305e-07, "log_odds_chosen": 0.24462373554706573, "log_odds_ratio": -0.7010733485221863, "logits/chosen": -0.04115645959973335, "logits/rejected": -0.05985347554087639, "logps/chosen": -0.9546895027160645, "logps/rejected": -1.1048619747161865, "loss": 1.3499, "nll_loss": 1.2850078344345093, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09546895325183868, "rewards/margins": 0.01501724123954773, "rewards/rejected": -0.11048620939254761, "step": 3010 }, { "epoch": 0.8480416285774559, "grad_norm": 1.15625, "learning_rate": 3.4337356068732397e-07, "log_odds_chosen": 0.24743500351905823, "log_odds_ratio": -0.712551474571228, "logits/chosen": 0.10973727703094482, "logits/rejected": 0.06765292584896088, "logps/chosen": -0.9948040843009949, "logps/rejected": -1.153732419013977, "loss": 1.368, "nll_loss": 1.1009012460708618, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09948041290044785, "rewards/margins": 0.015892835333943367, "rewards/rejected": -0.11537323892116547, "step": 3015 }, { "epoch": 0.8494479994374516, "grad_norm": 0.486328125, "learning_rate": 3.3719054019748564e-07, "log_odds_chosen": 0.004147529602050781, "log_odds_ratio": -0.8406862020492554, "logits/chosen": 0.1997382640838623, "logits/rejected": -0.02337918058037758, "logps/chosen": -1.0684696435928345, "logps/rejected": -1.0946805477142334, "loss": 1.3976, "nll_loss": 1.321757435798645, "rewards/accuracies": 0.5, "rewards/chosen": -0.10684695094823837, "rewards/margins": 0.002621088642627001, "rewards/rejected": -0.10946805775165558, "step": 3020 }, { "epoch": 0.8508543702974475, "grad_norm": 0.66015625, "learning_rate": 3.310596666275684e-07, "log_odds_chosen": 0.18196114897727966, "log_odds_ratio": -0.666560173034668, "logits/chosen": 0.10361174494028091, "logits/rejected": 0.15355415642261505, "logps/chosen": -0.878470778465271, "logps/rejected": -0.9664610028266907, "loss": 1.3655, "nll_loss": 1.1552141904830933, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08784707635641098, "rewards/margins": 0.008799021132290363, "rewards/rejected": -0.09664610028266907, "step": 3025 }, { "epoch": 0.8522607411574432, "grad_norm": 0.94921875, "learning_rate": 3.2498108779742436e-07, "log_odds_chosen": 0.3598330616950989, "log_odds_ratio": -0.6202287673950195, "logits/chosen": 0.20293493568897247, "logits/rejected": 0.06396631896495819, "logps/chosen": -0.9000099897384644, "logps/rejected": -1.089213252067566, "loss": 1.2846, "nll_loss": 1.192943811416626, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09000100940465927, "rewards/margins": 0.01892031356692314, "rewards/rejected": -0.10892131179571152, "step": 3030 }, { "epoch": 0.853667112017439, "grad_norm": 0.5703125, "learning_rate": 3.189549502660397e-07, "log_odds_chosen": 0.3495637774467468, "log_odds_ratio": -0.6821560859680176, "logits/chosen": 0.2623140215873718, "logits/rejected": -0.038874976336956024, "logps/chosen": -0.9177125096321106, "logps/rejected": -1.1296727657318115, "loss": 1.3407, "nll_loss": 1.101432204246521, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09177124500274658, "rewards/margins": 0.02119603380560875, "rewards/rejected": -0.11296728998422623, "step": 3035 }, { "epoch": 0.8550734828774348, "grad_norm": 0.5546875, "learning_rate": 3.1298139932800317e-07, "log_odds_chosen": 0.24225249886512756, "log_odds_ratio": -0.6946839094161987, "logits/chosen": -0.00331364874728024, "logits/rejected": 0.1221175566315651, "logps/chosen": -0.9066191911697388, "logps/rejected": -1.0242269039154053, "loss": 1.2658, "nll_loss": 1.0006282329559326, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0906619131565094, "rewards/margins": 0.0117607731372118, "rewards/rejected": -0.10242269188165665, "step": 3040 }, { "epoch": 0.8564798537374305, "grad_norm": 0.921875, "learning_rate": 3.070605790100026e-07, "log_odds_chosen": 0.47688302397727966, "log_odds_ratio": -0.5514553785324097, "logits/chosen": 0.3406530022621155, "logits/rejected": -0.20624065399169922, "logps/chosen": -0.7682023644447327, "logps/rejected": -1.119099497795105, "loss": 1.2605, "nll_loss": 1.0881941318511963, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07682023197412491, "rewards/margins": 0.03508970886468887, "rewards/rejected": -0.11190994828939438, "step": 3045 }, { "epoch": 0.8578862245974264, "grad_norm": 0.77734375, "learning_rate": 3.011926320673511e-07, "log_odds_chosen": 0.31140241026878357, "log_odds_ratio": -0.7348920106887817, "logits/chosen": -0.023136280477046967, "logits/rejected": -0.05698322132229805, "logps/chosen": -1.0393954515457153, "logps/rejected": -1.2733558416366577, "loss": 1.3406, "nll_loss": 1.4408925771713257, "rewards/accuracies": 0.5, "rewards/chosen": -0.10393954813480377, "rewards/margins": 0.02339603379368782, "rewards/rejected": -0.1273355782032013, "step": 3050 }, { "epoch": 0.8592925954574221, "grad_norm": 0.875, "learning_rate": 2.95377699980548e-07, "log_odds_chosen": 0.15080220997333527, "log_odds_ratio": -0.7218335270881653, "logits/chosen": 0.16377411782741547, "logits/rejected": -0.05593908950686455, "logps/chosen": -0.962912380695343, "logps/rejected": -1.0588743686676025, "loss": 1.2954, "nll_loss": 1.289074182510376, "rewards/accuracies": 0.5, "rewards/chosen": -0.09629125893115997, "rewards/margins": 0.009596194140613079, "rewards/rejected": -0.10588743537664413, "step": 3055 }, { "epoch": 0.8606989663174179, "grad_norm": 0.435546875, "learning_rate": 2.89615922951863e-07, "log_odds_chosen": 0.6246389746665955, "log_odds_ratio": -0.5178121328353882, "logits/chosen": 0.16498211026191711, "logits/rejected": -0.22444438934326172, "logps/chosen": -0.9330152273178101, "logps/rejected": -1.3666049242019653, "loss": 1.347, "nll_loss": 1.2327336072921753, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09330151975154877, "rewards/margins": 0.0433589443564415, "rewards/rejected": -0.13666047155857086, "step": 3060 }, { "epoch": 0.8621053371774137, "grad_norm": 0.5625, "learning_rate": 2.8390743990196063e-07, "log_odds_chosen": 0.21638545393943787, "log_odds_ratio": -0.7404422163963318, "logits/chosen": 0.15448644757270813, "logits/rejected": 0.014381295070052147, "logps/chosen": -0.9812231063842773, "logps/rejected": -1.1194281578063965, "loss": 1.2685, "nll_loss": 1.3578256368637085, "rewards/accuracies": 0.5, "rewards/chosen": -0.09812231361865997, "rewards/margins": 0.013820504769682884, "rewards/rejected": -0.1119428277015686, "step": 3065 }, { "epoch": 0.8635117080374095, "grad_norm": 0.703125, "learning_rate": 2.782523884665467e-07, "log_odds_chosen": 0.31919899582862854, "log_odds_ratio": -0.6073621511459351, "logits/chosen": 0.1252957582473755, "logits/rejected": -0.25018587708473206, "logps/chosen": -0.9057314991950989, "logps/rejected": -1.1693683862686157, "loss": 1.2821, "nll_loss": 1.341191053390503, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09057314693927765, "rewards/margins": 0.026363695040345192, "rewards/rejected": -0.11693684756755829, "step": 3070 }, { "epoch": 0.8649180788974052, "grad_norm": 0.99609375, "learning_rate": 2.7265090499305284e-07, "log_odds_chosen": 0.16994507610797882, "log_odds_ratio": -0.6835598349571228, "logits/chosen": 0.02478812076151371, "logits/rejected": -0.0021049350034445524, "logps/chosen": -1.0673532485961914, "logps/rejected": -1.141217589378357, "loss": 1.3122, "nll_loss": 1.3584511280059814, "rewards/accuracies": 0.5, "rewards/chosen": -0.10673532634973526, "rewards/margins": 0.007386439945548773, "rewards/rejected": -0.11412177234888077, "step": 3075 }, { "epoch": 0.866324449757401, "grad_norm": 0.6015625, "learning_rate": 2.671031245373476e-07, "log_odds_chosen": 0.5922014713287354, "log_odds_ratio": -0.6016316413879395, "logits/chosen": 0.3141447603702545, "logits/rejected": -0.19234515726566315, "logps/chosen": -0.805921733379364, "logps/rejected": -1.2572062015533447, "loss": 1.244, "nll_loss": 1.252669095993042, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08059217780828476, "rewards/margins": 0.045128434896469116, "rewards/rejected": -0.12572060525417328, "step": 3080 }, { "epoch": 0.8677308206173968, "grad_norm": 0.421875, "learning_rate": 2.6160918086048067e-07, "log_odds_chosen": 0.23566380143165588, "log_odds_ratio": -0.6851691603660583, "logits/chosen": 0.13757197558879852, "logits/rejected": 0.04290277883410454, "logps/chosen": -0.8842877149581909, "logps/rejected": -1.1051255464553833, "loss": 1.2529, "nll_loss": 1.2141902446746826, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08842878043651581, "rewards/margins": 0.022083774209022522, "rewards/rejected": -0.11051255464553833, "step": 3085 }, { "epoch": 0.8691371914773925, "grad_norm": 1.09375, "learning_rate": 2.56169206425457e-07, "log_odds_chosen": 0.6022747755050659, "log_odds_ratio": -0.5932275056838989, "logits/chosen": 0.2205590009689331, "logits/rejected": -0.08603726327419281, "logps/chosen": -0.8612334132194519, "logps/rejected": -1.2691211700439453, "loss": 1.2449, "nll_loss": 1.21316659450531, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08612334728240967, "rewards/margins": 0.04078877717256546, "rewards/rejected": -0.12691213190555573, "step": 3090 }, { "epoch": 0.8705435623373884, "grad_norm": 0.8359375, "learning_rate": 2.507833323940448e-07, "log_odds_chosen": 0.38121479749679565, "log_odds_ratio": -0.6170352697372437, "logits/chosen": 0.20064540207386017, "logits/rejected": -0.10161037743091583, "logps/chosen": -0.8439592123031616, "logps/rejected": -1.1321828365325928, "loss": 1.4369, "nll_loss": 1.206767201423645, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08439593017101288, "rewards/margins": 0.028822356835007668, "rewards/rejected": -0.1132182702422142, "step": 3095 }, { "epoch": 0.8719499331973841, "grad_norm": 0.38671875, "learning_rate": 2.454516886236102e-07, "log_odds_chosen": 0.39103588461875916, "log_odds_ratio": -0.6175183653831482, "logits/chosen": 0.1710575371980667, "logits/rejected": 0.1462509036064148, "logps/chosen": -0.964928150177002, "logps/rejected": -1.2527413368225098, "loss": 1.2979, "nll_loss": 1.2733262777328491, "rewards/accuracies": 0.75, "rewards/chosen": -0.09649281948804855, "rewards/margins": 0.02878131903707981, "rewards/rejected": -0.12527413666248322, "step": 3100 }, { "epoch": 0.87335630405738, "grad_norm": 0.94140625, "learning_rate": 2.4017440366398944e-07, "log_odds_chosen": 0.07706121355295181, "log_odds_ratio": -0.7358866930007935, "logits/chosen": 0.419283926486969, "logits/rejected": 0.1185811311006546, "logps/chosen": -0.9240352511405945, "logps/rejected": -0.995712161064148, "loss": 1.2982, "nll_loss": 1.1964889764785767, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09240352362394333, "rewards/margins": 0.00716769602149725, "rewards/rejected": -0.09957122057676315, "step": 3105 }, { "epoch": 0.8747626749173757, "grad_norm": 0.73046875, "learning_rate": 2.3495160475438678e-07, "log_odds_chosen": 0.012202749028801918, "log_odds_ratio": -0.751497745513916, "logits/chosen": 0.31101810932159424, "logits/rejected": 0.0647566169500351, "logps/chosen": -0.8119691014289856, "logps/rejected": -0.8204206228256226, "loss": 1.3133, "nll_loss": 1.2127889394760132, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08119690418243408, "rewards/margins": 0.0008451612666249275, "rewards/rejected": -0.08204207569360733, "step": 3110 }, { "epoch": 0.8761690457773715, "grad_norm": 0.4765625, "learning_rate": 2.2978341782030955e-07, "log_odds_chosen": 0.2562883496284485, "log_odds_ratio": -0.7287235260009766, "logits/chosen": 0.16910497844219208, "logits/rejected": 0.021375443786382675, "logps/chosen": -0.9910010099411011, "logps/rejected": -1.17304265499115, "loss": 1.2457, "nll_loss": 1.2075997591018677, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09910011291503906, "rewards/margins": 0.018204163759946823, "rewards/rejected": -0.11730428040027618, "step": 3115 }, { "epoch": 0.8775754166373673, "grad_norm": 0.4765625, "learning_rate": 2.246699674705291e-07, "log_odds_chosen": 0.4453654885292053, "log_odds_ratio": -0.5896596908569336, "logits/chosen": 0.07442586123943329, "logits/rejected": -0.11232948303222656, "logps/chosen": -0.8934370279312134, "logps/rejected": -1.1213937997817993, "loss": 1.342, "nll_loss": 1.2364587783813477, "rewards/accuracies": 0.75, "rewards/chosen": -0.08934369683265686, "rewards/margins": 0.02279568277299404, "rewards/rejected": -0.11213938146829605, "step": 3120 }, { "epoch": 0.8789817874973631, "grad_norm": 0.39453125, "learning_rate": 2.1961137699407774e-07, "log_odds_chosen": 0.18608251214027405, "log_odds_ratio": -0.6622592806816101, "logits/chosen": 0.13960817456245422, "logits/rejected": 0.06866041570901871, "logps/chosen": -1.1074713468551636, "logps/rejected": -1.2126071453094482, "loss": 1.2905, "nll_loss": 1.1634876728057861, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11074714362621307, "rewards/margins": 0.010513585060834885, "rewards/rejected": -0.12126071751117706, "step": 3125 }, { "epoch": 0.8803881583573588, "grad_norm": 0.412109375, "learning_rate": 2.1460776835727671e-07, "log_odds_chosen": 0.8946942090988159, "log_odds_ratio": -0.5820009708404541, "logits/chosen": 0.01775258220732212, "logits/rejected": -0.2365727424621582, "logps/chosen": -1.1353235244750977, "logps/rejected": -1.7781460285186768, "loss": 1.2756, "nll_loss": 1.1982827186584473, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11353236436843872, "rewards/margins": 0.06428224593400955, "rewards/rejected": -0.17781458795070648, "step": 3130 }, { "epoch": 0.8817945292173546, "grad_norm": 1.984375, "learning_rate": 2.0965926220079396e-07, "log_odds_chosen": 0.3438710570335388, "log_odds_ratio": -0.6113203763961792, "logits/chosen": 0.013415333814918995, "logits/rejected": 0.04428642615675926, "logps/chosen": -0.9512618184089661, "logps/rejected": -1.1797399520874023, "loss": 1.2454, "nll_loss": 1.2080955505371094, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09512616693973541, "rewards/margins": 0.02284781076014042, "rewards/rejected": -0.11797398328781128, "step": 3135 }, { "epoch": 0.8832009000773504, "grad_norm": 0.5625, "learning_rate": 2.0476597783673696e-07, "log_odds_chosen": -0.04732183367013931, "log_odds_ratio": -0.871155858039856, "logits/chosen": 0.39590129256248474, "logits/rejected": 0.1537986397743225, "logps/chosen": -0.9534363746643066, "logps/rejected": -0.9588222503662109, "loss": 1.288, "nll_loss": 1.1494090557098389, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09534362703561783, "rewards/margins": 0.000538596126716584, "rewards/rejected": -0.09588223695755005, "step": 3140 }, { "epoch": 0.8846072709373461, "grad_norm": 0.419921875, "learning_rate": 1.9992803324577537e-07, "log_odds_chosen": 0.14176274836063385, "log_odds_ratio": -0.7075371742248535, "logits/chosen": 0.1753927320241928, "logits/rejected": -0.047832123935222626, "logps/chosen": -1.0028040409088135, "logps/rejected": -1.0708872079849243, "loss": 1.3469, "nll_loss": 1.2555263042449951, "rewards/accuracies": 0.5, "rewards/chosen": -0.10028040409088135, "rewards/margins": 0.0068083046935498714, "rewards/rejected": -0.10708870738744736, "step": 3145 }, { "epoch": 0.886013641797342, "grad_norm": 0.45703125, "learning_rate": 1.951455450742959e-07, "log_odds_chosen": 0.05005607753992081, "log_odds_ratio": -0.7353087663650513, "logits/chosen": 0.0414562001824379, "logits/rejected": 0.09611912071704865, "logps/chosen": -1.035521388053894, "logps/rejected": -1.057826280593872, "loss": 1.3203, "nll_loss": 1.315623164176941, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10355212539434433, "rewards/margins": 0.002230483340099454, "rewards/rejected": -0.10578262805938721, "step": 3150 }, { "epoch": 0.8874200126573377, "grad_norm": 0.48828125, "learning_rate": 1.9041862863159065e-07, "log_odds_chosen": 0.23923692107200623, "log_odds_ratio": -0.6668912172317505, "logits/chosen": 0.08915407210588455, "logits/rejected": 0.14433476328849792, "logps/chosen": -0.9437106847763062, "logps/rejected": -1.0925174951553345, "loss": 1.2482, "nll_loss": 1.0153512954711914, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09437108039855957, "rewards/margins": 0.014880669303238392, "rewards/rejected": -0.10925173759460449, "step": 3155 }, { "epoch": 0.8888263835173336, "grad_norm": 0.458984375, "learning_rate": 1.8574739788707692e-07, "log_odds_chosen": 0.2607952952384949, "log_odds_ratio": -0.6574887037277222, "logits/chosen": 0.38875633478164673, "logits/rejected": 0.11233469098806381, "logps/chosen": -0.8368536829948425, "logps/rejected": -1.0057650804519653, "loss": 1.3697, "nll_loss": 1.295095682144165, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08368537575006485, "rewards/margins": 0.016891125589609146, "rewards/rejected": -0.1005764976143837, "step": 3160 }, { "epoch": 0.8902327543773293, "grad_norm": 0.43359375, "learning_rate": 1.811319654675478e-07, "log_odds_chosen": 0.48963475227355957, "log_odds_ratio": -0.558243989944458, "logits/chosen": 0.020070917904376984, "logits/rejected": 0.059196602553129196, "logps/chosen": -0.8479886054992676, "logps/rejected": -1.139087200164795, "loss": 1.2397, "nll_loss": 1.0761158466339111, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08479885756969452, "rewards/margins": 0.029109861701726913, "rewards/rejected": -0.11390872299671173, "step": 3165 }, { "epoch": 0.8916391252373251, "grad_norm": 1.1875, "learning_rate": 1.765724426544596e-07, "log_odds_chosen": -0.31798478960990906, "log_odds_ratio": -0.9315303564071655, "logits/chosen": 0.06348660588264465, "logits/rejected": 0.04159253090620041, "logps/chosen": -0.9252711534500122, "logps/rejected": -0.7397373914718628, "loss": 1.3367, "nll_loss": 1.2851605415344238, "rewards/accuracies": 0.25, "rewards/chosen": -0.0925271064043045, "rewards/margins": -0.018553372472524643, "rewards/rejected": -0.07397373020648956, "step": 3170 }, { "epoch": 0.8930454960973209, "grad_norm": 0.51953125, "learning_rate": 1.7206893938124664e-07, "log_odds_chosen": 0.289433091878891, "log_odds_ratio": -0.6993290185928345, "logits/chosen": 0.17868265509605408, "logits/rejected": 0.11774604022502899, "logps/chosen": -0.8691530227661133, "logps/rejected": -0.9393417239189148, "loss": 1.3159, "nll_loss": 1.2740023136138916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08691529929637909, "rewards/margins": 0.007018885109573603, "rewards/rejected": -0.09393417835235596, "step": 3175 }, { "epoch": 0.8944518669573166, "grad_norm": 1.0078125, "learning_rate": 1.6762156423067055e-07, "log_odds_chosen": 0.21066789329051971, "log_odds_ratio": -0.7149744629859924, "logits/chosen": 0.14750464260578156, "logits/rejected": -0.06367696821689606, "logps/chosen": -0.9500678777694702, "logps/rejected": -1.096605658531189, "loss": 1.2811, "nll_loss": 1.2395678758621216, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0950067862868309, "rewards/margins": 0.014653787016868591, "rewards/rejected": -0.1096605658531189, "step": 3180 }, { "epoch": 0.8958582378173124, "grad_norm": 0.578125, "learning_rate": 1.6323042443220276e-07, "log_odds_chosen": 0.392107218503952, "log_odds_ratio": -0.6191523671150208, "logits/chosen": 0.16187028586864471, "logits/rejected": 0.1439216136932373, "logps/chosen": -0.9571911096572876, "logps/rejected": -1.1570160388946533, "loss": 1.2599, "nll_loss": 1.3180067539215088, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0957191064953804, "rewards/margins": 0.019982485100626945, "rewards/rejected": -0.1157015934586525, "step": 3185 }, { "epoch": 0.8972646086773082, "grad_norm": 0.251953125, "learning_rate": 1.5889562585943958e-07, "log_odds_chosen": 0.6321003437042236, "log_odds_ratio": -0.5375659465789795, "logits/chosen": 0.22293278574943542, "logits/rejected": -0.12136626243591309, "logps/chosen": -0.9416702389717102, "logps/rejected": -1.3543380498886108, "loss": 1.2855, "nll_loss": 1.086060881614685, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09416702389717102, "rewards/margins": 0.041266776621341705, "rewards/rejected": -0.13543380796909332, "step": 3190 }, { "epoch": 0.898670979537304, "grad_norm": 0.87109375, "learning_rate": 1.5461727302754858e-07, "log_odds_chosen": 0.36316487193107605, "log_odds_ratio": -0.6222284436225891, "logits/chosen": 0.2005232870578766, "logits/rejected": -0.357538104057312, "logps/chosen": -0.9915369749069214, "logps/rejected": -1.2216651439666748, "loss": 1.2588, "nll_loss": 1.2823837995529175, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09915368258953094, "rewards/margins": 0.023012813180685043, "rewards/rejected": -0.12216649204492569, "step": 3195 }, { "epoch": 0.9000773503972997, "grad_norm": 0.58984375, "learning_rate": 1.5039546909075082e-07, "log_odds_chosen": -0.10971565544605255, "log_odds_ratio": -0.8382658958435059, "logits/chosen": 0.2254742681980133, "logits/rejected": -0.114879310131073, "logps/chosen": -0.9886395335197449, "logps/rejected": -0.953644871711731, "loss": 1.4266, "nll_loss": 1.540959358215332, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09886395186185837, "rewards/margins": -0.0034994587767869234, "rewards/rejected": -0.09536449611186981, "step": 3200 }, { "epoch": 0.9014837212572956, "grad_norm": 1.0390625, "learning_rate": 1.4623031583982982e-07, "log_odds_chosen": -0.10182987153530121, "log_odds_ratio": -0.849809467792511, "logits/chosen": 0.29536086320877075, "logits/rejected": 0.19249524176120758, "logps/chosen": -0.9300820231437683, "logps/rejected": -0.8594253659248352, "loss": 1.2539, "nll_loss": 1.049961805343628, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09300820529460907, "rewards/margins": -0.007065662182867527, "rewards/rejected": -0.08594253659248352, "step": 3205 }, { "epoch": 0.9028900921172913, "grad_norm": 0.71875, "learning_rate": 1.4212191369968125e-07, "log_odds_chosen": 0.6274880170822144, "log_odds_ratio": -0.5335894823074341, "logits/chosen": 0.2696693539619446, "logits/rejected": -0.06525443494319916, "logps/chosen": -0.9380648732185364, "logps/rejected": -1.3793491125106812, "loss": 1.3315, "nll_loss": 1.16745924949646, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09380649030208588, "rewards/margins": 0.04412844032049179, "rewards/rejected": -0.13793492317199707, "step": 3210 }, { "epoch": 0.9042964629772872, "grad_norm": 0.38671875, "learning_rate": 1.3807036172688887e-07, "log_odds_chosen": 0.04567628353834152, "log_odds_ratio": -0.7277408838272095, "logits/chosen": 0.24385914206504822, "logits/rejected": 0.10521028935909271, "logps/chosen": -0.9530619382858276, "logps/rejected": -0.9420475959777832, "loss": 1.271, "nll_loss": 1.2172807455062866, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09530619531869888, "rewards/margins": -0.001101435860618949, "rewards/rejected": -0.09420476108789444, "step": 3215 }, { "epoch": 0.9057028338372829, "grad_norm": 1.3671875, "learning_rate": 1.3407575760733814e-07, "log_odds_chosen": -0.014312508516013622, "log_odds_ratio": -0.8823341131210327, "logits/chosen": 0.11543910205364227, "logits/rejected": -0.09864558279514313, "logps/chosen": -1.0436660051345825, "logps/rejected": -1.1146585941314697, "loss": 1.2537, "nll_loss": 1.1284033060073853, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10436661541461945, "rewards/margins": 0.007099261973053217, "rewards/rejected": -0.11146585643291473, "step": 3220 }, { "epoch": 0.9071092046972786, "grad_norm": 0.546875, "learning_rate": 1.3013819765385903e-07, "log_odds_chosen": 0.05668574571609497, "log_odds_ratio": -0.7083471417427063, "logits/chosen": 0.13047949969768524, "logits/rejected": 0.05119504779577255, "logps/chosen": -0.9959262013435364, "logps/rejected": -1.0308897495269775, "loss": 1.399, "nll_loss": 1.4183380603790283, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09959262609481812, "rewards/margins": 0.0034963549114763737, "rewards/rejected": -0.10308897495269775, "step": 3225 }, { "epoch": 0.9085155755572745, "grad_norm": 0.62890625, "learning_rate": 1.262577768039061e-07, "log_odds_chosen": 0.2668651342391968, "log_odds_ratio": -0.6243875026702881, "logits/chosen": 0.1160123199224472, "logits/rejected": 0.13229742646217346, "logps/chosen": -0.9078758358955383, "logps/rejected": -1.0736418962478638, "loss": 1.3454, "nll_loss": 1.1889612674713135, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09078757464885712, "rewards/margins": 0.01657661236822605, "rewards/rejected": -0.10736417770385742, "step": 3230 }, { "epoch": 0.9099219464172702, "grad_norm": 0.41015625, "learning_rate": 1.224345886172673e-07, "log_odds_chosen": 0.32301950454711914, "log_odds_ratio": -0.6222423315048218, "logits/chosen": 0.30127039551734924, "logits/rejected": -0.06227840110659599, "logps/chosen": -0.971104621887207, "logps/rejected": -1.1543586254119873, "loss": 1.3673, "nll_loss": 1.2399194240570068, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09711045026779175, "rewards/margins": 0.01832542195916176, "rewards/rejected": -0.1154358834028244, "step": 3235 }, { "epoch": 0.911328317277266, "grad_norm": 0.5546875, "learning_rate": 1.1866872527381029e-07, "log_odds_chosen": 0.04946266487240791, "log_odds_ratio": -0.7625073790550232, "logits/chosen": 0.021832874044775963, "logits/rejected": 0.01887836493551731, "logps/chosen": -1.1444568634033203, "logps/rejected": -1.1844040155410767, "loss": 1.2423, "nll_loss": 1.0771002769470215, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11444568634033203, "rewards/margins": 0.003994708880782127, "rewards/rejected": -0.11844038963317871, "step": 3240 }, { "epoch": 0.9127346881372618, "grad_norm": 0.51171875, "learning_rate": 1.1496027757125743e-07, "log_odds_chosen": 0.18768064677715302, "log_odds_ratio": -0.8096588253974915, "logits/chosen": 0.05349903181195259, "logits/rejected": 0.061613865196704865, "logps/chosen": -1.0463298559188843, "logps/rejected": -1.1358802318572998, "loss": 1.3161, "nll_loss": 1.3629252910614014, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10463298857212067, "rewards/margins": 0.008955043740570545, "rewards/rejected": -0.11358805000782013, "step": 3245 }, { "epoch": 0.9141410589972576, "grad_norm": 1.4296875, "learning_rate": 1.1130933492299906e-07, "log_odds_chosen": 0.3283534646034241, "log_odds_ratio": -0.6355275511741638, "logits/chosen": 0.24122712016105652, "logits/rejected": 0.14860299229621887, "logps/chosen": -0.8834742307662964, "logps/rejected": -1.0724339485168457, "loss": 1.3049, "nll_loss": 1.1096837520599365, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08834744244813919, "rewards/margins": 0.018895963206887245, "rewards/rejected": -0.10724340379238129, "step": 3250 }, { "epoch": 0.9155474298572533, "grad_norm": 0.40234375, "learning_rate": 1.0771598535593508e-07, "log_odds_chosen": 0.5767858028411865, "log_odds_ratio": -0.49948233366012573, "logits/chosen": 0.1712990701198578, "logits/rejected": -0.1658778339624405, "logps/chosen": -0.769035279750824, "logps/rejected": -1.0615276098251343, "loss": 1.3339, "nll_loss": 1.265640377998352, "rewards/accuracies": 0.75, "rewards/chosen": -0.07690352201461792, "rewards/margins": 0.02924923039972782, "rewards/rejected": -0.10615275800228119, "step": 3255 }, { "epoch": 0.9169538007172492, "grad_norm": 0.62109375, "learning_rate": 1.0418031550835594e-07, "log_odds_chosen": 0.7740265727043152, "log_odds_ratio": -0.5304247140884399, "logits/chosen": 0.22207096219062805, "logits/rejected": -0.11452925205230713, "logps/chosen": -0.9042800664901733, "logps/rejected": -1.4035189151763916, "loss": 1.2448, "nll_loss": 1.3662537336349487, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09042801707983017, "rewards/margins": 0.04992387443780899, "rewards/rejected": -0.14035189151763916, "step": 3260 }, { "epoch": 0.9183601715772449, "grad_norm": 0.478515625, "learning_rate": 1.0070241062785063e-07, "log_odds_chosen": 0.04200774058699608, "log_odds_ratio": -0.8028166890144348, "logits/chosen": 0.19796448945999146, "logits/rejected": 0.07584884762763977, "logps/chosen": -0.966947078704834, "logps/rejected": -0.9276117086410522, "loss": 1.2781, "nll_loss": 1.2592346668243408, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.096694715321064, "rewards/margins": -0.0039335330948233604, "rewards/rejected": -0.09276118129491806, "step": 3265 }, { "epoch": 0.9197665424372407, "grad_norm": 0.546875, "learning_rate": 9.72823545692525e-08, "log_odds_chosen": 0.28387412428855896, "log_odds_ratio": -0.6854857802391052, "logits/chosen": 0.2692652642726898, "logits/rejected": -0.17600694298744202, "logps/chosen": -0.9816180467605591, "logps/rejected": -1.206395149230957, "loss": 1.3407, "nll_loss": 1.2754056453704834, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09816179424524307, "rewards/margins": 0.02247772179543972, "rewards/rejected": -0.12063954025506973, "step": 3270 }, { "epoch": 0.9211729132972365, "grad_norm": 0.96484375, "learning_rate": 9.392022979261811e-08, "log_odds_chosen": 0.2608630359172821, "log_odds_ratio": -0.6507230997085571, "logits/chosen": -0.025340568274259567, "logits/rejected": 0.03941095247864723, "logps/chosen": -0.9909608960151672, "logps/rejected": -1.1462547779083252, "loss": 1.377, "nll_loss": 1.3717944622039795, "rewards/accuracies": 0.5, "rewards/chosen": -0.09909608960151672, "rewards/margins": 0.015529394149780273, "rewards/rejected": -0.1146254763007164, "step": 3275 }, { "epoch": 0.9225792841572322, "grad_norm": 0.64453125, "learning_rate": 9.061611736123716e-08, "log_odds_chosen": 0.2979525327682495, "log_odds_ratio": -0.6411749124526978, "logits/chosen": -0.06878896057605743, "logits/rejected": 0.00978168100118637, "logps/chosen": -0.9312704205513, "logps/rejected": -1.1202948093414307, "loss": 1.3165, "nll_loss": 1.3011987209320068, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09312702715396881, "rewards/margins": 0.018902456387877464, "rewards/rejected": -0.11202949285507202, "step": 3280 }, { "epoch": 0.9239856550172281, "grad_norm": 0.310546875, "learning_rate": 8.737009693968068e-08, "log_odds_chosen": 0.3643631935119629, "log_odds_ratio": -0.6027190089225769, "logits/chosen": 0.33368968963623047, "logits/rejected": 0.02814490720629692, "logps/chosen": -0.9008346796035767, "logps/rejected": -1.0817344188690186, "loss": 1.234, "nll_loss": 1.206937313079834, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09008348733186722, "rewards/margins": 0.018089953809976578, "rewards/rejected": -0.1081734448671341, "step": 3285 }, { "epoch": 0.9253920258772238, "grad_norm": 0.35546875, "learning_rate": 8.418224679187792e-08, "log_odds_chosen": 0.00694617023691535, "log_odds_ratio": -0.8071894645690918, "logits/chosen": -0.014169919304549694, "logits/rejected": 0.1118505448102951, "logps/chosen": -1.275794506072998, "logps/rejected": -1.2462470531463623, "loss": 1.2667, "nll_loss": 1.1461488008499146, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1275794506072998, "rewards/margins": -0.0029547386802732944, "rewards/rejected": -0.12462472915649414, "step": 3290 }, { "epoch": 0.9267983967372196, "grad_norm": 0.55859375, "learning_rate": 8.105264377923056e-08, "log_odds_chosen": 0.5860501527786255, "log_odds_ratio": -0.5213780999183655, "logits/chosen": 0.24955987930297852, "logits/rejected": -0.05474225804209709, "logps/chosen": -0.9786784052848816, "logps/rejected": -1.3315867185592651, "loss": 1.353, "nll_loss": 1.3573219776153564, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09786783903837204, "rewards/margins": 0.03529084101319313, "rewards/rejected": -0.13315868377685547, "step": 3295 }, { "epoch": 0.9282047675972154, "grad_norm": 0.671875, "learning_rate": 7.798136335875895e-08, "log_odds_chosen": 0.263238787651062, "log_odds_ratio": -0.6176015138626099, "logits/chosen": 0.39209964871406555, "logits/rejected": 0.1130467876791954, "logps/chosen": -0.7684231400489807, "logps/rejected": -0.8703362345695496, "loss": 1.2282, "nll_loss": 1.0319740772247314, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07684232294559479, "rewards/margins": 0.010191314853727818, "rewards/rejected": -0.08703363686800003, "step": 3300 }, { "epoch": 0.9296111384572112, "grad_norm": 0.439453125, "learning_rate": 7.496847958128273e-08, "log_odds_chosen": 0.4197857975959778, "log_odds_ratio": -0.5740344524383545, "logits/chosen": 0.116268590092659, "logits/rejected": -0.025552403181791306, "logps/chosen": -0.8456694483757019, "logps/rejected": -1.0732481479644775, "loss": 1.301, "nll_loss": 1.2720739841461182, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08456695824861526, "rewards/margins": 0.02275785803794861, "rewards/rejected": -0.10732481628656387, "step": 3305 }, { "epoch": 0.931017509317207, "grad_norm": 0.875, "learning_rate": 7.201406508963698e-08, "log_odds_chosen": 0.7171459794044495, "log_odds_ratio": -0.44450870156288147, "logits/chosen": 0.09888849407434464, "logits/rejected": -0.3451232314109802, "logps/chosen": -0.9191095232963562, "logps/rejected": -1.3688544034957886, "loss": 1.3185, "nll_loss": 1.353026270866394, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0919109433889389, "rewards/margins": 0.04497448354959488, "rewards/rejected": -0.13688543438911438, "step": 3310 }, { "epoch": 0.9324238801772027, "grad_norm": 0.71875, "learning_rate": 6.911819111691809e-08, "log_odds_chosen": 0.13063013553619385, "log_odds_ratio": -0.6960271596908569, "logits/chosen": 0.0920720174908638, "logits/rejected": -0.05112043768167496, "logps/chosen": -1.0039879083633423, "logps/rejected": -1.1332917213439941, "loss": 1.256, "nll_loss": 1.4166369438171387, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10039877891540527, "rewards/margins": 0.012930400669574738, "rewards/rejected": -0.11332918703556061, "step": 3315 }, { "epoch": 0.9338302510371985, "grad_norm": 1.390625, "learning_rate": 6.628092748476839e-08, "log_odds_chosen": 0.10143091529607773, "log_odds_ratio": -0.7682610750198364, "logits/chosen": -0.03156689926981926, "logits/rejected": 0.10378922522068024, "logps/chosen": -1.0922961235046387, "logps/rejected": -1.1099097728729248, "loss": 1.3721, "nll_loss": 1.3745818138122559, "rewards/accuracies": 0.5, "rewards/chosen": -0.10922960937023163, "rewards/margins": 0.0017613600939512253, "rewards/rejected": -0.1109909787774086, "step": 3320 }, { "epoch": 0.9352366218971943, "grad_norm": 0.7734375, "learning_rate": 6.350234260169175e-08, "log_odds_chosen": 0.2494625747203827, "log_odds_ratio": -0.727098822593689, "logits/chosen": 0.11463620513677597, "logits/rejected": -0.00450973492115736, "logps/chosen": -0.9400336146354675, "logps/rejected": -1.0810682773590088, "loss": 1.2895, "nll_loss": 1.2487919330596924, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0940033569931984, "rewards/margins": 0.01410345546901226, "rewards/rejected": -0.1081068143248558, "step": 3325 }, { "epoch": 0.9366429927571901, "grad_norm": 3.140625, "learning_rate": 6.078250346140457e-08, "log_odds_chosen": 0.5818791389465332, "log_odds_ratio": -0.5487780570983887, "logits/chosen": 0.1828577071428299, "logits/rejected": -0.32167941331863403, "logps/chosen": -0.975090503692627, "logps/rejected": -1.378078818321228, "loss": 1.2588, "nll_loss": 1.4379678964614868, "rewards/accuracies": 0.75, "rewards/chosen": -0.09750904142856598, "rewards/margins": 0.04029882326722145, "rewards/rejected": -0.13780787587165833, "step": 3330 }, { "epoch": 0.9380493636171858, "grad_norm": 0.57421875, "learning_rate": 5.812147564122017e-08, "log_odds_chosen": 0.08344351500272751, "log_odds_ratio": -0.7285597920417786, "logits/chosen": 0.1936129629611969, "logits/rejected": 0.06743821501731873, "logps/chosen": -0.9562802314758301, "logps/rejected": -1.0205862522125244, "loss": 1.3033, "nll_loss": 1.1195799112319946, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09562802314758301, "rewards/margins": 0.0064305951818823814, "rewards/rejected": -0.10205862671136856, "step": 3335 }, { "epoch": 0.9394557344771817, "grad_norm": 0.56640625, "learning_rate": 5.5519323300467775e-08, "log_odds_chosen": 0.7931851148605347, "log_odds_ratio": -0.4802130162715912, "logits/chosen": 0.3637743890285492, "logits/rejected": -0.05420628935098648, "logps/chosen": -0.7939968705177307, "logps/rejected": -1.3140610456466675, "loss": 1.3665, "nll_loss": 1.1318628787994385, "rewards/accuracies": 0.75, "rewards/chosen": -0.07939968258142471, "rewards/margins": 0.052006423473358154, "rewards/rejected": -0.13140609860420227, "step": 3340 }, { "epoch": 0.9408621053371774, "grad_norm": 0.7421875, "learning_rate": 5.29761091789463e-08, "log_odds_chosen": 0.10022227466106415, "log_odds_ratio": -0.7228878140449524, "logits/chosen": 0.06098024919629097, "logits/rejected": -0.02456871047616005, "logps/chosen": -0.8794612884521484, "logps/rejected": -0.9316323399543762, "loss": 1.413, "nll_loss": 1.4631612300872803, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08794611692428589, "rewards/margins": 0.00521711353212595, "rewards/rejected": -0.09316324442625046, "step": 3345 }, { "epoch": 0.9422684761971732, "grad_norm": 0.8671875, "learning_rate": 5.049189459541054e-08, "log_odds_chosen": 0.36486390233039856, "log_odds_ratio": -0.6099367141723633, "logits/chosen": 0.06692637503147125, "logits/rejected": -0.11904720962047577, "logps/chosen": -1.2377574443817139, "logps/rejected": -1.5012363195419312, "loss": 1.3822, "nll_loss": 1.4673506021499634, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12377575784921646, "rewards/margins": 0.026347899809479713, "rewards/rejected": -0.15012364089488983, "step": 3350 }, { "epoch": 0.943674847057169, "grad_norm": 1.1328125, "learning_rate": 4.806673944609347e-08, "log_odds_chosen": 0.06244078278541565, "log_odds_ratio": -0.78443843126297, "logits/chosen": 0.17202523350715637, "logits/rejected": 0.10731378942728043, "logps/chosen": -0.9702354669570923, "logps/rejected": -1.0049316883087158, "loss": 1.2405, "nll_loss": 1.2193858623504639, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.09702353924512863, "rewards/margins": 0.0034696266520768404, "rewards/rejected": -0.1004931703209877, "step": 3355 }, { "epoch": 0.9450812179171647, "grad_norm": 0.498046875, "learning_rate": 4.570070220326128e-08, "log_odds_chosen": 0.11939896643161774, "log_odds_ratio": -0.7277523279190063, "logits/chosen": 0.12861505150794983, "logits/rejected": 0.02386235073208809, "logps/chosen": -1.0052900314331055, "logps/rejected": -1.1172101497650146, "loss": 1.2454, "nll_loss": 1.1664661169052124, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10052900016307831, "rewards/margins": 0.011192025616765022, "rewards/rejected": -0.11172102391719818, "step": 3360 }, { "epoch": 0.9464875887771605, "grad_norm": 0.5390625, "learning_rate": 4.33938399138048e-08, "log_odds_chosen": 0.5814080238342285, "log_odds_ratio": -0.5879030227661133, "logits/chosen": 0.28143981099128723, "logits/rejected": 0.061570387333631516, "logps/chosen": -0.7967894077301025, "logps/rejected": -1.0686571598052979, "loss": 1.3116, "nll_loss": 1.094059705734253, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07967893779277802, "rewards/margins": 0.02718677744269371, "rewards/rejected": -0.10686571896076202, "step": 3365 }, { "epoch": 0.9478939596371563, "grad_norm": 0.4765625, "learning_rate": 4.114620819786308e-08, "log_odds_chosen": 0.23346397280693054, "log_odds_ratio": -0.6866267919540405, "logits/chosen": 0.25685185194015503, "logits/rejected": -0.01630682870745659, "logps/chosen": -0.9060823321342468, "logps/rejected": -1.0845293998718262, "loss": 1.3498, "nll_loss": 1.196526288986206, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09060823917388916, "rewards/margins": 0.01784469559788704, "rewards/rejected": -0.1084529384970665, "step": 3370 }, { "epoch": 0.9493003304971521, "grad_norm": 0.52734375, "learning_rate": 3.89578612474828e-08, "log_odds_chosen": -0.14990894496440887, "log_odds_ratio": -0.9641848802566528, "logits/chosen": 0.24826081097126007, "logits/rejected": 0.09183430671691895, "logps/chosen": -1.2104682922363281, "logps/rejected": -1.0689446926116943, "loss": 1.272, "nll_loss": 1.293968915939331, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.12104681879281998, "rewards/margins": -0.014152342453598976, "rewards/rejected": -0.10689447820186615, "step": 3375 }, { "epoch": 0.9507067013571479, "grad_norm": 1.40625, "learning_rate": 3.682885182531154e-08, "log_odds_chosen": 0.2269192934036255, "log_odds_ratio": -0.673160970211029, "logits/chosen": 0.2562906742095947, "logits/rejected": 0.09089311212301254, "logps/chosen": -0.9358326196670532, "logps/rejected": -1.0342806577682495, "loss": 1.3099, "nll_loss": 1.2557461261749268, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09358327090740204, "rewards/margins": 0.009844806976616383, "rewards/rejected": -0.10342808067798615, "step": 3380 }, { "epoch": 0.9521130722171437, "grad_norm": 0.953125, "learning_rate": 3.475923126332575e-08, "log_odds_chosen": -0.13816693425178528, "log_odds_ratio": -0.9078758955001831, "logits/chosen": 0.20385582745075226, "logits/rejected": 0.0857970267534256, "logps/chosen": -0.9905937314033508, "logps/rejected": -0.9162753820419312, "loss": 1.3297, "nll_loss": 1.271182656288147, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09905937314033508, "rewards/margins": -0.007431834936141968, "rewards/rejected": -0.09162753820419312, "step": 3385 }, { "epoch": 0.9535194430771394, "grad_norm": 0.54296875, "learning_rate": 3.2749049461592295e-08, "log_odds_chosen": 0.11129869520664215, "log_odds_ratio": -0.7079453468322754, "logits/chosen": 0.20724210143089294, "logits/rejected": 0.046768300235271454, "logps/chosen": -0.9434338808059692, "logps/rejected": -1.0389223098754883, "loss": 1.2332, "nll_loss": 1.212593913078308, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0943433940410614, "rewards/margins": 0.009548845700919628, "rewards/rejected": -0.10389222949743271, "step": 3390 }, { "epoch": 0.9549258139371353, "grad_norm": 0.498046875, "learning_rate": 3.0798354887066897e-08, "log_odds_chosen": -0.019172105938196182, "log_odds_ratio": -0.7809979915618896, "logits/chosen": -0.1130768209695816, "logits/rejected": 0.10138092190027237, "logps/chosen": -1.010284185409546, "logps/rejected": -0.9839268922805786, "loss": 1.3462, "nll_loss": 1.2669436931610107, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.10102842003107071, "rewards/margins": -0.0026357315946370363, "rewards/rejected": -0.09839268773794174, "step": 3395 }, { "epoch": 0.956332184797131, "grad_norm": 0.357421875, "learning_rate": 2.890719457242397e-08, "log_odds_chosen": 0.4177800714969635, "log_odds_ratio": -0.5798153281211853, "logits/chosen": 0.15526822209358215, "logits/rejected": 0.1292274296283722, "logps/chosen": -0.8883660435676575, "logps/rejected": -1.1707476377487183, "loss": 1.381, "nll_loss": 1.1893246173858643, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08883660286664963, "rewards/margins": 0.02823815681040287, "rewards/rejected": -0.11707476526498795, "step": 3400 }, { "epoch": 0.9577385556571267, "grad_norm": 0.71875, "learning_rate": 2.707561411492393e-08, "log_odds_chosen": 0.24508845806121826, "log_odds_ratio": -0.6735808849334717, "logits/chosen": 0.18047787249088287, "logits/rejected": -0.13641862571239471, "logps/chosen": -0.9271578788757324, "logps/rejected": -1.1271312236785889, "loss": 1.2555, "nll_loss": 1.260265588760376, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.092715784907341, "rewards/margins": 0.01999734900891781, "rewards/rejected": -0.11271314322948456, "step": 3405 }, { "epoch": 0.9591449265171226, "grad_norm": 0.47265625, "learning_rate": 2.5303657675312677e-08, "log_odds_chosen": 0.24291105568408966, "log_odds_ratio": -0.6924997568130493, "logits/chosen": 0.06729461997747421, "logits/rejected": 0.11077950149774551, "logps/chosen": -0.9200528264045715, "logps/rejected": -1.05774986743927, "loss": 1.2593, "nll_loss": 1.0932310819625854, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09200528264045715, "rewards/margins": 0.013769703917205334, "rewards/rejected": -0.10577498376369476, "step": 3410 }, { "epoch": 0.9605512973771183, "grad_norm": 0.76171875, "learning_rate": 2.359136797675743e-08, "log_odds_chosen": 0.7608834505081177, "log_odds_ratio": -0.5808351635932922, "logits/chosen": 0.17602987587451935, "logits/rejected": 0.07526994496583939, "logps/chosen": -0.855022132396698, "logps/rejected": -1.4509608745574951, "loss": 1.2025, "nll_loss": 1.0686630010604858, "rewards/accuracies": 0.75, "rewards/chosen": -0.08550222218036652, "rewards/margins": 0.05959387496113777, "rewards/rejected": -0.145096093416214, "step": 3415 }, { "epoch": 0.9619576682371141, "grad_norm": 0.419921875, "learning_rate": 2.1938786303817295e-08, "log_odds_chosen": 0.45528849959373474, "log_odds_ratio": -0.6140977144241333, "logits/chosen": 0.077461838722229, "logits/rejected": -0.12476935237646103, "logps/chosen": -0.8827389478683472, "logps/rejected": -1.1483733654022217, "loss": 1.3045, "nll_loss": 1.3081581592559814, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08827390521764755, "rewards/margins": 0.026563435792922974, "rewards/rejected": -0.11483733355998993, "step": 3420 }, { "epoch": 0.9633640390971099, "grad_norm": 0.56640625, "learning_rate": 2.0345952501445722e-08, "log_odds_chosen": 0.6777931451797485, "log_odds_ratio": -0.529708743095398, "logits/chosen": 0.3582519292831421, "logits/rejected": -0.17594197392463684, "logps/chosen": -0.8322056531906128, "logps/rejected": -1.263925313949585, "loss": 1.3209, "nll_loss": 1.2668817043304443, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08322056382894516, "rewards/margins": 0.0431719608604908, "rewards/rejected": -0.12639251351356506, "step": 3425 }, { "epoch": 0.9647704099571057, "grad_norm": 1.1171875, "learning_rate": 1.881290497403321e-08, "log_odds_chosen": 0.07137568295001984, "log_odds_ratio": -0.7536412477493286, "logits/chosen": 0.39291954040527344, "logits/rejected": 0.10763299465179443, "logps/chosen": -1.019919753074646, "logps/rejected": -1.0896474123001099, "loss": 1.4614, "nll_loss": 1.3426240682601929, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.10199197381734848, "rewards/margins": 0.006972762290388346, "rewards/rejected": -0.10896474123001099, "step": 3430 }, { "epoch": 0.9661767808171015, "grad_norm": 0.439453125, "learning_rate": 1.73396806844775e-08, "log_odds_chosen": 0.4001692831516266, "log_odds_ratio": -0.6367109417915344, "logits/chosen": 0.19484727084636688, "logits/rejected": 0.09062852710485458, "logps/chosen": -0.950181782245636, "logps/rejected": -1.245031714439392, "loss": 1.2299, "nll_loss": 1.2423794269561768, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.095018170773983, "rewards/margins": 0.029485007748007774, "rewards/rejected": -0.12450318038463593, "step": 3435 }, { "epoch": 0.9675831516770973, "grad_norm": 1.5703125, "learning_rate": 1.5926315153295402e-08, "log_odds_chosen": 0.21363556385040283, "log_odds_ratio": -0.6855403780937195, "logits/chosen": -0.012852529995143414, "logits/rejected": 0.035267848521471024, "logps/chosen": -1.0747730731964111, "logps/rejected": -1.145056962966919, "loss": 1.2873, "nll_loss": 1.251246452331543, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10747730731964111, "rewards/margins": 0.007028372026979923, "rewards/rejected": -0.11450569331645966, "step": 3440 }, { "epoch": 0.968989522537093, "grad_norm": 0.69921875, "learning_rate": 1.4572842457764858e-08, "log_odds_chosen": 0.36453381180763245, "log_odds_ratio": -0.6931215524673462, "logits/chosen": 0.17390012741088867, "logits/rejected": 0.009960390627384186, "logps/chosen": -0.9899552464485168, "logps/rejected": -1.2095123529434204, "loss": 1.3039, "nll_loss": 1.2584543228149414, "rewards/accuracies": 0.5, "rewards/chosen": -0.09899552166461945, "rewards/margins": 0.021955717355012894, "rewards/rejected": -0.12095125019550323, "step": 3445 }, { "epoch": 0.9703958933970888, "grad_norm": 0.59375, "learning_rate": 1.3279295231103661e-08, "log_odds_chosen": 0.22130723297595978, "log_odds_ratio": -0.7411271333694458, "logits/chosen": -0.0032444163225591183, "logits/rejected": -0.20411105453968048, "logps/chosen": -1.0461972951889038, "logps/rejected": -1.230019211769104, "loss": 1.3301, "nll_loss": 1.3564178943634033, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10461972653865814, "rewards/margins": 0.018382195383310318, "rewards/rejected": -0.12300191819667816, "step": 3450 }, { "epoch": 0.9718022642570846, "grad_norm": 0.484375, "learning_rate": 1.204570466168259e-08, "log_odds_chosen": 0.6615599393844604, "log_odds_ratio": -0.45958274602890015, "logits/chosen": 0.2637723684310913, "logits/rejected": -0.2658749222755432, "logps/chosen": -0.797881543636322, "logps/rejected": -1.2056782245635986, "loss": 1.3484, "nll_loss": 1.2117133140563965, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07978816330432892, "rewards/margins": 0.04077966883778572, "rewards/rejected": -0.12056783586740494, "step": 3455 }, { "epoch": 0.9732086351170803, "grad_norm": 2.484375, "learning_rate": 1.087210049227405e-08, "log_odds_chosen": 0.4059210419654846, "log_odds_ratio": -0.6156436204910278, "logits/chosen": 0.42425599694252014, "logits/rejected": -0.07910940796136856, "logps/chosen": -0.7596513032913208, "logps/rejected": -1.0463638305664062, "loss": 1.2396, "nll_loss": 1.1112778186798096, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07596512883901596, "rewards/margins": 0.028671249747276306, "rewards/rejected": -0.10463637113571167, "step": 3460 }, { "epoch": 0.9746150059770762, "grad_norm": 0.5078125, "learning_rate": 9.758511019333505e-09, "log_odds_chosen": 0.18497975170612335, "log_odds_ratio": -0.6618334650993347, "logits/chosen": 0.21915988624095917, "logits/rejected": -0.14438927173614502, "logps/chosen": -0.990871787071228, "logps/rejected": -1.1092342138290405, "loss": 1.2737, "nll_loss": 1.3173089027404785, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0990871861577034, "rewards/margins": 0.011836233548820019, "rewards/rejected": -0.1109234094619751, "step": 3465 }, { "epoch": 0.9760213768370719, "grad_norm": 0.52734375, "learning_rate": 8.704963092319164e-09, "log_odds_chosen": 0.14419862627983093, "log_odds_ratio": -0.6739202737808228, "logits/chosen": 0.13422077894210815, "logits/rejected": 0.11353013664484024, "logps/chosen": -0.8795303106307983, "logps/rejected": -0.9439455270767212, "loss": 1.3173, "nll_loss": 1.1744730472564697, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08795302361249924, "rewards/margins": 0.006441520992666483, "rewards/rejected": -0.09439454972743988, "step": 3470 }, { "epoch": 0.9774277476970678, "grad_norm": 0.3515625, "learning_rate": 7.71148211304279e-09, "log_odds_chosen": 0.14641742408275604, "log_odds_ratio": -0.752571702003479, "logits/chosen": 0.34768936038017273, "logits/rejected": 0.09763796627521515, "logps/chosen": -0.8525916934013367, "logps/rejected": -0.8650640249252319, "loss": 1.291, "nll_loss": 1.110510230064392, "rewards/accuracies": 0.5, "rewards/chosen": -0.08525917679071426, "rewards/margins": 0.0012472260277718306, "rewards/rejected": -0.08650640398263931, "step": 3475 }, { "epoch": 0.9788341185570635, "grad_norm": 0.5078125, "learning_rate": 6.778092035058248e-09, "log_odds_chosen": -0.07389330118894577, "log_odds_ratio": -0.7915335893630981, "logits/chosen": 0.0018207028042525053, "logits/rejected": 0.19812503457069397, "logps/chosen": -0.8738433718681335, "logps/rejected": -0.8644292950630188, "loss": 1.3015, "nll_loss": 1.1114206314086914, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08738434314727783, "rewards/margins": -0.0009413976222276688, "rewards/rejected": -0.08644293993711472, "step": 3480 }, { "epoch": 0.9802404894170593, "grad_norm": 2.0625, "learning_rate": 5.904815363083904e-09, "log_odds_chosen": 0.41229891777038574, "log_odds_ratio": -0.6279317140579224, "logits/chosen": -0.048399388790130615, "logits/rejected": -0.034595172852277756, "logps/chosen": -0.9107322692871094, "logps/rejected": -1.190619707107544, "loss": 1.3319, "nll_loss": 1.3180420398712158, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09107322990894318, "rewards/margins": 0.027988741174340248, "rewards/rejected": -0.11906196922063828, "step": 3485 }, { "epoch": 0.9816468602770551, "grad_norm": 0.65234375, "learning_rate": 5.091673152459731e-09, "log_odds_chosen": -0.03466014191508293, "log_odds_ratio": -0.806627094745636, "logits/chosen": 0.2747122049331665, "logits/rejected": 0.07116059213876724, "logps/chosen": -0.9941873550415039, "logps/rejected": -1.009939432144165, "loss": 1.4112, "nll_loss": 1.218632459640503, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09941873699426651, "rewards/margins": 0.0015752071049064398, "rewards/rejected": -0.10099394619464874, "step": 3490 }, { "epoch": 0.9830532311370508, "grad_norm": 0.6953125, "learning_rate": 4.338685008640209e-09, "log_odds_chosen": 0.4317692220211029, "log_odds_ratio": -0.5760183334350586, "logits/chosen": -0.013361548073589802, "logits/rejected": -0.12007039785385132, "logps/chosen": -0.8380535840988159, "logps/rejected": -1.0656559467315674, "loss": 1.3201, "nll_loss": 1.3138437271118164, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08380536735057831, "rewards/margins": 0.022760232910513878, "rewards/rejected": -0.10656560957431793, "step": 3495 }, { "epoch": 0.9844596019970466, "grad_norm": 0.74609375, "learning_rate": 3.6458690867208214e-09, "log_odds_chosen": 0.28699353337287903, "log_odds_ratio": -0.6248040795326233, "logits/chosen": 0.18887999653816223, "logits/rejected": 0.14614346623420715, "logps/chosen": -0.7974966168403625, "logps/rejected": -0.9402490854263306, "loss": 1.265, "nll_loss": 1.12100350856781, "rewards/accuracies": 0.75, "rewards/chosen": -0.07974965870380402, "rewards/margins": 0.014275247231125832, "rewards/rejected": -0.0940249115228653, "step": 3500 }, { "epoch": 0.9858659728570424, "grad_norm": 0.5, "learning_rate": 3.0132420910006233e-09, "log_odds_chosen": 0.25048303604125977, "log_odds_ratio": -0.6807008981704712, "logits/chosen": 0.18030565977096558, "logits/rejected": 0.07345929741859436, "logps/chosen": -0.8522911071777344, "logps/rejected": -0.9874947667121887, "loss": 1.2869, "nll_loss": 1.2538975477218628, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08522911369800568, "rewards/margins": 0.013520359992980957, "rewards/rejected": -0.09874947369098663, "step": 3505 }, { "epoch": 0.9872723437170382, "grad_norm": 0.52734375, "learning_rate": 2.440819274579509e-09, "log_odds_chosen": 0.19837257266044617, "log_odds_ratio": -0.7717543840408325, "logits/chosen": 0.22246333956718445, "logits/rejected": -0.24095554649829865, "logps/chosen": -1.009987235069275, "logps/rejected": -1.1807398796081543, "loss": 1.3517, "nll_loss": 1.3689885139465332, "rewards/accuracies": 0.5, "rewards/chosen": -0.10099872201681137, "rewards/margins": 0.017075251787900925, "rewards/rejected": -0.11807398498058319, "step": 3510 }, { "epoch": 0.9886787145770339, "grad_norm": 0.435546875, "learning_rate": 1.9286144389915605e-09, "log_odds_chosen": 0.37249404191970825, "log_odds_ratio": -0.6405671834945679, "logits/chosen": 0.19467693567276, "logits/rejected": 0.094179168343544, "logps/chosen": -0.8736664056777954, "logps/rejected": -1.1186236143112183, "loss": 1.214, "nll_loss": 1.075716257095337, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08736663311719894, "rewards/margins": 0.024495726451277733, "rewards/rejected": -0.11186236143112183, "step": 3515 }, { "epoch": 0.9900850854370298, "grad_norm": 0.890625, "learning_rate": 1.4766399338697613e-09, "log_odds_chosen": 0.3097439706325531, "log_odds_ratio": -0.6437228918075562, "logits/chosen": 0.12821228802204132, "logits/rejected": -0.21044449508190155, "logps/chosen": -0.9112231135368347, "logps/rejected": -1.0949690341949463, "loss": 1.3147, "nll_loss": 1.3154476881027222, "rewards/accuracies": 0.75, "rewards/chosen": -0.09112232178449631, "rewards/margins": 0.01837458834052086, "rewards/rejected": -0.10949690639972687, "step": 3520 }, { "epoch": 0.9914914562970255, "grad_norm": 0.6015625, "learning_rate": 1.084906656650675e-09, "log_odds_chosen": 0.3887336850166321, "log_odds_ratio": -0.6401981115341187, "logits/chosen": 0.07849763333797455, "logits/rejected": 0.04411407560110092, "logps/chosen": -0.7694699764251709, "logps/rejected": -0.924534797668457, "loss": 1.3069, "nll_loss": 1.2313811779022217, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07694699615240097, "rewards/margins": 0.015506486408412457, "rewards/rejected": -0.0924534797668457, "step": 3525 }, { "epoch": 0.9928978271570214, "grad_norm": 0.494140625, "learning_rate": 7.534240523107694e-10, "log_odds_chosen": -0.010244468227028847, "log_odds_ratio": -0.8057150840759277, "logits/chosen": -0.1170666366815567, "logits/rejected": 0.07405931502580643, "logps/chosen": -0.9780911207199097, "logps/rejected": -0.910449206829071, "loss": 1.29, "nll_loss": 1.3354380130767822, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.09780912101268768, "rewards/margins": -0.006764202378690243, "rewards/rejected": -0.09104491770267487, "step": 3530 }, { "epoch": 0.9943041980170171, "grad_norm": 0.37109375, "learning_rate": 4.822001131377096e-10, "log_odds_chosen": 0.20615753531455994, "log_odds_ratio": -0.6634969115257263, "logits/chosen": 0.2902953624725342, "logits/rejected": 0.024714922532439232, "logps/chosen": -0.9204000234603882, "logps/rejected": -1.0575330257415771, "loss": 1.2121, "nll_loss": 1.058855652809143, "rewards/accuracies": 0.5, "rewards/chosen": -0.09204001724720001, "rewards/margins": 0.013713277876377106, "rewards/rejected": -0.10575328767299652, "step": 3535 }, { "epoch": 0.9957105688770128, "grad_norm": 0.71484375, "learning_rate": 2.7124137853967723e-10, "log_odds_chosen": -0.03763968497514725, "log_odds_ratio": -0.8463073968887329, "logits/chosen": 0.18455204367637634, "logits/rejected": -0.0659802109003067, "logps/chosen": -0.7890104651451111, "logps/rejected": -0.8145157098770142, "loss": 1.3798, "nll_loss": 1.4178717136383057, "rewards/accuracies": 0.5, "rewards/chosen": -0.07890104502439499, "rewards/margins": 0.0025505167432129383, "rewards/rejected": -0.08145156502723694, "step": 3540 }, { "epoch": 0.9971169397370087, "grad_norm": 0.75, "learning_rate": 1.2055293488633102e-10, "log_odds_chosen": -0.07134605944156647, "log_odds_ratio": -0.8291767835617065, "logits/chosen": 0.11052653938531876, "logits/rejected": 0.15074488520622253, "logps/chosen": -0.9489976763725281, "logps/rejected": -0.9296343922615051, "loss": 1.2384, "nll_loss": 1.1190108060836792, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09489977359771729, "rewards/margins": -0.0019363273167982697, "rewards/rejected": -0.09296345710754395, "step": 3545 }, { "epoch": 0.9985233105970044, "grad_norm": 0.462890625, "learning_rate": 3.013841538640483e-11, "log_odds_chosen": 0.37353289127349854, "log_odds_ratio": -0.6932848691940308, "logits/chosen": -0.08854226768016815, "logits/rejected": -0.006648472044616938, "logps/chosen": -0.9258167147636414, "logps/rejected": -1.1444346904754639, "loss": 1.24, "nll_loss": 1.0886918306350708, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09258167445659637, "rewards/margins": 0.021861806511878967, "rewards/rejected": -0.11444349586963654, "step": 3550 }, { "epoch": 0.9999296814570002, "grad_norm": 1.21875, "learning_rate": 0.0, "log_odds_chosen": 0.21634745597839355, "log_odds_ratio": -0.6914322972297668, "logits/chosen": 0.18516218662261963, "logits/rejected": 0.08085967600345612, "logps/chosen": -0.9154261350631714, "logps/rejected": -1.0730537176132202, "loss": 1.2483, "nll_loss": 1.1246975660324097, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09154261648654938, "rewards/margins": 0.01576276682317257, "rewards/rejected": -0.1073053851723671, "step": 3555 }, { "epoch": 0.9999296814570002, "eval_log_odds_chosen": 0.3767525255680084, "eval_log_odds_ratio": -0.62775719165802, "eval_logits/chosen": 0.20116207003593445, "eval_logits/rejected": -0.0032512724865227938, "eval_logps/chosen": -0.8883616924285889, "eval_logps/rejected": -1.1382248401641846, "eval_loss": 1.2883824110031128, "eval_nll_loss": 1.2074565887451172, "eval_rewards/accuracies": 0.6132478713989258, "eval_rewards/chosen": -0.08883616328239441, "eval_rewards/margins": 0.02498631551861763, "eval_rewards/rejected": -0.11382248252630234, "eval_runtime": 976.6863, "eval_samples_per_second": 1.914, "eval_steps_per_second": 0.479, "step": 3555 }, { "epoch": 0.9999296814570002, "step": 3555, "total_flos": 0.0, "train_loss": 1.4252014341233652, "train_runtime": 102484.8577, "train_samples_per_second": 0.555, "train_steps_per_second": 0.035 } ], "logging_steps": 5, "max_steps": 3555, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }