diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3462 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.999080761654629, + "eval_steps": 500, + "global_step": 951, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005252790544977019, + "grad_norm": 33.25, + "learning_rate": 1.0416666666666667e-07, + "log_odds_chosen": -0.12333051860332489, + "log_odds_ratio": -0.8621311187744141, + "logits/chosen": -2.540858030319214, + "logits/rejected": -2.1144332885742188, + "logps/chosen": -1.1002752780914307, + "logps/rejected": -1.0134268999099731, + "loss": 2.3046, + "nll_loss": 1.4424240589141846, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": -1.1002752780914307, + "rewards/margins": -0.08684836328029633, + "rewards/rejected": -1.0134268999099731, + "step": 5 + }, + { + "epoch": 0.010505581089954037, + "grad_norm": 30.125, + "learning_rate": 2.0833333333333333e-07, + "log_odds_chosen": -0.1254591941833496, + "log_odds_ratio": -0.8488509058952332, + "logits/chosen": -2.521646022796631, + "logits/rejected": -2.12934947013855, + "logps/chosen": -1.0548789501190186, + "logps/rejected": -0.9548781514167786, + "loss": 2.2337, + "nll_loss": 1.3848837614059448, + "rewards/accuracies": 0.453125, + "rewards/chosen": -1.0548789501190186, + "rewards/margins": -0.10000075399875641, + "rewards/rejected": -0.9548781514167786, + "step": 10 + }, + { + "epoch": 0.015758371634931056, + "grad_norm": 28.25, + "learning_rate": 3.1249999999999997e-07, + "log_odds_chosen": -0.092379130423069, + "log_odds_ratio": -0.839794933795929, + "logits/chosen": -2.496335744857788, + "logits/rejected": -2.134352445602417, + "logps/chosen": -1.0547659397125244, + "logps/rejected": -0.9888293147087097, + "loss": 2.2323, + "nll_loss": 1.3924893140792847, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -1.0547659397125244, + "rewards/margins": -0.06593648344278336, + "rewards/rejected": -0.9888293147087097, + "step": 15 + }, + { + "epoch": 0.021011162179908074, + "grad_norm": 27.75, + "learning_rate": 4.1666666666666667e-07, + "log_odds_chosen": -0.08341892063617706, + "log_odds_ratio": -0.845537006855011, + "logits/chosen": -2.502532720565796, + "logits/rejected": -2.0534327030181885, + "logps/chosen": -1.0713450908660889, + "logps/rejected": -1.0228570699691772, + "loss": 2.2615, + "nll_loss": 1.415948748588562, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.0713450908660889, + "rewards/margins": -0.04848797246813774, + "rewards/rejected": -1.0228570699691772, + "step": 20 + }, + { + "epoch": 0.026263952724885097, + "grad_norm": 31.625, + "learning_rate": 5.208333333333334e-07, + "log_odds_chosen": -0.05041329935193062, + "log_odds_ratio": -0.8150845766067505, + "logits/chosen": -2.3506855964660645, + "logits/rejected": -2.041471481323242, + "logps/chosen": -1.0723893642425537, + "logps/rejected": -1.0427805185317993, + "loss": 2.2269, + "nll_loss": 1.411767601966858, + "rewards/accuracies": 0.49687498807907104, + "rewards/chosen": -1.0723893642425537, + "rewards/margins": -0.029608914628624916, + "rewards/rejected": -1.0427805185317993, + "step": 25 + }, + { + "epoch": 0.03151674326986211, + "grad_norm": 34.75, + "learning_rate": 6.249999999999999e-07, + "log_odds_chosen": -0.16907325387001038, + "log_odds_ratio": -0.8892423510551453, + "logits/chosen": -2.4877123832702637, + "logits/rejected": -2.091643810272217, + "logps/chosen": -1.0780900716781616, + "logps/rejected": -0.960413932800293, + "loss": 2.2862, + "nll_loss": 1.3969789743423462, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": -1.0780900716781616, + "rewards/margins": -0.1176760345697403, + "rewards/rejected": -0.960413932800293, + "step": 30 + }, + { + "epoch": 0.036769533814839134, + "grad_norm": 24.625, + "learning_rate": 7.291666666666666e-07, + "log_odds_chosen": -0.12296156585216522, + "log_odds_ratio": -0.8445537686347961, + "logits/chosen": -2.460153579711914, + "logits/rejected": -2.100581169128418, + "logps/chosen": -0.9918639063835144, + "logps/rejected": -0.8978347778320312, + "loss": 2.1014, + "nll_loss": 1.256840467453003, + "rewards/accuracies": 0.43437498807907104, + "rewards/chosen": -0.9918639063835144, + "rewards/margins": -0.09402903914451599, + "rewards/rejected": -0.8978347778320312, + "step": 35 + }, + { + "epoch": 0.04202232435981615, + "grad_norm": 20.375, + "learning_rate": 8.333333333333333e-07, + "log_odds_chosen": -0.10392768681049347, + "log_odds_ratio": -0.8281729817390442, + "logits/chosen": -2.4672484397888184, + "logits/rejected": -2.1189260482788086, + "logps/chosen": -0.9796692132949829, + "logps/rejected": -0.8947553634643555, + "loss": 2.0709, + "nll_loss": 1.2427122592926025, + "rewards/accuracies": 0.4781250059604645, + "rewards/chosen": -0.9796692132949829, + "rewards/margins": -0.08491390943527222, + "rewards/rejected": -0.8947553634643555, + "step": 40 + }, + { + "epoch": 0.04727511490479317, + "grad_norm": 25.75, + "learning_rate": 9.374999999999999e-07, + "log_odds_chosen": -0.07403279840946198, + "log_odds_ratio": -0.8119841814041138, + "logits/chosen": -2.5748581886291504, + "logits/rejected": -2.2311367988586426, + "logps/chosen": -0.9425970911979675, + "logps/rejected": -0.8925843238830566, + "loss": 1.966, + "nll_loss": 1.1540277004241943, + "rewards/accuracies": 0.4781250059604645, + "rewards/chosen": -0.9425970911979675, + "rewards/margins": -0.05001285672187805, + "rewards/rejected": -0.8925843238830566, + "step": 45 + }, + { + "epoch": 0.05252790544977019, + "grad_norm": 15.5625, + "learning_rate": 1.0416666666666667e-06, + "log_odds_chosen": -0.015203160233795643, + "log_odds_ratio": -0.7965196371078491, + "logits/chosen": -2.517662763595581, + "logits/rejected": -2.291977882385254, + "logps/chosen": -1.0069010257720947, + "logps/rejected": -0.9928563833236694, + "loss": 2.001, + "nll_loss": 1.2044353485107422, + "rewards/accuracies": 0.49687498807907104, + "rewards/chosen": -1.0069010257720947, + "rewards/margins": -0.014044714160263538, + "rewards/rejected": -0.9928563833236694, + "step": 50 + }, + { + "epoch": 0.05778069599474721, + "grad_norm": 19.125, + "learning_rate": 1.1458333333333333e-06, + "log_odds_chosen": -0.06918958574533463, + "log_odds_ratio": -0.8064200282096863, + "logits/chosen": -2.7286930084228516, + "logits/rejected": -2.3158278465270996, + "logps/chosen": -0.9621369242668152, + "logps/rejected": -0.9042080044746399, + "loss": 1.9673, + "nll_loss": 1.1608707904815674, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.9621369242668152, + "rewards/margins": -0.05792900174856186, + "rewards/rejected": -0.9042080044746399, + "step": 55 + }, + { + "epoch": 0.06303348653972422, + "grad_norm": 20.375, + "learning_rate": 1.2499999999999999e-06, + "log_odds_chosen": -0.055296190083026886, + "log_odds_ratio": -0.795842170715332, + "logits/chosen": -2.733304500579834, + "logits/rejected": -2.257201671600342, + "logps/chosen": -0.9258626699447632, + "logps/rejected": -0.8971433639526367, + "loss": 1.9557, + "nll_loss": 1.159847378730774, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": -0.9258626699447632, + "rewards/margins": -0.028719374909996986, + "rewards/rejected": -0.8971433639526367, + "step": 60 + }, + { + "epoch": 0.06828627708470125, + "grad_norm": 18.25, + "learning_rate": 1.3541666666666667e-06, + "log_odds_chosen": -0.05717029422521591, + "log_odds_ratio": -0.7737418413162231, + "logits/chosen": -2.6654744148254395, + "logits/rejected": -2.187049627304077, + "logps/chosen": -0.8003360033035278, + "logps/rejected": -0.7723677754402161, + "loss": 1.8696, + "nll_loss": 1.0958433151245117, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.8003360033035278, + "rewards/margins": -0.02796824648976326, + "rewards/rejected": -0.7723677754402161, + "step": 65 + }, + { + "epoch": 0.07353906762967827, + "grad_norm": 19.375, + "learning_rate": 1.4583333333333333e-06, + "log_odds_chosen": 0.002531373407691717, + "log_odds_ratio": -0.7339381575584412, + "logits/chosen": -2.5733718872070312, + "logits/rejected": -2.1028685569763184, + "logps/chosen": -0.7143228054046631, + "logps/rejected": -0.718761146068573, + "loss": 1.7947, + "nll_loss": 1.0607960224151611, + "rewards/accuracies": 0.5218750238418579, + "rewards/chosen": -0.7143228054046631, + "rewards/margins": 0.004438319243490696, + "rewards/rejected": -0.718761146068573, + "step": 70 + }, + { + "epoch": 0.07879185817465528, + "grad_norm": 15.9375, + "learning_rate": 1.5624999999999999e-06, + "log_odds_chosen": 0.06011660769581795, + "log_odds_ratio": -0.7009418606758118, + "logits/chosen": -2.5496840476989746, + "logits/rejected": -2.0580315589904785, + "logps/chosen": -0.6317678689956665, + "logps/rejected": -0.6753242611885071, + "loss": 1.6452, + "nll_loss": 0.9442570805549622, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -0.6317678689956665, + "rewards/margins": 0.04355642572045326, + "rewards/rejected": -0.6753242611885071, + "step": 75 + }, + { + "epoch": 0.0840446487196323, + "grad_norm": 14.875, + "learning_rate": 1.6666666666666667e-06, + "log_odds_chosen": 0.10804717242717743, + "log_odds_ratio": -0.6780250072479248, + "logits/chosen": -2.371317148208618, + "logits/rejected": -1.9558740854263306, + "logps/chosen": -0.5971282124519348, + "logps/rejected": -0.6553691029548645, + "loss": 1.6518, + "nll_loss": 0.9737834930419922, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -0.5971282124519348, + "rewards/margins": 0.05824087932705879, + "rewards/rejected": -0.6553691029548645, + "step": 80 + }, + { + "epoch": 0.08929743926460933, + "grad_norm": 15.375, + "learning_rate": 1.7708333333333332e-06, + "log_odds_chosen": 0.13051114976406097, + "log_odds_ratio": -0.6608899235725403, + "logits/chosen": -2.441239833831787, + "logits/rejected": -2.080503225326538, + "logps/chosen": -0.5396751165390015, + "logps/rejected": -0.6057919263839722, + "loss": 1.6033, + "nll_loss": 0.9424022436141968, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.5396751165390015, + "rewards/margins": 0.06611678004264832, + "rewards/rejected": -0.6057919263839722, + "step": 85 + }, + { + "epoch": 0.09455022980958634, + "grad_norm": 15.5625, + "learning_rate": 1.8749999999999998e-06, + "log_odds_chosen": 0.19523096084594727, + "log_odds_ratio": -0.6398605108261108, + "logits/chosen": -2.388965606689453, + "logits/rejected": -2.051954507827759, + "logps/chosen": -0.514168381690979, + "logps/rejected": -0.6006937623023987, + "loss": 1.5701, + "nll_loss": 0.9302393794059753, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -0.514168381690979, + "rewards/margins": 0.08652535825967789, + "rewards/rejected": -0.6006937623023987, + "step": 90 + }, + { + "epoch": 0.09980302035456336, + "grad_norm": 10.625, + "learning_rate": 1.9791666666666666e-06, + "log_odds_chosen": 0.12450599670410156, + "log_odds_ratio": -0.6654147505760193, + "logits/chosen": -2.3805699348449707, + "logits/rejected": -2.010688304901123, + "logps/chosen": -0.49114733934402466, + "logps/rejected": -0.5494757890701294, + "loss": 1.5446, + "nll_loss": 0.8791642189025879, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.49114733934402466, + "rewards/margins": 0.05832843855023384, + "rewards/rejected": -0.5494757890701294, + "step": 95 + }, + { + "epoch": 0.10505581089954039, + "grad_norm": 7.8125, + "learning_rate": 1.9998919935516766e-06, + "log_odds_chosen": 0.17239874601364136, + "log_odds_ratio": -0.6507178544998169, + "logits/chosen": -2.2754485607147217, + "logits/rejected": -2.040553569793701, + "logps/chosen": -0.485573947429657, + "logps/rejected": -0.5674648284912109, + "loss": 1.4726, + "nll_loss": 0.8218661546707153, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.485573947429657, + "rewards/margins": 0.08189092576503754, + "rewards/rejected": -0.5674648284912109, + "step": 100 + }, + { + "epoch": 0.1103086014445174, + "grad_norm": 8.3125, + "learning_rate": 1.999453257340926e-06, + "log_odds_chosen": 0.2180129736661911, + "log_odds_ratio": -0.6303091049194336, + "logits/chosen": -2.4427425861358643, + "logits/rejected": -2.181597948074341, + "logps/chosen": -0.4835621416568756, + "logps/rejected": -0.5780085325241089, + "loss": 1.4945, + "nll_loss": 0.8642352223396301, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -0.4835621416568756, + "rewards/margins": 0.09444637596607208, + "rewards/rejected": -0.5780085325241089, + "step": 105 + }, + { + "epoch": 0.11556139198949442, + "grad_norm": 8.0, + "learning_rate": 1.998677188931617e-06, + "log_odds_chosen": 0.27974802255630493, + "log_odds_ratio": -0.6000305414199829, + "logits/chosen": -2.4073500633239746, + "logits/rejected": -2.158104419708252, + "logps/chosen": -0.4692881107330322, + "logps/rejected": -0.5915614366531372, + "loss": 1.5236, + "nll_loss": 0.9235590100288391, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.4692881107330322, + "rewards/margins": 0.12227334082126617, + "rewards/rejected": -0.5915614366531372, + "step": 110 + }, + { + "epoch": 0.12081418253447143, + "grad_norm": 7.9375, + "learning_rate": 1.997564050259824e-06, + "log_odds_chosen": 0.28100112080574036, + "log_odds_ratio": -0.601650595664978, + "logits/chosen": -2.3918166160583496, + "logits/rejected": -2.029897689819336, + "logps/chosen": -0.4723443388938904, + "logps/rejected": -0.5918693542480469, + "loss": 1.5166, + "nll_loss": 0.9149250984191895, + "rewards/accuracies": 0.671875, + "rewards/chosen": -0.4723443388938904, + "rewards/margins": 0.11952495574951172, + "rewards/rejected": -0.5918693542480469, + "step": 115 + }, + { + "epoch": 0.12606697307944845, + "grad_norm": 8.8125, + "learning_rate": 1.996114217028476e-06, + "log_odds_chosen": 0.25655943155288696, + "log_odds_ratio": -0.6146520376205444, + "logits/chosen": -2.470524311065674, + "logits/rejected": -2.134540557861328, + "logps/chosen": -0.477255642414093, + "logps/rejected": -0.5925866961479187, + "loss": 1.5111, + "nll_loss": 0.8964211344718933, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.477255642414093, + "rewards/margins": 0.11533106863498688, + "rewards/rejected": -0.5925866961479187, + "step": 120 + }, + { + "epoch": 0.1313197636244255, + "grad_norm": 8.75, + "learning_rate": 1.994328178580548e-06, + "log_odds_chosen": 0.2803216576576233, + "log_odds_ratio": -0.601326584815979, + "logits/chosen": -2.367903232574463, + "logits/rejected": -2.018990993499756, + "logps/chosen": -0.46639877557754517, + "logps/rejected": -0.5851758718490601, + "loss": 1.481, + "nll_loss": 0.8796539306640625, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.46639877557754517, + "rewards/margins": 0.11877720057964325, + "rewards/rejected": -0.5851758718490601, + "step": 125 + }, + { + "epoch": 0.1365725541694025, + "grad_norm": 8.3125, + "learning_rate": 1.9922065377339033e-06, + "log_odds_chosen": 0.2894327640533447, + "log_odds_ratio": -0.6087297201156616, + "logits/chosen": -2.5040173530578613, + "logits/rejected": -2.2061374187469482, + "logps/chosen": -0.4694454073905945, + "logps/rejected": -0.5906943678855896, + "loss": 1.4968, + "nll_loss": 0.888100266456604, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -0.4694454073905945, + "rewards/margins": 0.12124893814325333, + "rewards/rejected": -0.5906943678855896, + "step": 130 + }, + { + "epoch": 0.14182534471437952, + "grad_norm": 7.375, + "learning_rate": 1.98975001057783e-06, + "log_odds_chosen": 0.30140143632888794, + "log_odds_ratio": -0.5964145660400391, + "logits/chosen": -2.4213032722473145, + "logits/rejected": -2.004279375076294, + "logps/chosen": -0.44823235273361206, + "logps/rejected": -0.583377480506897, + "loss": 1.4442, + "nll_loss": 0.8478012084960938, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.44823235273361206, + "rewards/margins": 0.13514509797096252, + "rewards/rejected": -0.583377480506897, + "step": 135 + }, + { + "epoch": 0.14707813525935653, + "grad_norm": 9.4375, + "learning_rate": 1.986959426231349e-06, + "log_odds_chosen": 0.33596453070640564, + "log_odds_ratio": -0.5885840654373169, + "logits/chosen": -2.471541166305542, + "logits/rejected": -2.1307930946350098, + "logps/chosen": -0.47856172919273376, + "logps/rejected": -0.6193875074386597, + "loss": 1.4974, + "nll_loss": 0.9087700843811035, + "rewards/accuracies": 0.684374988079071, + "rewards/chosen": -0.47856172919273376, + "rewards/margins": 0.14082582294940948, + "rewards/rejected": -0.6193875074386597, + "step": 140 + }, + { + "epoch": 0.15233092580433355, + "grad_norm": 8.0625, + "learning_rate": 1.9838357265633724e-06, + "log_odds_chosen": 0.35230931639671326, + "log_odds_ratio": -0.5799855589866638, + "logits/chosen": -2.4745469093322754, + "logits/rejected": -2.0399346351623535, + "logps/chosen": -0.45584583282470703, + "logps/rejected": -0.6081861257553101, + "loss": 1.4708, + "nll_loss": 0.8907746076583862, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.45584583282470703, + "rewards/margins": 0.15234029293060303, + "rewards/rejected": -0.6081861257553101, + "step": 145 + }, + { + "epoch": 0.15758371634931057, + "grad_norm": 7.59375, + "learning_rate": 1.9803799658748095e-06, + "log_odds_chosen": 0.32377585768699646, + "log_odds_ratio": -0.5951502919197083, + "logits/chosen": -2.3601431846618652, + "logits/rejected": -2.0099222660064697, + "logps/chosen": -0.46314555406570435, + "logps/rejected": -0.6009119153022766, + "loss": 1.4988, + "nll_loss": 0.9036917686462402, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.46314555406570435, + "rewards/margins": 0.13776634633541107, + "rewards/rejected": -0.6009119153022766, + "step": 150 + }, + { + "epoch": 0.16283650689428758, + "grad_norm": 9.75, + "learning_rate": 1.9765933105427177e-06, + "log_odds_chosen": 0.29054537415504456, + "log_odds_ratio": -0.6080166101455688, + "logits/chosen": -2.429213762283325, + "logits/rejected": -2.1127424240112305, + "logps/chosen": -0.48361191153526306, + "logps/rejected": -0.6187745928764343, + "loss": 1.5015, + "nll_loss": 0.8934603929519653, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.48361191153526306, + "rewards/margins": 0.13516271114349365, + "rewards/rejected": -0.6187745928764343, + "step": 155 + }, + { + "epoch": 0.1680892974392646, + "grad_norm": 14.0625, + "learning_rate": 1.972477038626636e-06, + "log_odds_chosen": 0.27817827463150024, + "log_odds_ratio": -0.6112152338027954, + "logits/chosen": -2.4246554374694824, + "logits/rejected": -2.0224289894104004, + "logps/chosen": -0.49589210748672485, + "logps/rejected": -0.6248718500137329, + "loss": 1.4978, + "nll_loss": 0.886622428894043, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.49589210748672485, + "rewards/margins": 0.12897971272468567, + "rewards/rejected": -0.6248718500137329, + "step": 160 + }, + { + "epoch": 0.17334208798424164, + "grad_norm": 10.875, + "learning_rate": 1.9680325394372147e-06, + "log_odds_chosen": 0.35008612275123596, + "log_odds_ratio": -0.5786347389221191, + "logits/chosen": -2.506772756576538, + "logits/rejected": -2.057096004486084, + "logps/chosen": -0.46079978346824646, + "logps/rejected": -0.6112517714500427, + "loss": 1.4896, + "nll_loss": 0.9109176397323608, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.46079978346824646, + "rewards/margins": 0.15045206248760223, + "rewards/rejected": -0.6112517714500427, + "step": 165 + }, + { + "epoch": 0.17859487852921865, + "grad_norm": 15.1875, + "learning_rate": 1.9632613130673015e-06, + "log_odds_chosen": 0.33634597063064575, + "log_odds_ratio": -0.589142918586731, + "logits/chosen": -2.467883348464966, + "logits/rejected": -1.9834989309310913, + "logps/chosen": -0.4864015579223633, + "logps/rejected": -0.6304683089256287, + "loss": 1.4988, + "nll_loss": 0.9096533060073853, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -0.4864015579223633, + "rewards/margins": 0.144066721200943, + "rewards/rejected": -0.6304683089256287, + "step": 170 + }, + { + "epoch": 0.18384766907419567, + "grad_norm": 26.75, + "learning_rate": 1.9581649698856357e-06, + "log_odds_chosen": 0.351374089717865, + "log_odds_ratio": -0.5786073207855225, + "logits/chosen": -2.3902525901794434, + "logits/rejected": -2.0138325691223145, + "logps/chosen": -0.45923271775245667, + "logps/rejected": -0.6129686236381531, + "loss": 1.477, + "nll_loss": 0.8983781933784485, + "rewards/accuracies": 0.715624988079071, + "rewards/chosen": -0.45923271775245667, + "rewards/margins": 0.1537359207868576, + "rewards/rejected": -0.6129686236381531, + "step": 175 + }, + { + "epoch": 0.18910045961917268, + "grad_norm": 8.5625, + "learning_rate": 1.952745229993319e-06, + "log_odds_chosen": 0.3817608952522278, + "log_odds_ratio": -0.5729137659072876, + "logits/chosen": -2.52931547164917, + "logits/rejected": -2.1916394233703613, + "logps/chosen": -0.48729705810546875, + "logps/rejected": -0.6591955423355103, + "loss": 1.4891, + "nll_loss": 0.9161707758903503, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.48729705810546875, + "rewards/margins": 0.1718985140323639, + "rewards/rejected": -0.6591955423355103, + "step": 180 + }, + { + "epoch": 0.1943532501641497, + "grad_norm": 8.5, + "learning_rate": 1.947003922643256e-06, + "log_odds_chosen": 0.379459023475647, + "log_odds_ratio": -0.5737109184265137, + "logits/chosen": -2.282898426055908, + "logits/rejected": -1.9805419445037842, + "logps/chosen": -0.47503146529197693, + "logps/rejected": -0.6488234400749207, + "loss": 1.4454, + "nll_loss": 0.8717378377914429, + "rewards/accuracies": 0.721875011920929, + "rewards/chosen": -0.47503146529197693, + "rewards/margins": 0.1737920045852661, + "rewards/rejected": -0.6488234400749207, + "step": 185 + }, + { + "epoch": 0.19960604070912671, + "grad_norm": 10.125, + "learning_rate": 1.9409429856227482e-06, + "log_odds_chosen": 0.4121369421482086, + "log_odds_ratio": -0.5561366081237793, + "logits/chosen": -2.488356113433838, + "logits/rejected": -2.0776686668395996, + "logps/chosen": -0.4683772921562195, + "logps/rejected": -0.647982656955719, + "loss": 1.4436, + "nll_loss": 0.8874515295028687, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -0.4683772921562195, + "rewards/margins": 0.17960533499717712, + "rewards/rejected": -0.647982656955719, + "step": 190 + }, + { + "epoch": 0.20485883125410373, + "grad_norm": 11.5, + "learning_rate": 1.934564464599461e-06, + "log_odds_chosen": 0.32919231057167053, + "log_odds_ratio": -0.5908551812171936, + "logits/chosen": -2.501392364501953, + "logits/rejected": -2.0592591762542725, + "logps/chosen": -0.49434512853622437, + "logps/rejected": -0.6509113311767578, + "loss": 1.4187, + "nll_loss": 0.8278582692146301, + "rewards/accuracies": 0.6968749761581421, + "rewards/chosen": -0.49434512853622437, + "rewards/margins": 0.15656621754169464, + "rewards/rejected": -0.6509113311767578, + "step": 195 + }, + { + "epoch": 0.21011162179908077, + "grad_norm": 12.8125, + "learning_rate": 1.927870512430972e-06, + "log_odds_chosen": 0.42371082305908203, + "log_odds_ratio": -0.5525480508804321, + "logits/chosen": -2.4069533348083496, + "logits/rejected": -2.019406795501709, + "logps/chosen": -0.4768436551094055, + "logps/rejected": -0.6629732251167297, + "loss": 1.4572, + "nll_loss": 0.9046151041984558, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.4768436551094055, + "rewards/margins": 0.18612954020500183, + "rewards/rejected": -0.6629732251167297, + "step": 200 + }, + { + "epoch": 0.2153644123440578, + "grad_norm": 9.0, + "learning_rate": 1.9208633884381526e-06, + "log_odds_chosen": 0.42966872453689575, + "log_odds_ratio": -0.5522044897079468, + "logits/chosen": -2.430342197418213, + "logits/rejected": -2.0743634700775146, + "logps/chosen": -0.4722970426082611, + "logps/rejected": -0.6621736884117126, + "loss": 1.4295, + "nll_loss": 0.8772872090339661, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.4722970426082611, + "rewards/margins": 0.18987664580345154, + "rewards/rejected": -0.6621736884117126, + "step": 205 + }, + { + "epoch": 0.2206172028890348, + "grad_norm": 9.1875, + "learning_rate": 1.9135454576426007e-06, + "log_odds_chosen": 0.40302562713623047, + "log_odds_ratio": -0.5604028105735779, + "logits/chosen": -2.412562847137451, + "logits/rejected": -2.0246427059173584, + "logps/chosen": -0.4761424660682678, + "logps/rejected": -0.661251425743103, + "loss": 1.3993, + "nll_loss": 0.8388580083847046, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.4761424660682678, + "rewards/margins": 0.18510892987251282, + "rewards/rejected": -0.661251425743103, + "step": 210 + }, + { + "epoch": 0.22586999343401182, + "grad_norm": 7.875, + "learning_rate": 1.905919189968415e-06, + "log_odds_chosen": 0.4606761932373047, + "log_odds_ratio": -0.5445691347122192, + "logits/chosen": -2.4419312477111816, + "logits/rejected": -2.030771493911743, + "logps/chosen": -0.4771277904510498, + "logps/rejected": -0.6932464838027954, + "loss": 1.4377, + "nll_loss": 0.8931263089179993, + "rewards/accuracies": 0.734375, + "rewards/chosen": -0.4771277904510498, + "rewards/margins": 0.21611860394477844, + "rewards/rejected": -0.6932464838027954, + "step": 215 + }, + { + "epoch": 0.23112278397898883, + "grad_norm": 7.78125, + "learning_rate": 1.897987159408548e-06, + "log_odds_chosen": 0.4278109073638916, + "log_odds_ratio": -0.5563892722129822, + "logits/chosen": -2.4070868492126465, + "logits/rejected": -2.033133029937744, + "logps/chosen": -0.4777792990207672, + "logps/rejected": -0.6746242642402649, + "loss": 1.3836, + "nll_loss": 0.827177882194519, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.4777792990207672, + "rewards/margins": 0.19684496521949768, + "rewards/rejected": -0.6746242642402649, + "step": 220 + }, + { + "epoch": 0.23637557452396585, + "grad_norm": 9.4375, + "learning_rate": 1.8897520431560433e-06, + "log_odds_chosen": 0.39412638545036316, + "log_odds_ratio": -0.5616167187690735, + "logits/chosen": -2.437281608581543, + "logits/rejected": -2.0233240127563477, + "logps/chosen": -0.49209141731262207, + "logps/rejected": -0.670540988445282, + "loss": 1.3984, + "nll_loss": 0.8367835879325867, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.49209141731262207, + "rewards/margins": 0.17844951152801514, + "rewards/rejected": -0.670540988445282, + "step": 225 + }, + { + "epoch": 0.24162836506894286, + "grad_norm": 9.4375, + "learning_rate": 1.8812166207004366e-06, + "log_odds_chosen": 0.45934948325157166, + "log_odds_ratio": -0.5536540746688843, + "logits/chosen": -2.4575705528259277, + "logits/rejected": -2.0787205696105957, + "logps/chosen": -0.4777277112007141, + "logps/rejected": -0.6928449869155884, + "loss": 1.3871, + "nll_loss": 0.8334070444107056, + "rewards/accuracies": 0.7593749761581421, + "rewards/chosen": -0.4777277112007141, + "rewards/margins": 0.21511724591255188, + "rewards/rejected": -0.6928449869155884, + "step": 230 + }, + { + "epoch": 0.2468811556139199, + "grad_norm": 7.71875, + "learning_rate": 1.8723837728896337e-06, + "log_odds_chosen": 0.45329445600509644, + "log_odds_ratio": -0.5616171360015869, + "logits/chosen": -2.522167682647705, + "logits/rejected": -2.1475300788879395, + "logps/chosen": -0.4945332407951355, + "logps/rejected": -0.7140644788742065, + "loss": 1.4402, + "nll_loss": 0.878614068031311, + "rewards/accuracies": 0.7093750238418579, + "rewards/chosen": -0.4945332407951355, + "rewards/margins": 0.21953122317790985, + "rewards/rejected": -0.7140644788742065, + "step": 235 + }, + { + "epoch": 0.2521339461588969, + "grad_norm": 7.75, + "learning_rate": 1.8632564809575738e-06, + "log_odds_chosen": 0.4688095152378082, + "log_odds_ratio": -0.5438790917396545, + "logits/chosen": -2.512554168701172, + "logits/rejected": -2.105734348297119, + "logps/chosen": -0.48634210228919983, + "logps/rejected": -0.7048304677009583, + "loss": 1.4387, + "nll_loss": 0.8948429226875305, + "rewards/accuracies": 0.753125011920929, + "rewards/chosen": -0.48634210228919983, + "rewards/margins": 0.21848826110363007, + "rewards/rejected": -0.7048304677009583, + "step": 240 + }, + { + "epoch": 0.2573867367038739, + "grad_norm": 9.9375, + "learning_rate": 1.8538378255180138e-06, + "log_odds_chosen": 0.488097608089447, + "log_odds_ratio": -0.5403500199317932, + "logits/chosen": -2.3577160835266113, + "logits/rejected": -2.0601189136505127, + "logps/chosen": -0.5090717077255249, + "logps/rejected": -0.7453780174255371, + "loss": 1.4193, + "nll_loss": 0.878923773765564, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.5090717077255249, + "rewards/margins": 0.23630623519420624, + "rewards/rejected": -0.7453780174255371, + "step": 245 + }, + { + "epoch": 0.262639527248851, + "grad_norm": 8.4375, + "learning_rate": 1.8441309855247707e-06, + "log_odds_chosen": 0.6032781004905701, + "log_odds_ratio": -0.5000559091567993, + "logits/chosen": -2.403979539871216, + "logits/rejected": -2.1050338745117188, + "logps/chosen": -0.5098007917404175, + "logps/rejected": -0.8097056150436401, + "loss": 1.4018, + "nll_loss": 0.9017453193664551, + "rewards/accuracies": 0.796875, + "rewards/chosen": -0.5098007917404175, + "rewards/margins": 0.29990485310554504, + "rewards/rejected": -0.8097056150436401, + "step": 250 + }, + { + "epoch": 0.267892317793828, + "grad_norm": 7.90625, + "learning_rate": 1.83413923719877e-06, + "log_odds_chosen": 0.5410558581352234, + "log_odds_ratio": -0.5238425135612488, + "logits/chosen": -2.42203688621521, + "logits/rejected": -2.095054864883423, + "logps/chosen": -0.49079209566116333, + "logps/rejected": -0.763100266456604, + "loss": 1.3797, + "nll_loss": 0.8558791875839233, + "rewards/accuracies": 0.778124988079071, + "rewards/chosen": -0.49079209566116333, + "rewards/margins": 0.2723081707954407, + "rewards/rejected": -0.763100266456604, + "step": 255 + }, + { + "epoch": 0.273145108338805, + "grad_norm": 10.1875, + "learning_rate": 1.8238659529222668e-06, + "log_odds_chosen": 0.5387502908706665, + "log_odds_ratio": -0.5273549556732178, + "logits/chosen": -2.458590269088745, + "logits/rejected": -2.1467177867889404, + "logps/chosen": -0.5123028755187988, + "logps/rejected": -0.781539797782898, + "loss": 1.4312, + "nll_loss": 0.9038845300674438, + "rewards/accuracies": 0.765625, + "rewards/chosen": -0.5123028755187988, + "rewards/margins": 0.2692369818687439, + "rewards/rejected": -0.781539797782898, + "step": 260 + }, + { + "epoch": 0.278397898883782, + "grad_norm": 12.3125, + "learning_rate": 1.8133146001006117e-06, + "log_odds_chosen": 0.585041880607605, + "log_odds_ratio": -0.5241442322731018, + "logits/chosen": -2.434957504272461, + "logits/rejected": -2.08172345161438, + "logps/chosen": -0.5419186353683472, + "logps/rejected": -0.8563257455825806, + "loss": 1.4995, + "nll_loss": 0.9753583669662476, + "rewards/accuracies": 0.7593749761581421, + "rewards/chosen": -0.5419186353683472, + "rewards/margins": 0.314407080411911, + "rewards/rejected": -0.8563257455825806, + "step": 265 + }, + { + "epoch": 0.28365068942875904, + "grad_norm": 12.8125, + "learning_rate": 1.8024887399919408e-06, + "log_odds_chosen": 0.686429500579834, + "log_odds_ratio": -0.49835652112960815, + "logits/chosen": -2.493675947189331, + "logits/rejected": -2.192899465560913, + "logps/chosen": -0.533765435218811, + "logps/rejected": -0.9061405062675476, + "loss": 1.4053, + "nll_loss": 0.9069935083389282, + "rewards/accuracies": 0.765625, + "rewards/chosen": -0.533765435218811, + "rewards/margins": 0.3723750710487366, + "rewards/rejected": -0.9061405062675476, + "step": 270 + }, + { + "epoch": 0.28890347997373605, + "grad_norm": 11.5, + "learning_rate": 1.7913920265051946e-06, + "log_odds_chosen": 0.7045778036117554, + "log_odds_ratio": -0.49370041489601135, + "logits/chosen": -2.4899590015411377, + "logits/rejected": -2.1618402004241943, + "logps/chosen": -0.5214771032333374, + "logps/rejected": -0.9220815896987915, + "loss": 1.4176, + "nll_loss": 0.92388916015625, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -0.5214771032333374, + "rewards/margins": 0.4006044268608093, + "rewards/rejected": -0.9220815896987915, + "step": 275 + }, + { + "epoch": 0.29415627051871307, + "grad_norm": 9.5625, + "learning_rate": 1.780028204966859e-06, + "log_odds_chosen": 0.6810405254364014, + "log_odds_ratio": -0.4989449381828308, + "logits/chosen": -2.3327696323394775, + "logits/rejected": -2.0119078159332275, + "logps/chosen": -0.5228633880615234, + "logps/rejected": -0.8827990293502808, + "loss": 1.405, + "nll_loss": 0.9060786962509155, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.5228633880615234, + "rewards/margins": 0.3599356710910797, + "rewards/rejected": -0.8827990293502808, + "step": 280 + }, + { + "epoch": 0.2994090610636901, + "grad_norm": 10.8125, + "learning_rate": 1.768401110856859e-06, + "log_odds_chosen": 0.7910138964653015, + "log_odds_ratio": -0.47219276428222656, + "logits/chosen": -2.465003252029419, + "logits/rejected": -2.085939407348633, + "logps/chosen": -0.5146728754043579, + "logps/rejected": -0.9470351934432983, + "loss": 1.3015, + "nll_loss": 0.8292847871780396, + "rewards/accuracies": 0.8062499761581421, + "rewards/chosen": -0.5146728754043579, + "rewards/margins": 0.43236231803894043, + "rewards/rejected": -0.9470351934432983, + "step": 285 + }, + { + "epoch": 0.3046618516086671, + "grad_norm": 13.6875, + "learning_rate": 1.7565146685140167e-06, + "log_odds_chosen": 0.771044135093689, + "log_odds_ratio": -0.4853692948818207, + "logits/chosen": -2.4471678733825684, + "logits/rejected": -2.1012349128723145, + "logps/chosen": -0.5462040901184082, + "logps/rejected": -0.9886453747749329, + "loss": 1.3976, + "nll_loss": 0.9122269749641418, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.5462040901184082, + "rewards/margins": 0.4424411654472351, + "rewards/rejected": -0.9886453747749329, + "step": 290 + }, + { + "epoch": 0.3099146421536441, + "grad_norm": 12.0, + "learning_rate": 1.7443728898115224e-06, + "log_odds_chosen": 0.6316434144973755, + "log_odds_ratio": -0.5107887983322144, + "logits/chosen": -2.432225465774536, + "logits/rejected": -2.0828986167907715, + "logps/chosen": -0.5212147235870361, + "logps/rejected": -0.8626314997673035, + "loss": 1.3532, + "nll_loss": 0.8424150347709656, + "rewards/accuracies": 0.7406250238418579, + "rewards/chosen": -0.5212147235870361, + "rewards/margins": 0.34141671657562256, + "rewards/rejected": -0.8626314997673035, + "step": 295 + }, + { + "epoch": 0.31516743269862113, + "grad_norm": 19.875, + "learning_rate": 1.7319798728028616e-06, + "log_odds_chosen": 0.8003711700439453, + "log_odds_ratio": -0.4749313294887543, + "logits/chosen": -2.4634110927581787, + "logits/rejected": -2.111607313156128, + "logps/chosen": -0.5615866780281067, + "logps/rejected": -1.0098183155059814, + "loss": 1.4088, + "nll_loss": 0.9338866472244263, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -0.5615866780281067, + "rewards/margins": 0.44823163747787476, + "rewards/rejected": -1.0098183155059814, + "step": 300 + }, + { + "epoch": 0.32042022324359815, + "grad_norm": 17.25, + "learning_rate": 1.719339800338651e-06, + "log_odds_chosen": 0.8279815912246704, + "log_odds_ratio": -0.4675443172454834, + "logits/chosen": -2.5601465702056885, + "logits/rejected": -2.2116811275482178, + "logps/chosen": -0.5433454513549805, + "logps/rejected": -1.0226011276245117, + "loss": 1.3768, + "nll_loss": 0.9092954397201538, + "rewards/accuracies": 0.8031250238418579, + "rewards/chosen": -0.5433454513549805, + "rewards/margins": 0.479255735874176, + "rewards/rejected": -1.0226011276245117, + "step": 305 + }, + { + "epoch": 0.32567301378857516, + "grad_norm": 15.3125, + "learning_rate": 1.7064569386548585e-06, + "log_odds_chosen": 0.859075665473938, + "log_odds_ratio": -0.4543831944465637, + "logits/chosen": -2.531367301940918, + "logits/rejected": -2.2318122386932373, + "logps/chosen": -0.5256025195121765, + "logps/rejected": -1.0284937620162964, + "loss": 1.3533, + "nll_loss": 0.8989534378051758, + "rewards/accuracies": 0.8031250238418579, + "rewards/chosen": -0.5256025195121765, + "rewards/margins": 0.5028911828994751, + "rewards/rejected": -1.0284937620162964, + "step": 310 + }, + { + "epoch": 0.3309258043335522, + "grad_norm": 16.625, + "learning_rate": 1.6933356359328754e-06, + "log_odds_chosen": 0.7117995619773865, + "log_odds_ratio": -0.4905334413051605, + "logits/chosen": -2.5292108058929443, + "logits/rejected": -2.1856768131256104, + "logps/chosen": -0.5284509658813477, + "logps/rejected": -0.915407657623291, + "loss": 1.371, + "nll_loss": 0.8804505467414856, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.5284509658813477, + "rewards/margins": 0.3869567811489105, + "rewards/rejected": -0.915407657623291, + "step": 315 + }, + { + "epoch": 0.3361785948785292, + "grad_norm": 17.875, + "learning_rate": 1.679980320831934e-06, + "log_odds_chosen": 0.7291110754013062, + "log_odds_ratio": -0.4787971079349518, + "logits/chosen": -2.4682400226593018, + "logits/rejected": -2.2220332622528076, + "logps/chosen": -0.5479062795639038, + "logps/rejected": -0.9491809606552124, + "loss": 1.3781, + "nll_loss": 0.8992602229118347, + "rewards/accuracies": 0.8062499761581421, + "rewards/chosen": -0.5479062795639038, + "rewards/margins": 0.40127477049827576, + "rewards/rejected": -0.9491809606552124, + "step": 320 + }, + { + "epoch": 0.34143138542350626, + "grad_norm": 32.0, + "learning_rate": 1.6663955009943602e-06, + "log_odds_chosen": 0.9077841639518738, + "log_odds_ratio": -0.4515516757965088, + "logits/chosen": -2.4324584007263184, + "logits/rejected": -2.178394317626953, + "logps/chosen": -0.5766757726669312, + "logps/rejected": -1.1069071292877197, + "loss": 1.374, + "nll_loss": 0.9224408268928528, + "rewards/accuracies": 0.828125, + "rewards/chosen": -0.5766757726669312, + "rewards/margins": 0.530231237411499, + "rewards/rejected": -1.1069071292877197, + "step": 325 + }, + { + "epoch": 0.3466841759684833, + "grad_norm": 23.0, + "learning_rate": 1.6525857615241685e-06, + "log_odds_chosen": 0.733812689781189, + "log_odds_ratio": -0.4906436800956726, + "logits/chosen": -2.523135185241699, + "logits/rejected": -2.1835999488830566, + "logps/chosen": -0.5466452836990356, + "logps/rejected": -0.9662971496582031, + "loss": 1.4195, + "nll_loss": 0.9288629293441772, + "rewards/accuracies": 0.8031250238418579, + "rewards/chosen": -0.5466452836990356, + "rewards/margins": 0.4196518361568451, + "rewards/rejected": -0.9662971496582031, + "step": 330 + }, + { + "epoch": 0.3519369665134603, + "grad_norm": 21.875, + "learning_rate": 1.6385557634395136e-06, + "log_odds_chosen": 0.7822979688644409, + "log_odds_ratio": -0.47422999143600464, + "logits/chosen": -2.4535679817199707, + "logits/rejected": -2.2028393745422363, + "logps/chosen": -0.5340802669525146, + "logps/rejected": -0.9806568026542664, + "loss": 1.3555, + "nll_loss": 0.881304144859314, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.5340802669525146, + "rewards/margins": 0.44657665491104126, + "rewards/rejected": -0.9806568026542664, + "step": 335 + }, + { + "epoch": 0.3571897570584373, + "grad_norm": 21.5, + "learning_rate": 1.624310242099518e-06, + "log_odds_chosen": 0.7664231061935425, + "log_odds_ratio": -0.48080235719680786, + "logits/chosen": -2.453505039215088, + "logits/rejected": -2.18292498588562, + "logps/chosen": -0.5327800512313843, + "logps/rejected": -0.9523041844367981, + "loss": 1.4089, + "nll_loss": 0.9281209111213684, + "rewards/accuracies": 0.796875, + "rewards/chosen": -0.5327800512313843, + "rewards/margins": 0.4195241332054138, + "rewards/rejected": -0.9523041844367981, + "step": 340 + }, + { + "epoch": 0.3624425476034143, + "grad_norm": 24.5, + "learning_rate": 1.609854005606009e-06, + "log_odds_chosen": 0.9470375776290894, + "log_odds_ratio": -0.4272763729095459, + "logits/chosen": -2.5423166751861572, + "logits/rejected": -2.210846424102783, + "logps/chosen": -0.5365777015686035, + "logps/rejected": -1.076774001121521, + "loss": 1.3329, + "nll_loss": 0.9056490063667297, + "rewards/accuracies": 0.815625011920929, + "rewards/chosen": -0.5365777015686035, + "rewards/margins": 0.5401962995529175, + "rewards/rejected": -1.076774001121521, + "step": 345 + }, + { + "epoch": 0.36769533814839134, + "grad_norm": 15.125, + "learning_rate": 1.5951919331807048e-06, + "log_odds_chosen": 0.9901137351989746, + "log_odds_ratio": -0.43201208114624023, + "logits/chosen": -2.3910915851593018, + "logits/rejected": -2.085310935974121, + "logps/chosen": -0.5491678714752197, + "logps/rejected": -1.141390085220337, + "loss": 1.3711, + "nll_loss": 0.9390678405761719, + "rewards/accuracies": 0.796875, + "rewards/chosen": -0.5491678714752197, + "rewards/margins": 0.5922220945358276, + "rewards/rejected": -1.141390085220337, + "step": 350 + }, + { + "epoch": 0.37294812869336835, + "grad_norm": 19.25, + "learning_rate": 1.5803289735183949e-06, + "log_odds_chosen": 0.9613128900527954, + "log_odds_ratio": -0.43703293800354004, + "logits/chosen": -2.404744863510132, + "logits/rejected": -2.0907814502716064, + "logps/chosen": -0.5635210871696472, + "logps/rejected": -1.1492810249328613, + "loss": 1.3534, + "nll_loss": 0.9164144396781921, + "rewards/accuracies": 0.831250011920929, + "rewards/chosen": -0.5635210871696472, + "rewards/margins": 0.5857599377632141, + "rewards/rejected": -1.1492810249328613, + "step": 355 + }, + { + "epoch": 0.37820091923834537, + "grad_norm": 32.25, + "learning_rate": 1.5652701431166717e-06, + "log_odds_chosen": 0.9359542727470398, + "log_odds_ratio": -0.4396037459373474, + "logits/chosen": -2.4650635719299316, + "logits/rejected": -2.122915267944336, + "logps/chosen": -0.5267240405082703, + "logps/rejected": -1.0681325197219849, + "loss": 1.3381, + "nll_loss": 0.8984518051147461, + "rewards/accuracies": 0.831250011920929, + "rewards/chosen": -0.5267240405082703, + "rewards/margins": 0.5414084792137146, + "rewards/rejected": -1.0681325197219849, + "step": 360 + }, + { + "epoch": 0.3834537097833224, + "grad_norm": 22.5, + "learning_rate": 1.550020524582781e-06, + "log_odds_chosen": 0.9607855677604675, + "log_odds_ratio": -0.4296341836452484, + "logits/chosen": -2.556321620941162, + "logits/rejected": -2.233931064605713, + "logps/chosen": -0.5581452250480652, + "logps/rejected": -1.131134033203125, + "loss": 1.2919, + "nll_loss": 0.8622277975082397, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5581452250480652, + "rewards/margins": 0.5729888677597046, + "rewards/rejected": -1.131134033203125, + "step": 365 + }, + { + "epoch": 0.3887065003282994, + "grad_norm": 20.375, + "learning_rate": 1.5345852649181553e-06, + "log_odds_chosen": 0.9939554333686829, + "log_odds_ratio": -0.4331156313419342, + "logits/chosen": -2.4889018535614014, + "logits/rejected": -2.2245144844055176, + "logps/chosen": -0.5625091791152954, + "logps/rejected": -1.159073829650879, + "loss": 1.3688, + "nll_loss": 0.9356663823127747, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5625091791152954, + "rewards/margins": 0.5965645909309387, + "rewards/rejected": -1.159073829650879, + "step": 370 + }, + { + "epoch": 0.3939592908732764, + "grad_norm": 23.625, + "learning_rate": 1.5189695737812151e-06, + "log_odds_chosen": 1.057094931602478, + "log_odds_ratio": -0.4173505902290344, + "logits/chosen": -2.63775634765625, + "logits/rejected": -2.2736358642578125, + "logps/chosen": -0.5382205843925476, + "logps/rejected": -1.1550116539001465, + "loss": 1.3662, + "nll_loss": 0.9488565325737, + "rewards/accuracies": 0.831250011920929, + "rewards/chosen": -0.5382205843925476, + "rewards/margins": 0.6167910099029541, + "rewards/rejected": -1.1550116539001465, + "step": 375 + }, + { + "epoch": 0.39921208141825343, + "grad_norm": 20.375, + "learning_rate": 1.5031787217290216e-06, + "log_odds_chosen": 1.2109272480010986, + "log_odds_ratio": -0.40476536750793457, + "logits/chosen": -2.441784143447876, + "logits/rejected": -2.141080856323242, + "logps/chosen": -0.5574549436569214, + "logps/rejected": -1.3256219625473022, + "loss": 1.3395, + "nll_loss": 0.9347711801528931, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.5574549436569214, + "rewards/margins": 0.7681670188903809, + "rewards/rejected": -1.3256219625473022, + "step": 380 + }, + { + "epoch": 0.40446487196323044, + "grad_norm": 22.125, + "learning_rate": 1.487218038438377e-06, + "log_odds_chosen": 1.0492345094680786, + "log_odds_ratio": -0.41920414566993713, + "logits/chosen": -2.4877960681915283, + "logits/rejected": -2.2220120429992676, + "logps/chosen": -0.5476792454719543, + "logps/rejected": -1.1791220903396606, + "loss": 1.3255, + "nll_loss": 0.9063073992729187, + "rewards/accuracies": 0.8218749761581421, + "rewards/chosen": -0.5476792454719543, + "rewards/margins": 0.6314427256584167, + "rewards/rejected": -1.1791220903396606, + "step": 385 + }, + { + "epoch": 0.40971766250820746, + "grad_norm": 52.5, + "learning_rate": 1.4710929109069672e-06, + "log_odds_chosen": 1.1698648929595947, + "log_odds_ratio": -0.4003461003303528, + "logits/chosen": -2.450030565261841, + "logits/rejected": -2.1449716091156006, + "logps/chosen": -0.5605112314224243, + "logps/rejected": -1.2878248691558838, + "loss": 1.3523, + "nll_loss": 0.9519191980361938, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5605112314224243, + "rewards/margins": 0.7273136377334595, + "rewards/rejected": -1.2878248691558838, + "step": 390 + }, + { + "epoch": 0.41497045305318453, + "grad_norm": 23.75, + "learning_rate": 1.4548087816351614e-06, + "log_odds_chosen": 1.1297777891159058, + "log_odds_ratio": -0.41146859526634216, + "logits/chosen": -2.514195442199707, + "logits/rejected": -2.1877148151397705, + "logps/chosen": -0.5281041860580444, + "logps/rejected": -1.2085294723510742, + "loss": 1.2817, + "nll_loss": 0.8702155947685242, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.5281041860580444, + "rewards/margins": 0.6804252862930298, + "rewards/rejected": -1.2085294723510742, + "step": 395 + }, + { + "epoch": 0.42022324359816154, + "grad_norm": 68.0, + "learning_rate": 1.4383711467890773e-06, + "log_odds_chosen": 1.1593742370605469, + "log_odds_ratio": -0.4072793424129486, + "logits/chosen": -2.410384178161621, + "logits/rejected": -2.1880173683166504, + "logps/chosen": -0.5577239990234375, + "logps/rejected": -1.2925007343292236, + "loss": 1.2823, + "nll_loss": 0.8749955892562866, + "rewards/accuracies": 0.856249988079071, + "rewards/chosen": -0.5577239990234375, + "rewards/margins": 0.7347767353057861, + "rewards/rejected": -1.2925007343292236, + "step": 400 + }, + { + "epoch": 0.42547603414313856, + "grad_norm": 26.75, + "learning_rate": 1.4217855543455323e-06, + "log_odds_chosen": 1.0840833187103271, + "log_odds_ratio": -0.4106718599796295, + "logits/chosen": -2.384483575820923, + "logits/rejected": -2.11120343208313, + "logps/chosen": -0.5574430227279663, + "logps/rejected": -1.2079960107803345, + "loss": 1.3143, + "nll_loss": 0.9036461710929871, + "rewards/accuracies": 0.8343750238418579, + "rewards/chosen": -0.5574430227279663, + "rewards/margins": 0.6505529880523682, + "rewards/rejected": -1.2079960107803345, + "step": 405 + }, + { + "epoch": 0.4307288246881156, + "grad_norm": 22.625, + "learning_rate": 1.4050576022195082e-06, + "log_odds_chosen": 0.8836471438407898, + "log_odds_ratio": -0.4627167582511902, + "logits/chosen": -2.4845831394195557, + "logits/rejected": -2.3066840171813965, + "logps/chosen": -0.5467715263366699, + "logps/rejected": -1.0581128597259521, + "loss": 1.3641, + "nll_loss": 0.9014018774032593, + "rewards/accuracies": 0.796875, + "rewards/chosen": -0.5467715263366699, + "rewards/margins": 0.5113412141799927, + "rewards/rejected": -1.0581128597259521, + "step": 410 + }, + { + "epoch": 0.4359816152330926, + "grad_norm": 34.0, + "learning_rate": 1.3881929363747626e-06, + "log_odds_chosen": 1.0594258308410645, + "log_odds_ratio": -0.4148578643798828, + "logits/chosen": -2.3405816555023193, + "logits/rejected": -2.115149974822998, + "logps/chosen": -0.5290128588676453, + "logps/rejected": -1.1592894792556763, + "loss": 1.3394, + "nll_loss": 0.9245734214782715, + "rewards/accuracies": 0.831250011920929, + "rewards/chosen": -0.5290128588676453, + "rewards/margins": 0.6302765607833862, + "rewards/rejected": -1.1592894792556763, + "step": 415 + }, + { + "epoch": 0.4412344057780696, + "grad_norm": 47.0, + "learning_rate": 1.3711972489182206e-06, + "log_odds_chosen": 1.4167802333831787, + "log_odds_ratio": -0.3603227734565735, + "logits/chosen": -2.4658875465393066, + "logits/rejected": -2.18940806388855, + "logps/chosen": -0.5862340331077576, + "logps/rejected": -1.5004864931106567, + "loss": 1.305, + "nll_loss": 0.9447038769721985, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.5862340331077576, + "rewards/margins": 0.9142524003982544, + "rewards/rejected": -1.5004864931106567, + "step": 420 + }, + { + "epoch": 0.4464871963230466, + "grad_norm": 19.75, + "learning_rate": 1.3540762761787936e-06, + "log_odds_chosen": 1.2667293548583984, + "log_odds_ratio": -0.3922019898891449, + "logits/chosen": -2.449897289276123, + "logits/rejected": -2.1496291160583496, + "logps/chosen": -0.5754435658454895, + "logps/rejected": -1.3866373300552368, + "loss": 1.2676, + "nll_loss": 0.8754428625106812, + "rewards/accuracies": 0.859375, + "rewards/chosen": -0.5754435658454895, + "rewards/margins": 0.8111938238143921, + "rewards/rejected": -1.3866373300552368, + "step": 425 + }, + { + "epoch": 0.45173998686802364, + "grad_norm": 39.0, + "learning_rate": 1.3368357967712725e-06, + "log_odds_chosen": 1.156019687652588, + "log_odds_ratio": -0.395340234041214, + "logits/chosen": -2.553677797317505, + "logits/rejected": -2.2673325538635254, + "logps/chosen": -0.5371165871620178, + "logps/rejected": -1.2541286945343018, + "loss": 1.3117, + "nll_loss": 0.9164005517959595, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5371165871620178, + "rewards/margins": 0.7170120477676392, + "rewards/rejected": -1.2541286945343018, + "step": 430 + }, + { + "epoch": 0.45699277741300065, + "grad_norm": 44.25, + "learning_rate": 1.3194816296459482e-06, + "log_odds_chosen": 1.1215965747833252, + "log_odds_ratio": -0.40178972482681274, + "logits/chosen": -2.4841268062591553, + "logits/rejected": -2.2464358806610107, + "logps/chosen": -0.6227961182594299, + "logps/rejected": -1.3194401264190674, + "loss": 1.3687, + "nll_loss": 0.9668703079223633, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.6227961182594299, + "rewards/margins": 0.6966440081596375, + "rewards/rejected": -1.3194401264190674, + "step": 435 + }, + { + "epoch": 0.46224556795797767, + "grad_norm": 30.625, + "learning_rate": 1.302019632124619e-06, + "log_odds_chosen": 1.4459072351455688, + "log_odds_ratio": -0.3312341868877411, + "logits/chosen": -2.497469902038574, + "logits/rejected": -2.215177297592163, + "logps/chosen": -0.5155361294746399, + "logps/rejected": -1.4294028282165527, + "loss": 1.227, + "nll_loss": 0.8957819938659668, + "rewards/accuracies": 0.8968750238418579, + "rewards/chosen": -0.5155361294746399, + "rewards/margins": 0.9138666391372681, + "rewards/rejected": -1.4294028282165527, + "step": 440 + }, + { + "epoch": 0.4674983585029547, + "grad_norm": 27.5, + "learning_rate": 1.284455697923646e-06, + "log_odds_chosen": 1.5342215299606323, + "log_odds_ratio": -0.3261391222476959, + "logits/chosen": -2.5461294651031494, + "logits/rejected": -2.2099266052246094, + "logps/chosen": -0.5843450427055359, + "logps/rejected": -1.5760066509246826, + "loss": 1.312, + "nll_loss": 0.9858700037002563, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.5843450427055359, + "rewards/margins": 0.991661548614502, + "rewards/rejected": -1.5760066509246826, + "step": 445 + }, + { + "epoch": 0.4727511490479317, + "grad_norm": 43.0, + "learning_rate": 1.2667957551647261e-06, + "log_odds_chosen": 1.2222964763641357, + "log_odds_ratio": -0.3712048828601837, + "logits/chosen": -2.5557785034179688, + "logits/rejected": -2.261915922164917, + "logps/chosen": -0.5360510945320129, + "logps/rejected": -1.2696157693862915, + "loss": 1.233, + "nll_loss": 0.8618295788764954, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -0.5360510945320129, + "rewards/margins": 0.7335647344589233, + "rewards/rejected": -1.2696157693862915, + "step": 450 + }, + { + "epoch": 0.4780039395929087, + "grad_norm": 50.5, + "learning_rate": 1.24904576437405e-06, + "log_odds_chosen": 1.1964861154556274, + "log_odds_ratio": -0.380424439907074, + "logits/chosen": -2.387500762939453, + "logits/rejected": -2.2171878814697266, + "logps/chosen": -0.5144879221916199, + "logps/rejected": -1.2391068935394287, + "loss": 1.182, + "nll_loss": 0.801527202129364, + "rewards/accuracies": 0.8843749761581421, + "rewards/chosen": -0.5144879221916199, + "rewards/margins": 0.7246190309524536, + "rewards/rejected": -1.2391068935394287, + "step": 455 + }, + { + "epoch": 0.4832567301378857, + "grad_norm": 34.5, + "learning_rate": 1.2312117164705265e-06, + "log_odds_chosen": 1.319461703300476, + "log_odds_ratio": -0.37714654207229614, + "logits/chosen": -2.5138354301452637, + "logits/rejected": -2.2482171058654785, + "logps/chosen": -0.5467159748077393, + "logps/rejected": -1.3964442014694214, + "loss": 1.2877, + "nll_loss": 0.9105404019355774, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5467159748077393, + "rewards/margins": 0.8497281074523926, + "rewards/rejected": -1.3964442014694214, + "step": 460 + }, + { + "epoch": 0.4885095206828628, + "grad_norm": 43.5, + "learning_rate": 1.2132996307437468e-06, + "log_odds_chosen": 1.3355519771575928, + "log_odds_ratio": -0.3902519941329956, + "logits/chosen": -2.482901096343994, + "logits/rejected": -2.2286696434020996, + "logps/chosen": -0.566125750541687, + "logps/rejected": -1.4363183975219727, + "loss": 1.3035, + "nll_loss": 0.9132728576660156, + "rewards/accuracies": 0.815625011920929, + "rewards/chosen": -0.566125750541687, + "rewards/margins": 0.8701925277709961, + "rewards/rejected": -1.4363183975219727, + "step": 465 + }, + { + "epoch": 0.4937623112278398, + "grad_norm": 83.0, + "learning_rate": 1.1953155528223725e-06, + "log_odds_chosen": 1.1865278482437134, + "log_odds_ratio": -0.392407089471817, + "logits/chosen": -2.425886869430542, + "logits/rejected": -2.155287265777588, + "logps/chosen": -0.5029312968254089, + "logps/rejected": -1.2368618249893188, + "loss": 1.2357, + "nll_loss": 0.8432880640029907, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5029312968254089, + "rewards/margins": 0.7339304089546204, + "rewards/rejected": -1.2368618249893188, + "step": 470 + }, + { + "epoch": 0.4990151017728168, + "grad_norm": 40.5, + "learning_rate": 1.1772655526336367e-06, + "log_odds_chosen": 1.4356929063796997, + "log_odds_ratio": -0.3839671313762665, + "logits/chosen": -2.398430585861206, + "logits/rejected": -2.104560136795044, + "logps/chosen": -0.5578696131706238, + "logps/rejected": -1.5088526010513306, + "loss": 1.2412, + "nll_loss": 0.8572656512260437, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5578696131706238, + "rewards/margins": 0.9509830474853516, + "rewards/rejected": -1.5088526010513306, + "step": 475 + }, + { + "epoch": 0.5042678923177938, + "grad_norm": 28.25, + "learning_rate": 1.1591557223546393e-06, + "log_odds_chosen": 1.148279070854187, + "log_odds_ratio": -0.3996050953865051, + "logits/chosen": -2.365521192550659, + "logits/rejected": -2.152665615081787, + "logps/chosen": -0.566467821598053, + "logps/rejected": -1.2856696844100952, + "loss": 1.3237, + "nll_loss": 0.9241225123405457, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.566467821598053, + "rewards/margins": 0.719201922416687, + "rewards/rejected": -1.2856696844100952, + "step": 480 + }, + { + "epoch": 0.5095206828627709, + "grad_norm": 36.25, + "learning_rate": 1.1409921743561381e-06, + "log_odds_chosen": 1.1759016513824463, + "log_odds_ratio": -0.41472458839416504, + "logits/chosen": -2.404526472091675, + "logits/rejected": -2.2163596153259277, + "logps/chosen": -0.5324310064315796, + "logps/rejected": -1.2714060544967651, + "loss": 1.293, + "nll_loss": 0.8782441020011902, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5324310064315796, + "rewards/margins": 0.7389749884605408, + "rewards/rejected": -1.2714060544967651, + "step": 485 + }, + { + "epoch": 0.5147734734077478, + "grad_norm": 223.0, + "learning_rate": 1.1227810391395199e-06, + "log_odds_chosen": 1.385846734046936, + "log_odds_ratio": -0.3814238905906677, + "logits/chosen": -2.4934306144714355, + "logits/rejected": -2.2085797786712646, + "logps/chosen": -0.5657092928886414, + "logps/rejected": -1.4650784730911255, + "loss": 1.2852, + "nll_loss": 0.9037421345710754, + "rewards/accuracies": 0.856249988079071, + "rewards/chosen": -0.5657092928886414, + "rewards/margins": 0.8993691205978394, + "rewards/rejected": -1.4650784730911255, + "step": 490 + }, + { + "epoch": 0.5200262639527249, + "grad_norm": 27.625, + "learning_rate": 1.1045284632676535e-06, + "log_odds_chosen": 1.637117624282837, + "log_odds_ratio": -0.36074963212013245, + "logits/chosen": -2.505157947540283, + "logits/rejected": -2.18147611618042, + "logps/chosen": -0.5794259905815125, + "logps/rejected": -1.7134405374526978, + "loss": 1.2555, + "nll_loss": 0.8947887420654297, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5794259905815125, + "rewards/margins": 1.1340144872665405, + "rewards/rejected": -1.7134405374526978, + "step": 495 + }, + { + "epoch": 0.525279054497702, + "grad_norm": 25.375, + "learning_rate": 1.0862406072903223e-06, + "log_odds_chosen": 1.4640438556671143, + "log_odds_ratio": -0.36846035718917847, + "logits/chosen": -2.5681748390197754, + "logits/rejected": -2.232964038848877, + "logps/chosen": -0.5701361298561096, + "logps/rejected": -1.5233440399169922, + "loss": 1.2435, + "nll_loss": 0.8750120997428894, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5701361298561096, + "rewards/margins": 0.9532078504562378, + "rewards/rejected": -1.5233440399169922, + "step": 500 + }, + { + "epoch": 0.5305318450426789, + "grad_norm": 23.75, + "learning_rate": 1.067923643664936e-06, + "log_odds_chosen": 1.4654853343963623, + "log_odds_ratio": -0.35504215955734253, + "logits/chosen": -2.502295970916748, + "logits/rejected": -2.181178569793701, + "logps/chosen": -0.5419307947158813, + "logps/rejected": -1.5056110620498657, + "loss": 1.2431, + "nll_loss": 0.8880621790885925, + "rewards/accuracies": 0.8812500238418579, + "rewards/chosen": -0.5419307947158813, + "rewards/margins": 0.9636803865432739, + "rewards/rejected": -1.5056110620498657, + "step": 505 + }, + { + "epoch": 0.535784635587656, + "grad_norm": 35.0, + "learning_rate": 1.0495837546732222e-06, + "log_odds_chosen": 1.5194576978683472, + "log_odds_ratio": -0.37253108620643616, + "logits/chosen": -2.413229465484619, + "logits/rejected": -2.184525728225708, + "logps/chosen": -0.5820909738540649, + "logps/rejected": -1.6039245128631592, + "loss": 1.3383, + "nll_loss": 0.9657222032546997, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.5820909738540649, + "rewards/margins": 1.0218335390090942, + "rewards/rejected": -1.6039245128631592, + "step": 510 + }, + { + "epoch": 0.541037426132633, + "grad_norm": 32.25, + "learning_rate": 1.0312271303346038e-06, + "log_odds_chosen": 1.314542531967163, + "log_odds_ratio": -0.396615594625473, + "logits/chosen": -2.545009136199951, + "logits/rejected": -2.301347017288208, + "logps/chosen": -0.562983512878418, + "logps/rejected": -1.4147989749908447, + "loss": 1.3396, + "nll_loss": 0.9429594278335571, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.562983512878418, + "rewards/margins": 0.851815402507782, + "rewards/rejected": -1.4147989749908447, + "step": 515 + }, + { + "epoch": 0.54629021667761, + "grad_norm": 47.75, + "learning_rate": 1.0128599663169628e-06, + "log_odds_chosen": 1.084162950515747, + "log_odds_ratio": -0.4125159680843353, + "logits/chosen": -2.4878952503204346, + "logits/rejected": -2.245314359664917, + "logps/chosen": -0.5130459666252136, + "logps/rejected": -1.1407145261764526, + "loss": 1.3142, + "nll_loss": 0.901726245880127, + "rewards/accuracies": 0.8343750238418579, + "rewards/chosen": -0.5130459666252136, + "rewards/margins": 0.6276686191558838, + "rewards/rejected": -1.1407145261764526, + "step": 520 + }, + { + "epoch": 0.551543007222587, + "grad_norm": 74.0, + "learning_rate": 9.944884618454995e-07, + "log_odds_chosen": 1.5892114639282227, + "log_odds_ratio": -0.3318895697593689, + "logits/chosen": -2.5057709217071533, + "logits/rejected": -2.110414505004883, + "logps/chosen": -0.5387485027313232, + "logps/rejected": -1.5842351913452148, + "loss": 1.2507, + "nll_loss": 0.9187744855880737, + "rewards/accuracies": 0.8812500238418579, + "rewards/chosen": -0.5387485027313232, + "rewards/margins": 1.0454866886138916, + "rewards/rejected": -1.5842351913452148, + "step": 525 + }, + { + "epoch": 0.556795797767564, + "grad_norm": 73.0, + "learning_rate": 9.7611881761039e-07, + "log_odds_chosen": 1.6785354614257812, + "log_odds_ratio": -0.3325541019439697, + "logits/chosen": -2.462970733642578, + "logits/rejected": -2.220999240875244, + "logps/chosen": -0.6112784147262573, + "logps/rejected": -1.7486165761947632, + "loss": 1.3345, + "nll_loss": 1.0019125938415527, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -0.6112784147262573, + "rewards/margins": 1.1373381614685059, + "rewards/rejected": -1.7486165761947632, + "step": 530 + }, + { + "epoch": 0.562048588312541, + "grad_norm": 36.0, + "learning_rate": 9.57757233673949e-07, + "log_odds_chosen": 1.4563804864883423, + "log_odds_ratio": -0.36100301146507263, + "logits/chosen": -2.4625449180603027, + "logits/rejected": -2.1974194049835205, + "logps/chosen": -0.5516290664672852, + "logps/rejected": -1.515852928161621, + "loss": 1.2346, + "nll_loss": 0.8735913038253784, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5516290664672852, + "rewards/margins": 0.9642238616943359, + "rewards/rejected": -1.515852928161621, + "step": 535 + }, + { + "epoch": 0.5673013788575181, + "grad_norm": 36.0, + "learning_rate": 9.394099073780066e-07, + "log_odds_chosen": 1.4258034229278564, + "log_odds_ratio": -0.36102384328842163, + "logits/chosen": -2.5518240928649902, + "logits/rejected": -2.2731943130493164, + "logps/chosen": -0.5590797662734985, + "logps/rejected": -1.4916408061981201, + "loss": 1.2649, + "nll_loss": 0.9038828015327454, + "rewards/accuracies": 0.8343750238418579, + "rewards/chosen": -0.5590797662734985, + "rewards/margins": 0.9325610399246216, + "rewards/rejected": -1.4916408061981201, + "step": 540 + }, + { + "epoch": 0.572554169402495, + "grad_norm": 64.0, + "learning_rate": 9.210830312521991e-07, + "log_odds_chosen": 1.605653166770935, + "log_odds_ratio": -0.338408887386322, + "logits/chosen": -2.5818705558776855, + "logits/rejected": -2.311086416244507, + "logps/chosen": -0.5466338992118835, + "logps/rejected": -1.6157076358795166, + "loss": 1.3041, + "nll_loss": 0.9657169580459595, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.5466338992118835, + "rewards/margins": 1.0690736770629883, + "rewards/rejected": -1.6157076358795166, + "step": 545 + }, + { + "epoch": 0.5778069599474721, + "grad_norm": 57.25, + "learning_rate": 9.027827909238901e-07, + "log_odds_chosen": 1.8266319036483765, + "log_odds_ratio": -0.3148033320903778, + "logits/chosen": -2.48435115814209, + "logits/rejected": -2.166586399078369, + "logps/chosen": -0.5606757402420044, + "logps/rejected": -1.8262403011322021, + "loss": 1.2896, + "nll_loss": 0.97479248046875, + "rewards/accuracies": 0.8687499761581421, + "rewards/chosen": -0.5606757402420044, + "rewards/margins": 1.2655649185180664, + "rewards/rejected": -1.8262403011322021, + "step": 550 + }, + { + "epoch": 0.5830597504924491, + "grad_norm": 47.5, + "learning_rate": 8.845153630304139e-07, + "log_odds_chosen": 1.663627028465271, + "log_odds_ratio": -0.3311775028705597, + "logits/chosen": -2.4467196464538574, + "logits/rejected": -2.2170791625976562, + "logps/chosen": -0.5954256057739258, + "logps/rejected": -1.7486213445663452, + "loss": 1.2756, + "nll_loss": 0.9444006085395813, + "rewards/accuracies": 0.878125011920929, + "rewards/chosen": -0.5954256057739258, + "rewards/margins": 1.1531956195831299, + "rewards/rejected": -1.7486213445663452, + "step": 555 + }, + { + "epoch": 0.5883125410374261, + "grad_norm": 45.5, + "learning_rate": 8.662869131343606e-07, + "log_odds_chosen": 1.4104127883911133, + "log_odds_ratio": -0.39170485734939575, + "logits/chosen": -2.5256340503692627, + "logits/rejected": -2.213099241256714, + "logps/chosen": -0.5794434547424316, + "logps/rejected": -1.5348830223083496, + "loss": 1.3375, + "nll_loss": 0.9457686543464661, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5794434547424316, + "rewards/margins": 0.955439567565918, + "rewards/rejected": -1.5348830223083496, + "step": 560 + }, + { + "epoch": 0.5935653315824031, + "grad_norm": 72.0, + "learning_rate": 8.481035936425926e-07, + "log_odds_chosen": 1.1931443214416504, + "log_odds_ratio": -0.3968736529350281, + "logits/chosen": -2.56657338142395, + "logits/rejected": -2.191765785217285, + "logps/chosen": -0.5020140409469604, + "logps/rejected": -1.227325201034546, + "loss": 1.2792, + "nll_loss": 0.8822978138923645, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.5020140409469604, + "rewards/margins": 0.7253111600875854, + "rewards/rejected": -1.227325201034546, + "step": 565 + }, + { + "epoch": 0.5988181221273802, + "grad_norm": 37.25, + "learning_rate": 8.29971541729707e-07, + "log_odds_chosen": 1.549736738204956, + "log_odds_ratio": -0.3515177369117737, + "logits/chosen": -2.526639461517334, + "logits/rejected": -2.2129909992218018, + "logps/chosen": -0.5579209923744202, + "logps/rejected": -1.5522905588150024, + "loss": 1.2671, + "nll_loss": 0.9156067967414856, + "rewards/accuracies": 0.856249988079071, + "rewards/chosen": -0.5579209923744202, + "rewards/margins": 0.9943695068359375, + "rewards/rejected": -1.5522905588150024, + "step": 570 + }, + { + "epoch": 0.6040709126723572, + "grad_norm": 78.5, + "learning_rate": 8.118968772666338e-07, + "log_odds_chosen": 1.9918029308319092, + "log_odds_ratio": -0.33105817437171936, + "logits/chosen": -2.5553669929504395, + "logits/rejected": -2.255253791809082, + "logps/chosen": -0.6138916015625, + "logps/rejected": -2.058006763458252, + "loss": 1.261, + "nll_loss": 0.9299631118774414, + "rewards/accuracies": 0.856249988079071, + "rewards/chosen": -0.6138916015625, + "rewards/margins": 1.4441156387329102, + "rewards/rejected": -2.058006763458252, + "step": 575 + }, + { + "epoch": 0.6093237032173342, + "grad_norm": 32.25, + "learning_rate": 7.938857007550796e-07, + "log_odds_chosen": 1.5095994472503662, + "log_odds_ratio": -0.36659660935401917, + "logits/chosen": -2.4949142932891846, + "logits/rejected": -2.217616558074951, + "logps/chosen": -0.5693143606185913, + "logps/rejected": -1.5770564079284668, + "loss": 1.2795, + "nll_loss": 0.9128750562667847, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.5693143606185913, + "rewards/margins": 1.007741928100586, + "rewards/rejected": -1.5770564079284668, + "step": 580 + }, + { + "epoch": 0.6145764937623113, + "grad_norm": 44.5, + "learning_rate": 7.759440912685042e-07, + "log_odds_chosen": 1.313231348991394, + "log_odds_ratio": -0.39206627011299133, + "logits/chosen": -2.4366495609283447, + "logits/rejected": -2.1927928924560547, + "logps/chosen": -0.5398006439208984, + "logps/rejected": -1.4002869129180908, + "loss": 1.2987, + "nll_loss": 0.9065971374511719, + "rewards/accuracies": 0.8218749761581421, + "rewards/chosen": -0.5398006439208984, + "rewards/margins": 0.8604865074157715, + "rewards/rejected": -1.4002869129180908, + "step": 585 + }, + { + "epoch": 0.6198292843072882, + "grad_norm": 41.75, + "learning_rate": 7.580781044003324e-07, + "log_odds_chosen": 1.5099523067474365, + "log_odds_ratio": -0.37858808040618896, + "logits/chosen": -2.5282700061798096, + "logits/rejected": -2.1985023021698, + "logps/chosen": -0.554128110408783, + "logps/rejected": -1.5762214660644531, + "loss": 1.2642, + "nll_loss": 0.885593593120575, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.554128110408783, + "rewards/margins": 1.022093415260315, + "rewards/rejected": -1.5762214660644531, + "step": 590 + }, + { + "epoch": 0.6250820748522653, + "grad_norm": 94.0, + "learning_rate": 7.402937702200904e-07, + "log_odds_chosen": 1.7455905675888062, + "log_odds_ratio": -0.3350276052951813, + "logits/chosen": -2.5306236743927, + "logits/rejected": -2.249689817428589, + "logps/chosen": -0.5238341093063354, + "logps/rejected": -1.7180259227752686, + "loss": 1.2212, + "nll_loss": 0.8861449956893921, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.5238341093063354, + "rewards/margins": 1.1941916942596436, + "rewards/rejected": -1.7180259227752686, + "step": 595 + }, + { + "epoch": 0.6303348653972423, + "grad_norm": 57.0, + "learning_rate": 7.225970912381556e-07, + "log_odds_chosen": 1.5003291368484497, + "log_odds_ratio": -0.391081303358078, + "logits/chosen": -2.381641387939453, + "logits/rejected": -2.1322736740112305, + "logps/chosen": -0.5944348573684692, + "logps/rejected": -1.6424591541290283, + "loss": 1.3066, + "nll_loss": 0.9154736399650574, + "rewards/accuracies": 0.8218749761581421, + "rewards/chosen": -0.5944348573684692, + "rewards/margins": 1.048024296760559, + "rewards/rejected": -1.6424591541290283, + "step": 600 + }, + { + "epoch": 0.6355876559422193, + "grad_norm": 41.0, + "learning_rate": 7.049940403798089e-07, + "log_odds_chosen": 1.531709909439087, + "log_odds_ratio": -0.3830433487892151, + "logits/chosen": -2.4697697162628174, + "logits/rejected": -2.217533826828003, + "logps/chosen": -0.5523134469985962, + "logps/rejected": -1.5712653398513794, + "loss": 1.314, + "nll_loss": 0.9309525489807129, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5523134469985962, + "rewards/margins": 1.0189517736434937, + "rewards/rejected": -1.5712653398513794, + "step": 605 + }, + { + "epoch": 0.6408404464871963, + "grad_norm": 46.5, + "learning_rate": 6.874905589692733e-07, + "log_odds_chosen": 1.6414533853530884, + "log_odds_ratio": -0.34355098009109497, + "logits/chosen": -2.509610176086426, + "logits/rejected": -2.1736972332000732, + "logps/chosen": -0.5539788007736206, + "logps/rejected": -1.6842210292816162, + "loss": 1.2389, + "nll_loss": 0.8953197598457336, + "rewards/accuracies": 0.8687499761581421, + "rewards/chosen": -0.5539788007736206, + "rewards/margins": 1.1302422285079956, + "rewards/rejected": -1.6842210292816162, + "step": 610 + }, + { + "epoch": 0.6460932370321734, + "grad_norm": 32.25, + "learning_rate": 6.700925547244171e-07, + "log_odds_chosen": 1.9415044784545898, + "log_odds_ratio": -0.31946122646331787, + "logits/chosen": -2.4332690238952637, + "logits/rejected": -2.26471209526062, + "logps/chosen": -0.6300308704376221, + "logps/rejected": -2.049290180206299, + "loss": 1.2482, + "nll_loss": 0.9287741780281067, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.6300308704376221, + "rewards/margins": 1.4192593097686768, + "rewards/rejected": -2.049290180206299, + "step": 615 + }, + { + "epoch": 0.6513460275771503, + "grad_norm": 32.5, + "learning_rate": 6.528058997627995e-07, + "log_odds_chosen": 1.9388889074325562, + "log_odds_ratio": -0.3166273534297943, + "logits/chosen": -2.5412425994873047, + "logits/rejected": -2.1768264770507812, + "logps/chosen": -0.5474293828010559, + "logps/rejected": -1.9378162622451782, + "loss": 1.2866, + "nll_loss": 0.9699424505233765, + "rewards/accuracies": 0.878125011920929, + "rewards/chosen": -0.5474293828010559, + "rewards/margins": 1.3903871774673462, + "rewards/rejected": -1.9378162622451782, + "step": 620 + }, + { + "epoch": 0.6565988181221274, + "grad_norm": 40.0, + "learning_rate": 6.35636428619734e-07, + "log_odds_chosen": 1.7123737335205078, + "log_odds_ratio": -0.34193840622901917, + "logits/chosen": -2.5048129558563232, + "logits/rejected": -2.1842281818389893, + "logps/chosen": -0.5440694093704224, + "logps/rejected": -1.7357890605926514, + "loss": 1.2903, + "nll_loss": 0.9483565092086792, + "rewards/accuracies": 0.859375, + "rewards/chosen": -0.5440694093704224, + "rewards/margins": 1.1917197704315186, + "rewards/rejected": -1.7357890605926514, + "step": 625 + }, + { + "epoch": 0.6618516086671044, + "grad_norm": 46.25, + "learning_rate": 6.185899362790338e-07, + "log_odds_chosen": 1.6516172885894775, + "log_odds_ratio": -0.3549567461013794, + "logits/chosen": -2.4393770694732666, + "logits/rejected": -2.138049602508545, + "logps/chosen": -0.5555499196052551, + "logps/rejected": -1.7016226053237915, + "loss": 1.2573, + "nll_loss": 0.9023006558418274, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5555499196052551, + "rewards/margins": 1.1460726261138916, + "rewards/rejected": -1.7016226053237915, + "step": 630 + }, + { + "epoch": 0.6671043992120814, + "grad_norm": 101.5, + "learning_rate": 6.016721762171098e-07, + "log_odds_chosen": 1.636366605758667, + "log_odds_ratio": -0.3687242567539215, + "logits/chosen": -2.469954252243042, + "logits/rejected": -2.2552268505096436, + "logps/chosen": -0.6394462585449219, + "logps/rejected": -1.7851154804229736, + "loss": 1.3697, + "nll_loss": 1.000967025756836, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.6394462585449219, + "rewards/margins": 1.1456692218780518, + "rewards/rejected": -1.7851154804229736, + "step": 635 + }, + { + "epoch": 0.6723571897570584, + "grad_norm": 64.5, + "learning_rate": 5.848888584610726e-07, + "log_odds_chosen": 1.693683385848999, + "log_odds_ratio": -0.34921011328697205, + "logits/chosen": -2.486765146255493, + "logits/rejected": -2.2645862102508545, + "logps/chosen": -0.5731798410415649, + "logps/rejected": -1.7742217779159546, + "loss": 1.261, + "nll_loss": 0.9118081331253052, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.5731798410415649, + "rewards/margins": 1.2010419368743896, + "rewards/rejected": -1.7742217779159546, + "step": 640 + }, + { + "epoch": 0.6776099803020355, + "grad_norm": 58.5, + "learning_rate": 5.682456476615072e-07, + "log_odds_chosen": 1.4461402893066406, + "log_odds_ratio": -0.3787740170955658, + "logits/chosen": -2.355269432067871, + "logits/rejected": -2.16302490234375, + "logps/chosen": -0.5690776705741882, + "logps/rejected": -1.5551892518997192, + "loss": 1.2771, + "nll_loss": 0.8983281254768372, + "rewards/accuracies": 0.846875011920929, + "rewards/chosen": -0.5690776705741882, + "rewards/margins": 0.9861115217208862, + "rewards/rejected": -1.5551892518997192, + "step": 645 + }, + { + "epoch": 0.6828627708470125, + "grad_norm": 36.75, + "learning_rate": 5.517481611805539e-07, + "log_odds_chosen": 1.5578912496566772, + "log_odds_ratio": -0.35105592012405396, + "logits/chosen": -2.3847219944000244, + "logits/rejected": -2.130415439605713, + "logps/chosen": -0.537613570690155, + "logps/rejected": -1.5832931995391846, + "loss": 1.246, + "nll_loss": 0.8949264287948608, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.537613570690155, + "rewards/margins": 1.0456795692443848, + "rewards/rejected": -1.5832931995391846, + "step": 650 + }, + { + "epoch": 0.6881155613919895, + "grad_norm": 28.25, + "learning_rate": 5.354019671959599e-07, + "log_odds_chosen": 1.4725126028060913, + "log_odds_ratio": -0.38070547580718994, + "logits/chosen": -2.3801114559173584, + "logits/rejected": -2.134171724319458, + "logps/chosen": -0.5319515466690063, + "logps/rejected": -1.5217872858047485, + "loss": 1.3054, + "nll_loss": 0.9246999621391296, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5319515466690063, + "rewards/margins": 0.9898357391357422, + "rewards/rejected": -1.5217872858047485, + "step": 655 + }, + { + "epoch": 0.6933683519369666, + "grad_norm": 35.5, + "learning_rate": 5.192125828217202e-07, + "log_odds_chosen": 1.628064513206482, + "log_odds_ratio": -0.370327889919281, + "logits/chosen": -2.5233168601989746, + "logits/rejected": -2.1562933921813965, + "logps/chosen": -0.5629066824913025, + "logps/rejected": -1.6909490823745728, + "loss": 1.2606, + "nll_loss": 0.8903215527534485, + "rewards/accuracies": 0.846875011920929, + "rewards/chosen": -0.5629066824913025, + "rewards/margins": 1.128042459487915, + "rewards/rejected": -1.6909490823745728, + "step": 660 + }, + { + "epoch": 0.6986211424819435, + "grad_norm": 51.5, + "learning_rate": 5.031854722459652e-07, + "log_odds_chosen": 1.8480112552642822, + "log_odds_ratio": -0.3127003610134125, + "logits/chosen": -2.4370510578155518, + "logits/rejected": -2.0890867710113525, + "logps/chosen": -0.5302228927612305, + "logps/rejected": -1.8121706247329712, + "loss": 1.2074, + "nll_loss": 0.8947356939315796, + "rewards/accuracies": 0.878125011920929, + "rewards/chosen": -0.5302228927612305, + "rewards/margins": 1.2819478511810303, + "rewards/rejected": -1.8121706247329712, + "step": 665 + }, + { + "epoch": 0.7038739330269206, + "grad_norm": 31.5, + "learning_rate": 4.873260448867004e-07, + "log_odds_chosen": 2.02109956741333, + "log_odds_ratio": -0.31728652119636536, + "logits/chosen": -2.470301628112793, + "logits/rejected": -2.2189319133758545, + "logps/chosen": -0.6230054497718811, + "logps/rejected": -2.0598232746124268, + "loss": 1.3239, + "nll_loss": 1.0066121816635132, + "rewards/accuracies": 0.8812500238418579, + "rewards/chosen": -0.6230054497718811, + "rewards/margins": 1.4368176460266113, + "rewards/rejected": -2.0598232746124268, + "step": 670 + }, + { + "epoch": 0.7091267235718975, + "grad_norm": 68.0, + "learning_rate": 4.7163965356604117e-07, + "log_odds_chosen": 1.897443413734436, + "log_odds_ratio": -0.3486331105232239, + "logits/chosen": -2.554206132888794, + "logits/rejected": -2.1669750213623047, + "logps/chosen": -0.64203941822052, + "logps/rejected": -2.0166876316070557, + "loss": 1.3553, + "nll_loss": 1.0066633224487305, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.64203941822052, + "rewards/margins": 1.3746483325958252, + "rewards/rejected": -2.0166876316070557, + "step": 675 + }, + { + "epoch": 0.7143795141168746, + "grad_norm": 34.5, + "learning_rate": 4.561315927035445e-07, + "log_odds_chosen": 1.707550048828125, + "log_odds_ratio": -0.34410637617111206, + "logits/chosen": -2.440441846847534, + "logits/rejected": -2.1145124435424805, + "logps/chosen": -0.5574239492416382, + "logps/rejected": -1.7339591979980469, + "loss": 1.2025, + "nll_loss": 0.8583625555038452, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.5574239492416382, + "rewards/margins": 1.1765353679656982, + "rewards/rejected": -1.7339591979980469, + "step": 680 + }, + { + "epoch": 0.7196323046618516, + "grad_norm": 64.5, + "learning_rate": 4.408070965292533e-07, + "log_odds_chosen": 1.7007535696029663, + "log_odds_ratio": -0.35346347093582153, + "logits/chosen": -2.456326961517334, + "logits/rejected": -2.1892619132995605, + "logps/chosen": -0.5550821423530579, + "logps/rejected": -1.7205698490142822, + "loss": 1.2778, + "nll_loss": 0.9243642091751099, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5550821423530579, + "rewards/margins": 1.1654876470565796, + "rewards/rejected": -1.7205698490142822, + "step": 685 + }, + { + "epoch": 0.7248850952068286, + "grad_norm": 42.25, + "learning_rate": 4.256713373170564e-07, + "log_odds_chosen": 1.5547049045562744, + "log_odds_ratio": -0.36127448081970215, + "logits/chosen": -2.46553373336792, + "logits/rejected": -2.2510862350463867, + "logps/chosen": -0.603643536567688, + "logps/rejected": -1.6664206981658936, + "loss": 1.3272, + "nll_loss": 0.9659638404846191, + "rewards/accuracies": 0.856249988079071, + "rewards/chosen": -0.603643536567688, + "rewards/margins": 1.0627771615982056, + "rewards/rejected": -1.6664206981658936, + "step": 690 + }, + { + "epoch": 0.7301378857518056, + "grad_norm": 38.5, + "learning_rate": 4.1072942363896025e-07, + "log_odds_chosen": 1.6411514282226562, + "log_odds_ratio": -0.3377731442451477, + "logits/chosen": -2.5552942752838135, + "logits/rejected": -2.229196071624756, + "logps/chosen": -0.5576506853103638, + "logps/rejected": -1.6714286804199219, + "loss": 1.2942, + "nll_loss": 0.9564154744148254, + "rewards/accuracies": 0.878125011920929, + "rewards/chosen": -0.5576506853103638, + "rewards/margins": 1.1137781143188477, + "rewards/rejected": -1.6714286804199219, + "step": 695 + }, + { + "epoch": 0.7353906762967827, + "grad_norm": 26.125, + "learning_rate": 3.9598639864085925e-07, + "log_odds_chosen": 1.2980868816375732, + "log_odds_ratio": -0.3892515301704407, + "logits/chosen": -2.417532444000244, + "logits/rejected": -2.2620291709899902, + "logps/chosen": -0.5406171083450317, + "logps/rejected": -1.3745439052581787, + "loss": 1.283, + "nll_loss": 0.893776535987854, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.5406171083450317, + "rewards/margins": 0.833926796913147, + "rewards/rejected": -1.3745439052581787, + "step": 700 + }, + { + "epoch": 0.7406434668417596, + "grad_norm": 57.75, + "learning_rate": 3.8144723834039073e-07, + "log_odds_chosen": 1.3730871677398682, + "log_odds_ratio": -0.38403210043907166, + "logits/chosen": -2.492102861404419, + "logits/rejected": -2.1305251121520996, + "logps/chosen": -0.5153442621231079, + "logps/rejected": -1.4106855392456055, + "loss": 1.2797, + "nll_loss": 0.8956896662712097, + "rewards/accuracies": 0.8343750238418579, + "rewards/chosen": -0.5153442621231079, + "rewards/margins": 0.8953412175178528, + "rewards/rejected": -1.4106855392456055, + "step": 705 + }, + { + "epoch": 0.7458962573867367, + "grad_norm": 28.25, + "learning_rate": 3.6711684994744486e-07, + "log_odds_chosen": 1.7186520099639893, + "log_odds_ratio": -0.33004146814346313, + "logits/chosen": -2.537470817565918, + "logits/rejected": -2.23635196685791, + "logps/chosen": -0.4957657754421234, + "logps/rejected": -1.6590726375579834, + "loss": 1.2277, + "nll_loss": 0.8976136445999146, + "rewards/accuracies": 0.859375, + "rewards/chosen": -0.4957657754421234, + "rewards/margins": 1.1633068323135376, + "rewards/rejected": -1.6590726375579834, + "step": 710 + }, + { + "epoch": 0.7511490479317138, + "grad_norm": 50.25, + "learning_rate": 3.530000702078999e-07, + "log_odds_chosen": 1.9104875326156616, + "log_odds_ratio": -0.30225199460983276, + "logits/chosen": -2.41103196144104, + "logits/rejected": -2.163609743118286, + "logps/chosen": -0.535643458366394, + "logps/rejected": -1.8592544794082642, + "loss": 1.2363, + "nll_loss": 0.9340142011642456, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.535643458366394, + "rewards/margins": 1.3236110210418701, + "rewards/rejected": -1.8592544794082642, + "step": 715 + }, + { + "epoch": 0.7564018384766907, + "grad_norm": 215.0, + "learning_rate": 3.391016637711389e-07, + "log_odds_chosen": 1.9387279748916626, + "log_odds_ratio": -0.32732483744621277, + "logits/chosen": -2.503373861312866, + "logits/rejected": -2.184051990509033, + "logps/chosen": -0.6013236045837402, + "logps/rejected": -1.979087233543396, + "loss": 1.2995, + "nll_loss": 0.9722166061401367, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -0.6013236045837402, + "rewards/margins": 1.3777637481689453, + "rewards/rejected": -1.979087233543396, + "step": 720 + }, + { + "epoch": 0.7616546290216678, + "grad_norm": 64.5, + "learning_rate": 3.2542632158190133e-07, + "log_odds_chosen": 1.8217693567276, + "log_odds_ratio": -0.3460733890533447, + "logits/chosen": -2.4695355892181396, + "logits/rejected": -2.266535758972168, + "logps/chosen": -0.5930324792861938, + "logps/rejected": -1.8648335933685303, + "loss": 1.2692, + "nll_loss": 0.9231220483779907, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.5930324792861938, + "rewards/margins": 1.271801233291626, + "rewards/rejected": -1.8648335933685303, + "step": 725 + }, + { + "epoch": 0.7669074195666448, + "grad_norm": 60.0, + "learning_rate": 3.1197865929701017e-07, + "log_odds_chosen": 1.9611870050430298, + "log_odds_ratio": -0.3502156138420105, + "logits/chosen": -2.595439910888672, + "logits/rejected": -2.2361018657684326, + "logps/chosen": -0.5836862921714783, + "logps/rejected": -2.0304791927337646, + "loss": 1.3186, + "nll_loss": 0.9683855175971985, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5836862921714783, + "rewards/margins": 1.4467928409576416, + "rewards/rejected": -2.0304791927337646, + "step": 730 + }, + { + "epoch": 0.7721602101116218, + "grad_norm": 63.75, + "learning_rate": 2.987632157275114e-07, + "log_odds_chosen": 1.6977773904800415, + "log_odds_ratio": -0.3493327796459198, + "logits/chosen": -2.5089340209960938, + "logits/rejected": -2.2651724815368652, + "logps/chosen": -0.5790574550628662, + "logps/rejected": -1.748196005821228, + "loss": 1.2328, + "nll_loss": 0.883512020111084, + "rewards/accuracies": 0.856249988079071, + "rewards/chosen": -0.5790574550628662, + "rewards/margins": 1.1691386699676514, + "rewards/rejected": -1.748196005821228, + "step": 735 + }, + { + "epoch": 0.7774130006565988, + "grad_norm": 47.0, + "learning_rate": 2.8578445130674833e-07, + "log_odds_chosen": 1.5758211612701416, + "log_odds_ratio": -0.3468172550201416, + "logits/chosen": -2.4574217796325684, + "logits/rejected": -2.2445011138916016, + "logps/chosen": -0.5336965322494507, + "logps/rejected": -1.6166375875473022, + "loss": 1.2211, + "nll_loss": 0.8742717504501343, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.5336965322494507, + "rewards/margins": 1.082940936088562, + "rewards/rejected": -1.6166375875473022, + "step": 740 + }, + { + "epoch": 0.7826657912015759, + "grad_norm": 33.25, + "learning_rate": 2.73046746584891e-07, + "log_odds_chosen": 1.6906464099884033, + "log_odds_ratio": -0.3406273126602173, + "logits/chosen": -2.5112786293029785, + "logits/rejected": -2.2304630279541016, + "logps/chosen": -0.5315414667129517, + "logps/rejected": -1.6976295709609985, + "loss": 1.2098, + "nll_loss": 0.8692021369934082, + "rewards/accuracies": 0.859375, + "rewards/chosen": -0.5315414667129517, + "rewards/margins": 1.1660881042480469, + "rewards/rejected": -1.6976295709609985, + "step": 745 + }, + { + "epoch": 0.7879185817465528, + "grad_norm": 40.25, + "learning_rate": 2.605544007504279e-07, + "log_odds_chosen": 1.7450376749038696, + "log_odds_ratio": -0.32459336519241333, + "logits/chosen": -2.553576946258545, + "logits/rejected": -2.259354591369629, + "logps/chosen": -0.5844911336898804, + "logps/rejected": -1.801825761795044, + "loss": 1.2855, + "nll_loss": 0.9608856439590454, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -0.5844911336898804, + "rewards/margins": 1.217334508895874, + "rewards/rejected": -1.801825761795044, + "step": 750 + }, + { + "epoch": 0.7931713722915299, + "grad_norm": 36.5, + "learning_rate": 2.4831163017911683e-07, + "log_odds_chosen": 1.651958703994751, + "log_odds_ratio": -0.34634822607040405, + "logits/chosen": -2.405233144760132, + "logits/rejected": -2.138745069503784, + "logps/chosen": -0.5561404228210449, + "logps/rejected": -1.6944749355316162, + "loss": 1.2428, + "nll_loss": 0.8964967727661133, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.5561404228210449, + "rewards/margins": 1.1383345127105713, + "rewards/rejected": -1.6944749355316162, + "step": 755 + }, + { + "epoch": 0.7984241628365069, + "grad_norm": 46.75, + "learning_rate": 2.3632256701088814e-07, + "log_odds_chosen": 1.698676347732544, + "log_odds_ratio": -0.3407271206378937, + "logits/chosen": -2.5164520740509033, + "logits/rejected": -2.169098377227783, + "logps/chosen": -0.546515166759491, + "logps/rejected": -1.726548433303833, + "loss": 1.2007, + "nll_loss": 0.8599587678909302, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.546515166759491, + "rewards/margins": 1.1800330877304077, + "rewards/rejected": -1.726548433303833, + "step": 760 + }, + { + "epoch": 0.8036769533814839, + "grad_norm": 31.625, + "learning_rate": 2.245912577551785e-07, + "log_odds_chosen": 1.7021366357803345, + "log_odds_ratio": -0.36240798234939575, + "logits/chosen": -2.583963632583618, + "logits/rejected": -2.3067448139190674, + "logps/chosen": -0.610865592956543, + "logps/rejected": -1.795292854309082, + "loss": 1.3449, + "nll_loss": 0.9824475049972534, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.610865592956543, + "rewards/margins": 1.184427261352539, + "rewards/rejected": -1.795292854309082, + "step": 765 + }, + { + "epoch": 0.8089297439264609, + "grad_norm": 97.5, + "learning_rate": 2.131216619251659e-07, + "log_odds_chosen": 1.825273871421814, + "log_odds_ratio": -0.3238641917705536, + "logits/chosen": -2.533202648162842, + "logits/rejected": -2.3293657302856445, + "logps/chosen": -0.6178978681564331, + "logps/rejected": -1.9215917587280273, + "loss": 1.3183, + "nll_loss": 0.9943979978561401, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -0.6178978681564331, + "rewards/margins": 1.3036938905715942, + "rewards/rejected": -1.9215917587280273, + "step": 770 + }, + { + "epoch": 0.814182534471438, + "grad_norm": 63.0, + "learning_rate": 2.0191765070136768e-07, + "log_odds_chosen": 1.8990042209625244, + "log_odds_ratio": -0.3358913064002991, + "logits/chosen": -2.4345898628234863, + "logits/rejected": -2.134831190109253, + "logps/chosen": -0.5476903915405273, + "logps/rejected": -1.865012526512146, + "loss": 1.2756, + "nll_loss": 0.9396783709526062, + "rewards/accuracies": 0.878125011920929, + "rewards/chosen": -0.5476903915405273, + "rewards/margins": 1.317322015762329, + "rewards/rejected": -1.865012526512146, + "step": 775 + }, + { + "epoch": 0.8194353250164149, + "grad_norm": 102.5, + "learning_rate": 1.9098300562505264e-07, + "log_odds_chosen": 1.6969549655914307, + "log_odds_ratio": -0.3712518811225891, + "logits/chosen": -2.4698281288146973, + "logits/rejected": -2.181797981262207, + "logps/chosen": -0.5788697004318237, + "logps/rejected": -1.7834043502807617, + "loss": 1.2457, + "nll_loss": 0.8744741678237915, + "rewards/accuracies": 0.815625011920929, + "rewards/chosen": -0.5788697004318237, + "rewards/margins": 1.2045344114303589, + "rewards/rejected": -1.7834043502807617, + "step": 780 + }, + { + "epoch": 0.824688115561392, + "grad_norm": 28.625, + "learning_rate": 1.803214173219072e-07, + "log_odds_chosen": 1.9696476459503174, + "log_odds_ratio": -0.30190950632095337, + "logits/chosen": -2.483811616897583, + "logits/rejected": -2.173767328262329, + "logps/chosen": -0.535027265548706, + "logps/rejected": -1.9312782287597656, + "loss": 1.2045, + "nll_loss": 0.902554988861084, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.535027265548706, + "rewards/margins": 1.3962510824203491, + "rewards/rejected": -1.9312782287597656, + "step": 785 + }, + { + "epoch": 0.8299409061063691, + "grad_norm": 31.125, + "learning_rate": 1.6993648425638796e-07, + "log_odds_chosen": 1.6274923086166382, + "log_odds_ratio": -0.3982171416282654, + "logits/chosen": -2.5815181732177734, + "logits/rejected": -2.206310987472534, + "logps/chosen": -0.5905428528785706, + "logps/rejected": -1.7506492137908936, + "loss": 1.3427, + "nll_loss": 0.9444986581802368, + "rewards/accuracies": 0.8343750238418579, + "rewards/chosen": -0.5905428528785706, + "rewards/margins": 1.1601064205169678, + "rewards/rejected": -1.7506492137908936, + "step": 790 + }, + { + "epoch": 0.835193696651346, + "grad_norm": 61.0, + "learning_rate": 1.5983171151717921e-07, + "log_odds_chosen": 1.5922825336456299, + "log_odds_ratio": -0.3533628284931183, + "logits/chosen": -2.4570369720458984, + "logits/rejected": -2.210930824279785, + "logps/chosen": -0.581910252571106, + "logps/rejected": -1.6624376773834229, + "loss": 1.2185, + "nll_loss": 0.8651579022407532, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -0.581910252571106, + "rewards/margins": 1.080527424812317, + "rewards/rejected": -1.6624376773834229, + "step": 795 + }, + { + "epoch": 0.8404464871963231, + "grad_norm": 56.25, + "learning_rate": 1.5001050963416716e-07, + "log_odds_chosen": 1.7499481439590454, + "log_odds_ratio": -0.3268365263938904, + "logits/chosen": -2.4593491554260254, + "logits/rejected": -2.1416468620300293, + "logps/chosen": -0.5591254234313965, + "logps/rejected": -1.7770287990570068, + "loss": 1.1877, + "nll_loss": 0.8609007596969604, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.5591254234313965, + "rewards/margins": 1.2179033756256104, + "rewards/rejected": -1.7770287990570068, + "step": 800 + }, + { + "epoch": 0.8456992777413, + "grad_norm": 59.25, + "learning_rate": 1.4047619342732908e-07, + "log_odds_chosen": 1.5950630903244019, + "log_odds_ratio": -0.3615456819534302, + "logits/chosen": -2.5065274238586426, + "logits/rejected": -2.24869441986084, + "logps/chosen": -0.6077946424484253, + "logps/rejected": -1.7057603597640991, + "loss": 1.273, + "nll_loss": 0.911415696144104, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.6077946424484253, + "rewards/margins": 1.0979657173156738, + "rewards/rejected": -1.7057603597640991, + "step": 805 + }, + { + "epoch": 0.8509520682862771, + "grad_norm": 30.0, + "learning_rate": 1.3123198088792577e-07, + "log_odds_chosen": 1.6475883722305298, + "log_odds_ratio": -0.37195760011672974, + "logits/chosen": -2.4656014442443848, + "logits/rejected": -2.1296868324279785, + "logps/chosen": -0.5928062200546265, + "logps/rejected": -1.764866828918457, + "loss": 1.2998, + "nll_loss": 0.9278379678726196, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.5928062200546265, + "rewards/margins": 1.172060489654541, + "rewards/rejected": -1.764866828918457, + "step": 810 + }, + { + "epoch": 0.8562048588312541, + "grad_norm": 48.25, + "learning_rate": 1.2228099209237607e-07, + "log_odds_chosen": 1.6707931756973267, + "log_odds_ratio": -0.35219767689704895, + "logits/chosen": -2.416558027267456, + "logits/rejected": -2.1250758171081543, + "logps/chosen": -0.577375054359436, + "logps/rejected": -1.7357622385025024, + "loss": 1.3212, + "nll_loss": 0.969050407409668, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.577375054359436, + "rewards/margins": 1.1583871841430664, + "rewards/rejected": -1.7357622385025024, + "step": 815 + }, + { + "epoch": 0.8614576493762311, + "grad_norm": 48.5, + "learning_rate": 1.1362624814917842e-07, + "log_odds_chosen": 1.469254732131958, + "log_odds_ratio": -0.3808806836605072, + "logits/chosen": -2.428011178970337, + "logits/rejected": -2.1460485458374023, + "logps/chosen": -0.549521803855896, + "logps/rejected": -1.5279179811477661, + "loss": 1.2772, + "nll_loss": 0.8962807655334473, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.549521803855896, + "rewards/margins": 0.9783961176872253, + "rewards/rejected": -1.5279179811477661, + "step": 820 + }, + { + "epoch": 0.8667104399212081, + "grad_norm": 34.5, + "learning_rate": 1.0527067017923652e-07, + "log_odds_chosen": 1.5520200729370117, + "log_odds_ratio": -0.3591814637184143, + "logits/chosen": -2.5619750022888184, + "logits/rejected": -2.3039004802703857, + "logps/chosen": -0.5574966073036194, + "logps/rejected": -1.6048591136932373, + "loss": 1.2779, + "nll_loss": 0.918703556060791, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5574966073036194, + "rewards/margins": 1.0473625659942627, + "rewards/rejected": -1.6048591136932373, + "step": 825 + }, + { + "epoch": 0.8719632304661852, + "grad_norm": 39.0, + "learning_rate": 9.721707832993231e-08, + "log_odds_chosen": 1.7053543329238892, + "log_odds_ratio": -0.33547329902648926, + "logits/chosen": -2.483564615249634, + "logits/rejected": -2.2165513038635254, + "logps/chosen": -0.5104734301567078, + "logps/rejected": -1.664214849472046, + "loss": 1.1955, + "nll_loss": 0.8600661158561707, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.5104734301567078, + "rewards/margins": 1.1537415981292725, + "rewards/rejected": -1.664214849472046, + "step": 830 + }, + { + "epoch": 0.8772160210111621, + "grad_norm": 34.25, + "learning_rate": 8.946819082327828e-08, + "log_odds_chosen": 1.5886516571044922, + "log_odds_ratio": -0.3529045283794403, + "logits/chosen": -2.3829362392425537, + "logits/rejected": -2.1005430221557617, + "logps/chosen": -0.5660222172737122, + "logps/rejected": -1.645013451576233, + "loss": 1.2596, + "nll_loss": 0.9066807627677917, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.5660222172737122, + "rewards/margins": 1.078991174697876, + "rewards/rejected": -1.645013451576233, + "step": 835 + }, + { + "epoch": 0.8824688115561392, + "grad_norm": 36.75, + "learning_rate": 8.202662303847297e-08, + "log_odds_chosen": 1.7980045080184937, + "log_odds_ratio": -0.3362274765968323, + "logits/chosen": -2.490861654281616, + "logits/rejected": -2.1576590538024902, + "logps/chosen": -0.5558806657791138, + "logps/rejected": -1.7892097234725952, + "loss": 1.2791, + "nll_loss": 0.9428805112838745, + "rewards/accuracies": 0.856249988079071, + "rewards/chosen": -0.5558806657791138, + "rewards/margins": 1.2333290576934814, + "rewards/rejected": -1.7892097234725952, + "step": 840 + }, + { + "epoch": 0.8877216021011162, + "grad_norm": 55.5, + "learning_rate": 7.48948866291661e-08, + "log_odds_chosen": 1.7913442850112915, + "log_odds_ratio": -0.32501915097236633, + "logits/chosen": -2.5119128227233887, + "logits/rejected": -2.193650960922241, + "logps/chosen": -0.5597657561302185, + "logps/rejected": -1.8090870380401611, + "loss": 1.234, + "nll_loss": 0.9089807271957397, + "rewards/accuracies": 0.890625, + "rewards/chosen": -0.5597657561302185, + "rewards/margins": 1.2493212223052979, + "rewards/rejected": -1.8090870380401611, + "step": 845 + }, + { + "epoch": 0.8929743926460932, + "grad_norm": 39.5, + "learning_rate": 6.80753886757336e-08, + "log_odds_chosen": 1.5741755962371826, + "log_odds_ratio": -0.34667596220970154, + "logits/chosen": -2.4587669372558594, + "logits/rejected": -2.187401056289673, + "logps/chosen": -0.5418094396591187, + "logps/rejected": -1.594808578491211, + "loss": 1.2259, + "nll_loss": 0.8791839480400085, + "rewards/accuracies": 0.8687499761581421, + "rewards/chosen": -0.5418094396591187, + "rewards/margins": 1.0529991388320923, + "rewards/rejected": -1.594808578491211, + "step": 850 + }, + { + "epoch": 0.8982271831910703, + "grad_norm": 25.625, + "learning_rate": 6.157043087284797e-08, + "log_odds_chosen": 1.708722710609436, + "log_odds_ratio": -0.34805282950401306, + "logits/chosen": -2.472571849822998, + "logits/rejected": -2.1671009063720703, + "logps/chosen": -0.5452659130096436, + "logps/rejected": -1.7160043716430664, + "loss": 1.2583, + "nll_loss": 0.9102743268013, + "rewards/accuracies": 0.8343750238418579, + "rewards/chosen": -0.5452659130096436, + "rewards/margins": 1.1707384586334229, + "rewards/rejected": -1.7160043716430664, + "step": 855 + }, + { + "epoch": 0.9034799737360473, + "grad_norm": 30.75, + "learning_rate": 5.538220875261734e-08, + "log_odds_chosen": 1.7142833471298218, + "log_odds_ratio": -0.31549376249313354, + "logits/chosen": -2.5251572132110596, + "logits/rejected": -2.228562593460083, + "logps/chosen": -0.5315389633178711, + "logps/rejected": -1.699853539466858, + "loss": 1.2153, + "nll_loss": 0.8998427391052246, + "rewards/accuracies": 0.8812500238418579, + "rewards/chosen": -0.5315389633178711, + "rewards/margins": 1.1683146953582764, + "rewards/rejected": -1.699853539466858, + "step": 860 + }, + { + "epoch": 0.9087327642810243, + "grad_norm": 50.75, + "learning_rate": 4.9512810943557083e-08, + "log_odds_chosen": 1.7466316223144531, + "log_odds_ratio": -0.3088250756263733, + "logits/chosen": -2.492593288421631, + "logits/rejected": -2.1745035648345947, + "logps/chosen": -0.5664678812026978, + "logps/rejected": -1.7472212314605713, + "loss": 1.2514, + "nll_loss": 0.9425439834594727, + "rewards/accuracies": 0.8968750238418579, + "rewards/chosen": -0.5664678812026978, + "rewards/margins": 1.1807533502578735, + "rewards/rejected": -1.7472212314605713, + "step": 865 + }, + { + "epoch": 0.9139855548260013, + "grad_norm": 42.25, + "learning_rate": 4.396421846564235e-08, + "log_odds_chosen": 1.420175313949585, + "log_odds_ratio": -0.39961543679237366, + "logits/chosen": -2.5364463329315186, + "logits/rejected": -2.272904634475708, + "logps/chosen": -0.5728206038475037, + "logps/rejected": -1.547858715057373, + "loss": 1.3665, + "nll_loss": 0.9668663144111633, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.5728206038475037, + "rewards/margins": 0.9750380516052246, + "rewards/rejected": -1.547858715057373, + "step": 870 + }, + { + "epoch": 0.9192383453709784, + "grad_norm": 88.0, + "learning_rate": 3.87383040616811e-08, + "log_odds_chosen": 1.8361127376556396, + "log_odds_ratio": -0.3314815163612366, + "logits/chosen": -2.5305237770080566, + "logits/rejected": -2.205706834793091, + "logps/chosen": -0.5290949940681458, + "logps/rejected": -1.7841472625732422, + "loss": 1.2038, + "nll_loss": 0.8723037838935852, + "rewards/accuracies": 0.871874988079071, + "rewards/chosen": -0.5290949940681458, + "rewards/margins": 1.2550525665283203, + "rewards/rejected": -1.7841472625732422, + "step": 875 + }, + { + "epoch": 0.9244911359159553, + "grad_norm": 59.25, + "learning_rate": 3.383683156523187e-08, + "log_odds_chosen": 1.5235865116119385, + "log_odds_ratio": -0.3648485541343689, + "logits/chosen": -2.4326975345611572, + "logits/rejected": -2.0849132537841797, + "logps/chosen": -0.5309010744094849, + "logps/rejected": -1.563246726989746, + "loss": 1.2608, + "nll_loss": 0.8959411382675171, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.5309010744094849, + "rewards/margins": 1.0323456525802612, + "rewards/rejected": -1.563246726989746, + "step": 880 + }, + { + "epoch": 0.9297439264609324, + "grad_norm": 33.25, + "learning_rate": 2.9261455305280014e-08, + "log_odds_chosen": 1.715073585510254, + "log_odds_ratio": -0.3189467787742615, + "logits/chosen": -2.4626471996307373, + "logits/rejected": -2.1226587295532227, + "logps/chosen": -0.5422563552856445, + "logps/rejected": -1.709451675415039, + "loss": 1.2541, + "nll_loss": 0.9351384043693542, + "rewards/accuracies": 0.8843749761581421, + "rewards/chosen": -0.5422563552856445, + "rewards/margins": 1.1671955585479736, + "rewards/rejected": -1.709451675415039, + "step": 885 + }, + { + "epoch": 0.9349967170059094, + "grad_norm": 60.0, + "learning_rate": 2.5013719547874788e-08, + "log_odds_chosen": 1.6406991481781006, + "log_odds_ratio": -0.37183278799057007, + "logits/chosen": -2.503505229949951, + "logits/rejected": -2.177072525024414, + "logps/chosen": -0.5782598257064819, + "logps/rejected": -1.7351022958755493, + "loss": 1.2879, + "nll_loss": 0.9161151051521301, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.5782598257064819, + "rewards/margins": 1.1568424701690674, + "rewards/rejected": -1.7351022958755493, + "step": 890 + }, + { + "epoch": 0.9402495075508864, + "grad_norm": 30.5, + "learning_rate": 2.1095057974913177e-08, + "log_odds_chosen": 1.5425198078155518, + "log_odds_ratio": -0.3476109504699707, + "logits/chosen": -2.463806390762329, + "logits/rejected": -2.2360615730285645, + "logps/chosen": -0.5494548082351685, + "logps/rejected": -1.5607731342315674, + "loss": 1.2287, + "nll_loss": 0.8811271786689758, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.5494548082351685, + "rewards/margins": 1.011318325996399, + "rewards/rejected": -1.5607731342315674, + "step": 895 + }, + { + "epoch": 0.9455022980958634, + "grad_norm": 40.25, + "learning_rate": 1.7506793200248504e-08, + "log_odds_chosen": 1.79372239112854, + "log_odds_ratio": -0.34891271591186523, + "logits/chosen": -2.4137704372406006, + "logits/rejected": -2.1525025367736816, + "logps/chosen": -0.5806652307510376, + "logps/rejected": -1.8389291763305664, + "loss": 1.2788, + "nll_loss": 0.9298731684684753, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.5806652307510376, + "rewards/margins": 1.2582640647888184, + "rewards/rejected": -1.8389291763305664, + "step": 900 + }, + { + "epoch": 0.9507550886408405, + "grad_norm": 33.75, + "learning_rate": 1.4250136323285866e-08, + "log_odds_chosen": 1.7694854736328125, + "log_odds_ratio": -0.339056134223938, + "logits/chosen": -2.458627223968506, + "logits/rejected": -2.133309841156006, + "logps/chosen": -0.5246182680130005, + "logps/rejected": -1.748004674911499, + "loss": 1.2399, + "nll_loss": 0.9008275866508484, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -0.5246182680130005, + "rewards/margins": 1.2233861684799194, + "rewards/rejected": -1.748004674911499, + "step": 905 + }, + { + "epoch": 0.9560078791858174, + "grad_norm": 42.25, + "learning_rate": 1.1326186520215885e-08, + "log_odds_chosen": 1.4994810819625854, + "log_odds_ratio": -0.3889666199684143, + "logits/chosen": -2.42987322807312, + "logits/rejected": -2.2474777698516846, + "logps/chosen": -0.5686417818069458, + "logps/rejected": -1.6017091274261475, + "loss": 1.3525, + "nll_loss": 0.9635759592056274, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.5686417818069458, + "rewards/margins": 1.0330675840377808, + "rewards/rejected": -1.6017091274261475, + "step": 910 + }, + { + "epoch": 0.9612606697307945, + "grad_norm": 77.0, + "learning_rate": 8.735930673024805e-09, + "log_odds_chosen": 1.6517369747161865, + "log_odds_ratio": -0.34624212980270386, + "logits/chosen": -2.3800384998321533, + "logits/rejected": -2.0897443294525146, + "logps/chosen": -0.5255088806152344, + "logps/rejected": -1.6485977172851562, + "loss": 1.3009, + "nll_loss": 0.9546435475349426, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -0.5255088806152344, + "rewards/margins": 1.1230888366699219, + "rewards/rejected": -1.6485977172851562, + "step": 915 + }, + { + "epoch": 0.9665134602757715, + "grad_norm": 30.875, + "learning_rate": 6.480243036404598e-09, + "log_odds_chosen": 1.8001991510391235, + "log_odds_ratio": -0.3332251012325287, + "logits/chosen": -2.499809980392456, + "logits/rejected": -2.291926860809326, + "logps/chosen": -0.5624955892562866, + "logps/rejected": -1.824375867843628, + "loss": 1.2743, + "nll_loss": 0.941113293170929, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.5624955892562866, + "rewards/margins": 1.2618802785873413, + "rewards/rejected": -1.824375867843628, + "step": 920 + }, + { + "epoch": 0.9717662508207485, + "grad_norm": 44.75, + "learning_rate": 4.559884942677783e-09, + "log_odds_chosen": 1.4665955305099487, + "log_odds_ratio": -0.38308554887771606, + "logits/chosen": -2.397916078567505, + "logits/rejected": -2.142017126083374, + "logps/chosen": -0.5283843874931335, + "logps/rejected": -1.4880872964859009, + "loss": 1.2288, + "nll_loss": 0.8457143902778625, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.5283843874931335, + "rewards/margins": 0.9597029685974121, + "rewards/rejected": -1.4880872964859009, + "step": 925 + }, + { + "epoch": 0.9770190413657256, + "grad_norm": 47.75, + "learning_rate": 2.9755045448351944e-09, + "log_odds_chosen": 1.4579670429229736, + "log_odds_ratio": -0.3762872815132141, + "logits/chosen": -2.4584195613861084, + "logits/rejected": -2.198525905609131, + "logps/chosen": -0.5691961050033569, + "logps/rejected": -1.5534415245056152, + "loss": 1.2789, + "nll_loss": 0.9026187062263489, + "rewards/accuracies": 0.8187500238418579, + "rewards/chosen": -0.5691961050033569, + "rewards/margins": 0.9842453002929688, + "rewards/rejected": -1.5534415245056152, + "step": 930 + }, + { + "epoch": 0.9822718319107026, + "grad_norm": 49.0, + "learning_rate": 1.7276365977730856e-09, + "log_odds_chosen": 1.5441417694091797, + "log_odds_ratio": -0.3624028265476227, + "logits/chosen": -2.535742998123169, + "logits/rejected": -2.1748859882354736, + "logps/chosen": -0.5510035753250122, + "logps/rejected": -1.6074680089950562, + "loss": 1.2792, + "nll_loss": 0.9167704582214355, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -0.5510035753250122, + "rewards/margins": 1.056464433670044, + "rewards/rejected": -1.6074680089950562, + "step": 935 + }, + { + "epoch": 0.9875246224556796, + "grad_norm": 50.0, + "learning_rate": 8.16702277804504e-10, + "log_odds_chosen": 1.6150617599487305, + "log_odds_ratio": -0.3433099687099457, + "logits/chosen": -2.4907350540161133, + "logits/rejected": -2.166508674621582, + "logps/chosen": -0.5353943109512329, + "logps/rejected": -1.6448442935943604, + "loss": 1.2318, + "nll_loss": 0.888446033000946, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -0.5353943109512329, + "rewards/margins": 1.1094499826431274, + "rewards/rejected": -1.6448442935943604, + "step": 940 + }, + { + "epoch": 0.9927774130006566, + "grad_norm": 44.75, + "learning_rate": 2.430090405054486e-10, + "log_odds_chosen": 1.457880973815918, + "log_odds_ratio": -0.36118173599243164, + "logits/chosen": -2.4720263481140137, + "logits/rejected": -2.178345203399658, + "logps/chosen": -0.5418224334716797, + "logps/rejected": -1.4925849437713623, + "loss": 1.309, + "nll_loss": 0.9478532671928406, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.5418224334716797, + "rewards/margins": 0.9507624506950378, + "rewards/rejected": -1.4925849437713623, + "step": 945 + }, + { + "epoch": 0.9980302035456337, + "grad_norm": 33.0, + "learning_rate": 6.750516943321294e-12, + "log_odds_chosen": 1.7491207122802734, + "log_odds_ratio": -0.319837361574173, + "logits/chosen": -2.4439542293548584, + "logits/rejected": -2.1569535732269287, + "logps/chosen": -0.5160128474235535, + "logps/rejected": -1.7111313343048096, + "loss": 1.2106, + "nll_loss": 0.8908060193061829, + "rewards/accuracies": 0.871874988079071, + "rewards/chosen": -0.5160128474235535, + "rewards/margins": 1.1951183080673218, + "rewards/rejected": -1.7111313343048096, + "step": 950 + }, + { + "epoch": 0.999080761654629, + "step": 951, + "total_flos": 0.0, + "train_loss": 1.3879666121600178, + "train_runtime": 22584.718, + "train_samples_per_second": 2.697, + "train_steps_per_second": 0.042 + } + ], + "logging_steps": 5, + "max_steps": 951, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}