diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,17607 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.995276334435522, + "eval_steps": 50, + "global_step": 5290, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.001889466225791214, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits": -1.2554917335510254, + "logps": -90.04276275634766, + "loss": 0.0748, + "objective": 0.07446074485778809, + "ranking_idealized": 0.4375, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4375, + "regularize": 0.005146026611328125, + "step": 1 + }, + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.00944733112895607, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits": -1.3566728830337524, + "logps": -93.35393524169922, + "loss": 0.0753, + "objective": 0.07558518648147583, + "ranking_idealized": 0.5859375, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.006270468235015869, + "step": 5 + }, + { + "dpo_loss": 0.6931472420692444, + "epoch": 0.01889466225791214, + "grad_norm": 96.03528762444088, + "learning_rate": 4.7258979206049145e-09, + "logits": -1.314980149269104, + "logps": -92.82933044433594, + "loss": 0.0748, + "objective": 0.074301578104496, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.004986858461052179, + "step": 10 + }, + { + "dpo_loss": 0.6931472420692444, + "epoch": 0.02834199338686821, + "grad_norm": 106.03244133035287, + "learning_rate": 8.506616257088846e-09, + "logits": -1.303232192993164, + "logps": -91.14088439941406, + "loss": 0.0751, + "objective": 0.07503427565097809, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.005719560198485851, + "step": 15 + }, + { + "dpo_loss": 0.6926484704017639, + "epoch": 0.03778932451582428, + "grad_norm": 96.31144733105825, + "learning_rate": 1.323251417769376e-08, + "logits": -1.454129934310913, + "logps": -93.50907897949219, + "loss": 0.0796, + "objective": 0.07954580336809158, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.01028094906359911, + "step": 20 + }, + { + "dpo_loss": 0.6928218007087708, + "epoch": 0.04723665564478035, + "grad_norm": 92.14233006839046, + "learning_rate": 1.7958412098298676e-08, + "logits": -1.3390535116195679, + "logps": -90.30491638183594, + "loss": 0.08, + "objective": 0.07990650832653046, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.01062433235347271, + "step": 25 + }, + { + "dpo_loss": 0.6925551295280457, + "epoch": 0.05668398677373642, + "grad_norm": 91.05770478423896, + "learning_rate": 2.268431001890359e-08, + "logits": -1.3069889545440674, + "logps": -91.43885803222656, + "loss": 0.0801, + "objective": 0.08068785816431046, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.011432340368628502, + "step": 30 + }, + { + "dpo_loss": 0.6934799551963806, + "epoch": 0.06613131790269249, + "grad_norm": 95.27624930021037, + "learning_rate": 2.7410207939508506e-08, + "logits": -1.283834457397461, + "logps": -92.07040405273438, + "loss": 0.081, + "objective": 0.08080057054758072, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.011452572420239449, + "step": 35 + }, + { + "dpo_loss": 0.6932560205459595, + "epoch": 0.07557864903164856, + "grad_norm": 95.669774962342, + "learning_rate": 3.213610586011342e-08, + "logits": -1.4033466577529907, + "logps": -92.30833435058594, + "loss": 0.0809, + "objective": 0.08118347078561783, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.011857859790325165, + "step": 40 + }, + { + "dpo_loss": 0.6935173273086548, + "epoch": 0.08502598016060463, + "grad_norm": 87.06329457602646, + "learning_rate": 3.6862003780718335e-08, + "logits": -1.3774633407592773, + "logps": -91.71311950683594, + "loss": 0.0798, + "objective": 0.07959654182195663, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.010244803503155708, + "step": 45 + }, + { + "dpo_loss": 0.6932135820388794, + "epoch": 0.0944733112895607, + "grad_norm": 90.79462025433014, + "learning_rate": 4.158790170132325e-08, + "logits": -1.343457579612732, + "logps": -91.4620132446289, + "loss": 0.0798, + "objective": 0.0787525326013565, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5375000238418579, + "regularize": 0.00943115632981062, + "step": 50 + }, + { + "epoch": 0.0944733112895607, + "eval_dpo_loss": 0.6932454705238342, + "eval_logits": -1.3072285652160645, + "eval_logps": -98.50404357910156, + "eval_loss": 0.08073805272579193, + "eval_objective": 0.08079613000154495, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.01147159282118082, + "eval_runtime": 159.4296, + "eval_samples_per_second": 36.317, + "eval_steps_per_second": 3.03, + "step": 50 + }, + { + "dpo_loss": 0.6931546926498413, + "epoch": 0.10392064241851677, + "grad_norm": 95.1381387245793, + "learning_rate": 4.6313799621928164e-08, + "logits": -1.3497530221939087, + "logps": -91.14179229736328, + "loss": 0.0799, + "objective": 0.08008146286010742, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.010765998624265194, + "step": 55 + }, + { + "dpo_loss": 0.6940718293190002, + "epoch": 0.11336797354747284, + "grad_norm": 85.84378688398391, + "learning_rate": 5.103969754253308e-08, + "logits": -1.326425313949585, + "logps": -91.68214416503906, + "loss": 0.08, + "objective": 0.08076535165309906, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.011358163319528103, + "step": 60 + }, + { + "dpo_loss": 0.6940113306045532, + "epoch": 0.12281530467642891, + "grad_norm": 94.98552874921538, + "learning_rate": 5.576559546313799e-08, + "logits": -1.3491050004959106, + "logps": -91.77569580078125, + "loss": 0.0814, + "objective": 0.08189814537763596, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.012497019954025745, + "step": 65 + }, + { + "dpo_loss": 0.693263828754425, + "epoch": 0.13226263580538497, + "grad_norm": 107.33910773824219, + "learning_rate": 6.049149338374291e-08, + "logits": -1.3523788452148438, + "logps": -91.95494842529297, + "loss": 0.0801, + "objective": 0.08068925887346268, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.01136286836117506, + "step": 70 + }, + { + "dpo_loss": 0.6932634115219116, + "epoch": 0.14170996693434104, + "grad_norm": 97.18132543333557, + "learning_rate": 6.521739130434782e-08, + "logits": -1.3806952238082886, + "logps": -93.421630859375, + "loss": 0.0805, + "objective": 0.08125968277454376, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.011933336034417152, + "step": 75 + }, + { + "dpo_loss": 0.6934519410133362, + "epoch": 0.1511572980632971, + "grad_norm": 90.58288269347415, + "learning_rate": 6.994328922495274e-08, + "logits": -1.2946566343307495, + "logps": -93.24769592285156, + "loss": 0.0808, + "objective": 0.08052171766757965, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.011176527477800846, + "step": 80 + }, + { + "dpo_loss": 0.6921312212944031, + "epoch": 0.16060462919225318, + "grad_norm": 88.36217330236191, + "learning_rate": 7.466918714555766e-08, + "logits": -1.3575432300567627, + "logps": -92.65788269042969, + "loss": 0.0806, + "objective": 0.08070734888315201, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.011494224891066551, + "step": 85 + }, + { + "dpo_loss": 0.6927862167358398, + "epoch": 0.17005196032120926, + "grad_norm": 105.19633396106833, + "learning_rate": 7.939508506616256e-08, + "logits": -1.2258633375167847, + "logps": -93.01042175292969, + "loss": 0.0798, + "objective": 0.07944050431251526, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.01016187947243452, + "step": 90 + }, + { + "dpo_loss": 0.6920500993728638, + "epoch": 0.17949929145016533, + "grad_norm": 96.30196423359166, + "learning_rate": 8.412098298676749e-08, + "logits": -1.3657176494598389, + "logps": -89.35536193847656, + "loss": 0.0807, + "objective": 0.08035007864236832, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.011145063675940037, + "step": 95 + }, + { + "dpo_loss": 0.6925091743469238, + "epoch": 0.1889466225791214, + "grad_norm": 93.35552835482645, + "learning_rate": 8.88468809073724e-08, + "logits": -1.4030728340148926, + "logps": -93.86295318603516, + "loss": 0.081, + "objective": 0.08110513538122177, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.543749988079071, + "regularize": 0.01185421273112297, + "step": 100 + }, + { + "epoch": 0.1889466225791214, + "eval_dpo_loss": 0.6933540105819702, + "eval_logits": -1.3084157705307007, + "eval_logps": -98.49323272705078, + "eval_loss": 0.0818963274359703, + "eval_objective": 0.08186686784029007, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.012531458400189877, + "eval_runtime": 158.2662, + "eval_samples_per_second": 36.584, + "eval_steps_per_second": 3.052, + "step": 100 + }, + { + "dpo_loss": 0.691580593585968, + "epoch": 0.19839395370807747, + "grad_norm": 91.03790534738991, + "learning_rate": 9.357277882797732e-08, + "logits": -1.3384134769439697, + "logps": -91.71461486816406, + "loss": 0.0805, + "objective": 0.08078125864267349, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.011623199097812176, + "step": 105 + }, + { + "dpo_loss": 0.6921985149383545, + "epoch": 0.20784128483703354, + "grad_norm": 93.5398896154081, + "learning_rate": 9.829867674858222e-08, + "logits": -1.2784953117370605, + "logps": -91.62667083740234, + "loss": 0.081, + "objective": 0.0812302827835083, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.012010429054498672, + "step": 110 + }, + { + "dpo_loss": 0.6925197839736938, + "epoch": 0.2172886159659896, + "grad_norm": 114.1699761501263, + "learning_rate": 1.0302457466918714e-07, + "logits": -1.321418285369873, + "logps": -93.04322814941406, + "loss": 0.0809, + "objective": 0.08117768913507462, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.01192571222782135, + "step": 115 + }, + { + "dpo_loss": 0.691599428653717, + "epoch": 0.22673594709494568, + "grad_norm": 100.677720262526, + "learning_rate": 1.0775047258979206e-07, + "logits": -1.3501144647598267, + "logps": -91.29630279541016, + "loss": 0.0822, + "objective": 0.08226821571588516, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.013108278624713421, + "step": 120 + }, + { + "dpo_loss": 0.6913750767707825, + "epoch": 0.23618327822390175, + "grad_norm": 107.79833398829818, + "learning_rate": 1.1247637051039697e-07, + "logits": -1.3559753894805908, + "logps": -91.870849609375, + "loss": 0.0816, + "objective": 0.08255833387374878, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.013420837000012398, + "step": 125 + }, + { + "dpo_loss": 0.6917082667350769, + "epoch": 0.24563060935285783, + "grad_norm": 87.3599630697971, + "learning_rate": 1.1720226843100187e-07, + "logits": -1.3852940797805786, + "logps": -93.1257095336914, + "loss": 0.0821, + "objective": 0.08141469210386276, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.012243862263858318, + "step": 130 + }, + { + "dpo_loss": 0.6916366815567017, + "epoch": 0.25507794048181387, + "grad_norm": 85.96528652950495, + "learning_rate": 1.219281663516068e-07, + "logits": -1.4204727411270142, + "logps": -91.48652648925781, + "loss": 0.0819, + "objective": 0.08242340385913849, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.013259735889732838, + "step": 135 + }, + { + "dpo_loss": 0.6917210221290588, + "epoch": 0.26452527161076994, + "grad_norm": 87.464567760636, + "learning_rate": 1.266540642722117e-07, + "logits": -1.4328950643539429, + "logps": -91.34432220458984, + "loss": 0.0815, + "objective": 0.08194929361343384, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.012777194380760193, + "step": 140 + }, + { + "dpo_loss": 0.6909077167510986, + "epoch": 0.273972602739726, + "grad_norm": 98.17150093607263, + "learning_rate": 1.3137996219281664e-07, + "logits": -1.2796088457107544, + "logps": -94.21308898925781, + "loss": 0.0841, + "objective": 0.08374190330505371, + "ranking_idealized": 0.4312500059604645, + "ranking_idealized_expo": 0.41874998807907104, + "ranking_simple": 0.41874998807907104, + "regularize": 0.014651129022240639, + "step": 145 + }, + { + "dpo_loss": 0.6928467154502869, + "epoch": 0.2834199338686821, + "grad_norm": 99.65212586497037, + "learning_rate": 1.3610586011342153e-07, + "logits": -1.299617052078247, + "logps": -88.99101257324219, + "loss": 0.0839, + "objective": 0.08220638334751129, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.012921703048050404, + "step": 150 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 0.6930533647537231, + "eval_logits": -1.3078515529632568, + "eval_logps": -98.5417251586914, + "eval_loss": 0.08232778310775757, + "eval_objective": 0.08233249187469482, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5232919454574585, + "eval_regularize": 0.013027146458625793, + "eval_runtime": 157.9508, + "eval_samples_per_second": 36.657, + "eval_steps_per_second": 3.058, + "step": 150 + }, + { + "dpo_loss": 0.6929137110710144, + "epoch": 0.29286726499763815, + "grad_norm": 118.8781397472593, + "learning_rate": 1.4083175803402647e-07, + "logits": -1.4878828525543213, + "logps": -93.26710510253906, + "loss": 0.0856, + "objective": 0.08600855618715286, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.016717184334993362, + "step": 155 + }, + { + "dpo_loss": 0.6932097673416138, + "epoch": 0.3023145961265942, + "grad_norm": 97.86517664421345, + "learning_rate": 1.455576559546314e-07, + "logits": -1.348954200744629, + "logps": -93.52423858642578, + "loss": 0.0843, + "objective": 0.0843389481306076, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.015017963945865631, + "step": 160 + }, + { + "dpo_loss": 0.6927060484886169, + "epoch": 0.3117619272555503, + "grad_norm": 96.31846370142493, + "learning_rate": 1.5028355387523628e-07, + "logits": -1.3225867748260498, + "logps": -92.67644500732422, + "loss": 0.0848, + "objective": 0.08455438911914825, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.01528378389775753, + "step": 165 + }, + { + "dpo_loss": 0.6917023658752441, + "epoch": 0.32120925838450637, + "grad_norm": 104.27989577822625, + "learning_rate": 1.5500945179584122e-07, + "logits": -1.3173997402191162, + "logps": -90.8909912109375, + "loss": 0.085, + "objective": 0.08586680889129639, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.01669657602906227, + "step": 170 + }, + { + "dpo_loss": 0.6916298270225525, + "epoch": 0.33065658951346244, + "grad_norm": 83.6068402947374, + "learning_rate": 1.597353497164461e-07, + "logits": -1.3807752132415771, + "logps": -92.2252426147461, + "loss": 0.0851, + "objective": 0.08548159152269363, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.016318608075380325, + "step": 175 + }, + { + "dpo_loss": 0.691738486289978, + "epoch": 0.3401039206424185, + "grad_norm": 105.18661952420578, + "learning_rate": 1.6446124763705102e-07, + "logits": -1.3236361742019653, + "logps": -92.10786437988281, + "loss": 0.0869, + "objective": 0.08777324855327606, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.01859939657151699, + "step": 180 + }, + { + "dpo_loss": 0.6920045018196106, + "epoch": 0.3495512517713746, + "grad_norm": 96.87931928806626, + "learning_rate": 1.6918714555765596e-07, + "logits": -1.3653669357299805, + "logps": -94.2525405883789, + "loss": 0.0866, + "objective": 0.08760502189397812, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.4625000059604645, + "regularize": 0.01840457320213318, + "step": 185 + }, + { + "dpo_loss": 0.6901124715805054, + "epoch": 0.35899858290033065, + "grad_norm": 101.71062430711056, + "learning_rate": 1.7391304347826085e-07, + "logits": -1.2960126399993896, + "logps": -94.1360855102539, + "loss": 0.0873, + "objective": 0.08737105876207352, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.018359806388616562, + "step": 190 + }, + { + "dpo_loss": 0.693382740020752, + "epoch": 0.3684459140292867, + "grad_norm": 108.0065626001207, + "learning_rate": 1.786389413988658e-07, + "logits": -1.3760709762573242, + "logps": -92.12065124511719, + "loss": 0.088, + "objective": 0.08760654181241989, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.018268268555402756, + "step": 195 + }, + { + "dpo_loss": 0.6914373636245728, + "epoch": 0.3778932451582428, + "grad_norm": 124.97011802709235, + "learning_rate": 1.833648393194707e-07, + "logits": -1.4247692823410034, + "logps": -92.43400573730469, + "loss": 0.0891, + "objective": 0.09008407592773438, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.02094034105539322, + "step": 200 + }, + { + "epoch": 0.3778932451582428, + "eval_dpo_loss": 0.6926960945129395, + "eval_logits": -1.3062703609466553, + "eval_logps": -98.65170288085938, + "eval_loss": 0.08403145521879196, + "eval_objective": 0.0839371606707573, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.014667555689811707, + "eval_runtime": 158.3979, + "eval_samples_per_second": 36.554, + "eval_steps_per_second": 3.049, + "step": 200 + }, + { + "dpo_loss": 0.6928633451461792, + "epoch": 0.38734057628719887, + "grad_norm": 102.87640442387767, + "learning_rate": 1.880907372400756e-07, + "logits": -1.2827486991882324, + "logps": -94.01448059082031, + "loss": 0.089, + "objective": 0.0891382023692131, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.019851867109537125, + "step": 205 + }, + { + "dpo_loss": 0.6920486688613892, + "epoch": 0.39678790741615494, + "grad_norm": 90.49515192046192, + "learning_rate": 1.9281663516068053e-07, + "logits": -1.3010823726654053, + "logps": -92.64601135253906, + "loss": 0.0896, + "objective": 0.09058623015880585, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.021381361410021782, + "step": 210 + }, + { + "dpo_loss": 0.6928970813751221, + "epoch": 0.406235238545111, + "grad_norm": 110.965752990135, + "learning_rate": 1.9754253308128542e-07, + "logits": -1.3557052612304688, + "logps": -93.74603271484375, + "loss": 0.0935, + "objective": 0.09472814947366714, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.02543843351304531, + "step": 215 + }, + { + "dpo_loss": 0.6901801824569702, + "epoch": 0.4156825696740671, + "grad_norm": 111.3886176276061, + "learning_rate": 2.0226843100189034e-07, + "logits": -1.4235761165618896, + "logps": -94.99393463134766, + "loss": 0.0967, + "objective": 0.09937222301959991, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.030354226008057594, + "step": 220 + }, + { + "dpo_loss": 0.6928871870040894, + "epoch": 0.42512990080302315, + "grad_norm": 92.08065964120154, + "learning_rate": 2.0699432892249528e-07, + "logits": -1.3547779321670532, + "logps": -92.96757507324219, + "loss": 0.0922, + "objective": 0.09014561027288437, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.020856894552707672, + "step": 225 + }, + { + "dpo_loss": 0.6924599409103394, + "epoch": 0.4345772319319792, + "grad_norm": 100.38486388141452, + "learning_rate": 2.1172022684310017e-07, + "logits": -1.351255178451538, + "logps": -91.6011734008789, + "loss": 0.0928, + "objective": 0.0948859453201294, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.02563994750380516, + "step": 230 + }, + { + "dpo_loss": 0.6900066137313843, + "epoch": 0.4440245630609353, + "grad_norm": 97.70828365629224, + "learning_rate": 2.164461247637051e-07, + "logits": -1.4029417037963867, + "logps": -92.07453918457031, + "loss": 0.0961, + "objective": 0.09497665613889694, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.025975991040468216, + "step": 235 + }, + { + "dpo_loss": 0.6901751756668091, + "epoch": 0.45347189418989137, + "grad_norm": 110.99658377067563, + "learning_rate": 2.2117202268431002e-07, + "logits": -1.3475522994995117, + "logps": -93.2119140625, + "loss": 0.0951, + "objective": 0.0938529521226883, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48124998807907104, + "regularize": 0.024835431948304176, + "step": 240 + }, + { + "dpo_loss": 0.691163957118988, + "epoch": 0.46291922531884744, + "grad_norm": 100.7251612930126, + "learning_rate": 2.258979206049149e-07, + "logits": -1.4657642841339111, + "logps": -92.83688354492188, + "loss": 0.0988, + "objective": 0.09848640859127045, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.029370009899139404, + "step": 245 + }, + { + "dpo_loss": 0.689066469669342, + "epoch": 0.4723665564478035, + "grad_norm": 132.46845729407312, + "learning_rate": 2.3062381852551985e-07, + "logits": -1.3548660278320312, + "logps": -93.12916564941406, + "loss": 0.1019, + "objective": 0.09985167533159256, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48750001192092896, + "regularize": 0.03094502165913582, + "step": 250 + }, + { + "epoch": 0.4723665564478035, + "eval_dpo_loss": 0.6928930878639221, + "eval_logits": -1.3058041334152222, + "eval_logps": -98.67533111572266, + "eval_loss": 0.08647485077381134, + "eval_objective": 0.08637857437133789, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5232919454574585, + "eval_regularize": 0.01708926260471344, + "eval_runtime": 158.8308, + "eval_samples_per_second": 36.454, + "eval_steps_per_second": 3.041, + "step": 250 + }, + { + "dpo_loss": 0.6961080431938171, + "epoch": 0.4818138875767596, + "grad_norm": 103.43649747338549, + "learning_rate": 2.3534971644612476e-07, + "logits": -1.4218876361846924, + "logps": -91.47744750976562, + "loss": 0.0992, + "objective": 0.09804344922304153, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.028432641178369522, + "step": 255 + }, + { + "dpo_loss": 0.6928529739379883, + "epoch": 0.49126121870571565, + "grad_norm": 89.70604380119666, + "learning_rate": 2.400756143667297e-07, + "logits": -1.355158805847168, + "logps": -94.02273559570312, + "loss": 0.1005, + "objective": 0.10035960376262665, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.031074311584234238, + "step": 260 + }, + { + "dpo_loss": 0.6879535913467407, + "epoch": 0.5007085498346717, + "grad_norm": 95.56790417868868, + "learning_rate": 2.448015122873346e-07, + "logits": -1.2801698446273804, + "logps": -93.42267608642578, + "loss": 0.1003, + "objective": 0.09648537635803223, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.027690013870596886, + "step": 265 + }, + { + "dpo_loss": 0.6904926300048828, + "epoch": 0.5101558809636277, + "grad_norm": 100.34085108311201, + "learning_rate": 2.495274102079395e-07, + "logits": -1.2917563915252686, + "logps": -92.55985260009766, + "loss": 0.1033, + "objective": 0.10361208021640778, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.034562818706035614, + "step": 270 + }, + { + "dpo_loss": 0.6918493509292603, + "epoch": 0.5196032120925839, + "grad_norm": 102.50008848820325, + "learning_rate": 2.542533081285444e-07, + "logits": -1.304248571395874, + "logps": -93.20156860351562, + "loss": 0.1024, + "objective": 0.1000850573182106, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.030900120735168457, + "step": 275 + }, + { + "dpo_loss": 0.6872326731681824, + "epoch": 0.5290505432215399, + "grad_norm": 91.85551703188915, + "learning_rate": 2.589792060491493e-07, + "logits": -1.4421064853668213, + "logps": -91.59370422363281, + "loss": 0.1063, + "objective": 0.10864508152008057, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.0399218313395977, + "step": 280 + }, + { + "dpo_loss": 0.6879855394363403, + "epoch": 0.538497874350496, + "grad_norm": 102.22439237075963, + "learning_rate": 2.6370510396975425e-07, + "logits": -1.3105735778808594, + "logps": -92.08065032958984, + "loss": 0.1029, + "objective": 0.101453498005867, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.03265494108200073, + "step": 285 + }, + { + "dpo_loss": 0.6928714513778687, + "epoch": 0.547945205479452, + "grad_norm": 97.93507304637171, + "learning_rate": 2.6843100189035917e-07, + "logits": -1.302470326423645, + "logps": -91.57052612304688, + "loss": 0.1071, + "objective": 0.10190291702747345, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.03261576220393181, + "step": 290 + }, + { + "dpo_loss": 0.6913698315620422, + "epoch": 0.5573925366084082, + "grad_norm": 93.2934267056007, + "learning_rate": 2.731568998109641e-07, + "logits": -1.309118628501892, + "logps": -91.43770599365234, + "loss": 0.1105, + "objective": 0.11521486192941666, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.04607786983251572, + "step": 295 + }, + { + "dpo_loss": 0.689521074295044, + "epoch": 0.5668398677373642, + "grad_norm": 87.42650287342656, + "learning_rate": 2.77882797731569e-07, + "logits": -1.3558248281478882, + "logps": -93.9669418334961, + "loss": 0.1094, + "objective": 0.11233736574649811, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.04338525980710983, + "step": 300 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 0.6925599575042725, + "eval_logits": -1.308714509010315, + "eval_logps": -98.34481811523438, + "eval_loss": 0.09280507266521454, + "eval_objective": 0.09301475435495377, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.02375875413417816, + "eval_runtime": 160.5672, + "eval_samples_per_second": 36.06, + "eval_steps_per_second": 3.008, + "step": 300 + }, + { + "dpo_loss": 0.6917223930358887, + "epoch": 0.5762871988663203, + "grad_norm": 111.48363245589638, + "learning_rate": 2.8260869565217386e-07, + "logits": -1.283851981163025, + "logps": -92.88015747070312, + "loss": 0.1157, + "objective": 0.11618302762508392, + "ranking_idealized": 0.45625001192092896, + "ranking_idealized_expo": 0.4312500059604645, + "ranking_simple": 0.4312500059604645, + "regularize": 0.047010790556669235, + "step": 305 + }, + { + "dpo_loss": 0.6910533905029297, + "epoch": 0.5857345299952763, + "grad_norm": 122.66031310330784, + "learning_rate": 2.873345935727788e-07, + "logits": -1.3154358863830566, + "logps": -91.87257385253906, + "loss": 0.1096, + "objective": 0.10647080838680267, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.606249988079071, + "ranking_simple": 0.612500011920929, + "regularize": 0.03736545890569687, + "step": 310 + }, + { + "dpo_loss": 0.6930454969406128, + "epoch": 0.5951818611242324, + "grad_norm": 97.36343792538779, + "learning_rate": 2.911153119092628e-07, + "logits": -1.2765394449234009, + "logps": -93.13780212402344, + "loss": 0.1144, + "objective": 0.1138184517621994, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.044513899832963943, + "step": 315 + }, + { + "dpo_loss": 0.689300000667572, + "epoch": 0.6046291922531885, + "grad_norm": 101.14091961784702, + "learning_rate": 2.9584120982986764e-07, + "logits": -1.3148740530014038, + "logps": -91.87626647949219, + "loss": 0.1129, + "objective": 0.1134437695145607, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4749999940395355, + "regularize": 0.0445137694478035, + "step": 320 + }, + { + "dpo_loss": 0.6846424341201782, + "epoch": 0.6140765233821446, + "grad_norm": 125.67390238555228, + "learning_rate": 3.0056710775047255e-07, + "logits": -1.3823215961456299, + "logps": -91.69265747070312, + "loss": 0.1176, + "objective": 0.12137912213802338, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.05291489511728287, + "step": 325 + }, + { + "dpo_loss": 0.690563440322876, + "epoch": 0.6235238545111006, + "grad_norm": 103.15774875968413, + "learning_rate": 3.0529300567107747e-07, + "logits": -1.376556396484375, + "logps": -92.85224914550781, + "loss": 0.1143, + "objective": 0.11595580726861954, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.0468994677066803, + "step": 330 + }, + { + "dpo_loss": 0.6915899515151978, + "epoch": 0.6329711856400567, + "grad_norm": 93.95463269073304, + "learning_rate": 3.1001890359168243e-07, + "logits": -1.3574047088623047, + "logps": -93.62723541259766, + "loss": 0.1184, + "objective": 0.11612270027399063, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.04696370288729668, + "step": 335 + }, + { + "dpo_loss": 0.6864021420478821, + "epoch": 0.6424185167690127, + "grad_norm": 98.76696875690139, + "learning_rate": 3.1474480151228735e-07, + "logits": -1.3963720798492432, + "logps": -92.58355712890625, + "loss": 0.1177, + "objective": 0.11631828546524048, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.04767807573080063, + "step": 340 + }, + { + "dpo_loss": 0.688799262046814, + "epoch": 0.6518658478979689, + "grad_norm": 95.66225766173847, + "learning_rate": 3.194706994328922e-07, + "logits": -1.336104154586792, + "logps": -91.61724090576172, + "loss": 0.1219, + "objective": 0.12555508315563202, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.05667515844106674, + "step": 345 + }, + { + "dpo_loss": 0.6907819509506226, + "epoch": 0.6613131790269249, + "grad_norm": 102.20491139749774, + "learning_rate": 3.241965973534971e-07, + "logits": -1.2857093811035156, + "logps": -94.47968292236328, + "loss": 0.1267, + "objective": 0.12202408164739609, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.05294587463140488, + "step": 350 + }, + { + "epoch": 0.6613131790269249, + "eval_dpo_loss": 0.694220244884491, + "eval_logits": -1.3096948862075806, + "eval_logps": -98.48033142089844, + "eval_loss": 0.09951319545507431, + "eval_objective": 0.10041753947734833, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.030995529145002365, + "eval_runtime": 158.8978, + "eval_samples_per_second": 36.439, + "eval_steps_per_second": 3.04, + "step": 350 + }, + { + "dpo_loss": 0.6890446543693542, + "epoch": 0.670760510155881, + "grad_norm": 107.47087696174563, + "learning_rate": 3.2892249527410204e-07, + "logits": -1.3915925025939941, + "logps": -93.3402099609375, + "loss": 0.1234, + "objective": 0.12433197349309921, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.055427514016628265, + "step": 355 + }, + { + "dpo_loss": 0.6911527514457703, + "epoch": 0.680207841284837, + "grad_norm": 102.70324138647139, + "learning_rate": 3.33648393194707e-07, + "logits": -1.3372268676757812, + "logps": -92.9881591796875, + "loss": 0.121, + "objective": 0.12949387729167938, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.518750011920929, + "regularize": 0.06037859991192818, + "step": 360 + }, + { + "dpo_loss": 0.6866102814674377, + "epoch": 0.6896551724137931, + "grad_norm": 88.6752156599502, + "learning_rate": 3.383742911153119e-07, + "logits": -1.3452448844909668, + "logps": -93.36683654785156, + "loss": 0.1229, + "objective": 0.12604525685310364, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.05738421529531479, + "step": 365 + }, + { + "dpo_loss": 0.6896581053733826, + "epoch": 0.6991025035427492, + "grad_norm": 91.19850585461762, + "learning_rate": 3.431001890359168e-07, + "logits": -1.3197768926620483, + "logps": -91.77013397216797, + "loss": 0.1276, + "objective": 0.11843843758106232, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.0494726225733757, + "step": 370 + }, + { + "dpo_loss": 0.691429615020752, + "epoch": 0.7085498346717053, + "grad_norm": 113.5680710791941, + "learning_rate": 3.478260869565217e-07, + "logits": -1.2648178339004517, + "logps": -91.7554702758789, + "loss": 0.1275, + "objective": 0.12085701525211334, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.05171404033899307, + "step": 375 + }, + { + "dpo_loss": 0.6935691833496094, + "epoch": 0.7179971658006613, + "grad_norm": 108.42021457365978, + "learning_rate": 3.525519848771266e-07, + "logits": -1.4277188777923584, + "logps": -91.43479919433594, + "loss": 0.1292, + "objective": 0.12868951261043549, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4437499940395355, + "regularize": 0.05933259800076485, + "step": 380 + }, + { + "dpo_loss": 0.6908777952194214, + "epoch": 0.7274444969296174, + "grad_norm": 112.83132769591303, + "learning_rate": 3.572778827977316e-07, + "logits": -1.428979516029358, + "logps": -94.34440612792969, + "loss": 0.1285, + "objective": 0.126731738448143, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 0.05764396861195564, + "step": 385 + }, + { + "dpo_loss": 0.6864285469055176, + "epoch": 0.7368918280585735, + "grad_norm": 86.03614856711587, + "learning_rate": 3.620037807183365e-07, + "logits": -1.3407856225967407, + "logps": -92.52519226074219, + "loss": 0.1402, + "objective": 0.14342442154884338, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.4437499940395355, + "regularize": 0.07478158175945282, + "step": 390 + }, + { + "dpo_loss": 0.6894232034683228, + "epoch": 0.7463391591875296, + "grad_norm": 90.02582661821339, + "learning_rate": 3.667296786389414e-07, + "logits": -1.450365424156189, + "logps": -94.80558776855469, + "loss": 0.1342, + "objective": 0.13283126056194305, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.06388893723487854, + "step": 395 + }, + { + "dpo_loss": 0.6896533370018005, + "epoch": 0.7557864903164856, + "grad_norm": 111.61106880900202, + "learning_rate": 3.7145557655954627e-07, + "logits": -1.3269095420837402, + "logps": -92.5389633178711, + "loss": 0.1414, + "objective": 0.13954514265060425, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.07057979702949524, + "step": 400 + }, + { + "epoch": 0.7557864903164856, + "eval_dpo_loss": 0.6920130848884583, + "eval_logits": -1.3138436079025269, + "eval_logps": -98.69994354248047, + "eval_loss": 0.1019834652543068, + "eval_objective": 0.10271409153938293, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5248447060585022, + "eval_regularize": 0.033512771129608154, + "eval_runtime": 158.4351, + "eval_samples_per_second": 36.545, + "eval_steps_per_second": 3.049, + "step": 400 + }, + { + "dpo_loss": 0.6916826367378235, + "epoch": 0.7652338214454416, + "grad_norm": 103.02854860900352, + "learning_rate": 3.761814744801512e-07, + "logits": -1.463388204574585, + "logps": -93.37408447265625, + "loss": 0.1505, + "objective": 0.1511785387992859, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.45625001192092896, + "regularize": 0.08201026916503906, + "step": 405 + }, + { + "dpo_loss": 0.6840475797653198, + "epoch": 0.7746811525743977, + "grad_norm": 100.25779449592062, + "learning_rate": 3.809073724007561e-07, + "logits": -1.330974817276001, + "logps": -91.45512390136719, + "loss": 0.1453, + "objective": 0.1453799307346344, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.07697516679763794, + "step": 410 + }, + { + "dpo_loss": 0.6831516027450562, + "epoch": 0.7841284837033538, + "grad_norm": 114.27467092070809, + "learning_rate": 3.8563327032136107e-07, + "logits": -1.4295470714569092, + "logps": -94.0125961303711, + "loss": 0.165, + "objective": 0.15841113030910492, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5062500238418579, + "regularize": 0.09009597450494766, + "step": 415 + }, + { + "dpo_loss": 0.6889381408691406, + "epoch": 0.7935758148323099, + "grad_norm": 99.17563424148113, + "learning_rate": 3.90359168241966e-07, + "logits": -1.3562965393066406, + "logps": -91.68006896972656, + "loss": 0.1444, + "objective": 0.14152219891548157, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.07262839376926422, + "step": 420 + }, + { + "dpo_loss": 0.6892581582069397, + "epoch": 0.8030231459612659, + "grad_norm": 100.05408578170393, + "learning_rate": 3.9508506616257084e-07, + "logits": -1.3055548667907715, + "logps": -91.72136688232422, + "loss": 0.1538, + "objective": 0.14655420184135437, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.40625, + "ranking_simple": 0.4000000059604645, + "regularize": 0.07762838155031204, + "step": 425 + }, + { + "dpo_loss": 0.6880292892456055, + "epoch": 0.812470477090222, + "grad_norm": 97.66357035709157, + "learning_rate": 3.9981096408317576e-07, + "logits": -1.3861249685287476, + "logps": -95.13639831542969, + "loss": 0.1585, + "objective": 0.14511282742023468, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.07630989700555801, + "step": 430 + }, + { + "dpo_loss": 0.6850839257240295, + "epoch": 0.821917808219178, + "grad_norm": 116.23193822053362, + "learning_rate": 4.0453686200378067e-07, + "logits": -1.4044657945632935, + "logps": -91.6175765991211, + "loss": 0.1652, + "objective": 0.1648859977722168, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.09637759625911713, + "step": 435 + }, + { + "dpo_loss": 0.6872573494911194, + "epoch": 0.8313651393481342, + "grad_norm": 101.47384172618189, + "learning_rate": 4.0926275992438564e-07, + "logits": -1.3521515130996704, + "logps": -92.96221923828125, + "loss": 0.1529, + "objective": 0.1533818542957306, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.512499988079071, + "regularize": 0.08465610444545746, + "step": 440 + }, + { + "dpo_loss": 0.6908755898475647, + "epoch": 0.8408124704770902, + "grad_norm": 91.82268216912694, + "learning_rate": 4.1398865784499055e-07, + "logits": -1.3667224645614624, + "logps": -90.73509979248047, + "loss": 0.1527, + "objective": 0.14873163402080536, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.0796440839767456, + "step": 445 + }, + { + "dpo_loss": 0.7019797563552856, + "epoch": 0.8502598016060463, + "grad_norm": 100.61548180012701, + "learning_rate": 4.1871455576559547e-07, + "logits": -1.3356683254241943, + "logps": -92.12760162353516, + "loss": 0.156, + "objective": 0.1631532460451126, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.09295526146888733, + "step": 450 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 0.6917065382003784, + "eval_logits": -1.3000538349151611, + "eval_logps": -98.69611358642578, + "eval_loss": 0.11016573011875153, + "eval_objective": 0.11068187654018402, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.522774338722229, + "eval_regularize": 0.041511211544275284, + "eval_runtime": 158.2413, + "eval_samples_per_second": 36.59, + "eval_steps_per_second": 3.052, + "step": 450 + }, + { + "dpo_loss": 0.6912184357643127, + "epoch": 0.8597071327350023, + "grad_norm": 103.74989373255174, + "learning_rate": 4.2344045368620033e-07, + "logits": -1.311997413635254, + "logps": -91.93920135498047, + "loss": 0.1648, + "objective": 0.16276951134204865, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.09364765137434006, + "step": 455 + }, + { + "dpo_loss": 0.6932337880134583, + "epoch": 0.8691544638639584, + "grad_norm": 99.03071397788196, + "learning_rate": 4.2816635160680524e-07, + "logits": -1.3874913454055786, + "logps": -92.34394836425781, + "loss": 0.1556, + "objective": 0.15355022251605988, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4375, + "regularize": 0.08422684669494629, + "step": 460 + }, + { + "dpo_loss": 0.6835826635360718, + "epoch": 0.8786017949929145, + "grad_norm": 84.6790640072118, + "learning_rate": 4.328922495274102e-07, + "logits": -1.3000524044036865, + "logps": -91.39022064208984, + "loss": 0.154, + "objective": 0.15162508189678192, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.08326681703329086, + "step": 465 + }, + { + "dpo_loss": 0.6978387832641602, + "epoch": 0.8880491261218706, + "grad_norm": 107.18167519452933, + "learning_rate": 4.3761814744801513e-07, + "logits": -1.3547214269638062, + "logps": -90.24058532714844, + "loss": 0.1673, + "objective": 0.156182661652565, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.08639879524707794, + "step": 470 + }, + { + "dpo_loss": 0.68897944688797, + "epoch": 0.8974964572508266, + "grad_norm": 94.54535920638504, + "learning_rate": 4.4234404536862004e-07, + "logits": -1.3638577461242676, + "logps": -91.9796371459961, + "loss": 0.1689, + "objective": 0.16064883768558502, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48124998807907104, + "regularize": 0.09175089001655579, + "step": 475 + }, + { + "dpo_loss": 0.690127968788147, + "epoch": 0.9069437883797827, + "grad_norm": 96.98421932494537, + "learning_rate": 4.470699432892249e-07, + "logits": -1.296614646911621, + "logps": -93.49898529052734, + "loss": 0.1654, + "objective": 0.16247372329235077, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5874999761581421, + "regularize": 0.09346093982458115, + "step": 480 + }, + { + "dpo_loss": 0.6874816417694092, + "epoch": 0.9163911195087387, + "grad_norm": 109.5139852284968, + "learning_rate": 4.517958412098298e-07, + "logits": -1.393359661102295, + "logps": -95.6229019165039, + "loss": 0.1731, + "objective": 0.16404297947883606, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.09529478847980499, + "step": 485 + }, + { + "dpo_loss": 0.6885187029838562, + "epoch": 0.9258384506376949, + "grad_norm": 112.83140218708049, + "learning_rate": 4.5652173913043473e-07, + "logits": -1.2763055562973022, + "logps": -93.39484405517578, + "loss": 0.1892, + "objective": 0.18885581195354462, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48750001192092896, + "regularize": 0.12000392377376556, + "step": 490 + }, + { + "dpo_loss": 0.7008720636367798, + "epoch": 0.9352857817666509, + "grad_norm": 105.14659658145943, + "learning_rate": 4.612476370510397e-07, + "logits": -1.2833281755447388, + "logps": -92.49003601074219, + "loss": 0.1824, + "objective": 0.1885298490524292, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5562499761581421, + "regularize": 0.11844261735677719, + "step": 495 + }, + { + "dpo_loss": 0.7026808261871338, + "epoch": 0.944733112895607, + "grad_norm": 104.20649038619608, + "learning_rate": 4.659735349716446e-07, + "logits": -1.2983553409576416, + "logps": -93.92900085449219, + "loss": 0.1843, + "objective": 0.1905163675546646, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.53125, + "regularize": 0.12024825811386108, + "step": 500 + }, + { + "epoch": 0.944733112895607, + "eval_dpo_loss": 0.6934216022491455, + "eval_logits": -1.2985299825668335, + "eval_logps": -98.3685073852539, + "eval_loss": 0.14251917600631714, + "eval_objective": 0.14164087176322937, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.52173912525177, + "eval_regularize": 0.07229872047901154, + "eval_runtime": 158.351, + "eval_samples_per_second": 36.564, + "eval_steps_per_second": 3.05, + "step": 500 + }, + { + "dpo_loss": 0.6825562715530396, + "epoch": 0.954180444024563, + "grad_norm": 87.49461844052641, + "learning_rate": 4.7069943289224953e-07, + "logits": -1.3763777017593384, + "logps": -92.05134582519531, + "loss": 0.1862, + "objective": 0.1869305521249771, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.11867489665746689, + "step": 505 + }, + { + "dpo_loss": 0.7000191807746887, + "epoch": 0.9636277751535192, + "grad_norm": 99.68210245118057, + "learning_rate": 4.754253308128544e-07, + "logits": -1.3264052867889404, + "logps": -93.01531982421875, + "loss": 0.1855, + "objective": 0.18150749802589417, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.550000011920929, + "regularize": 0.11150559037923813, + "step": 510 + }, + { + "dpo_loss": 0.6989023685455322, + "epoch": 0.9730751062824752, + "grad_norm": 100.37728881230282, + "learning_rate": 4.801512287334594e-07, + "logits": -1.3409388065338135, + "logps": -92.48466491699219, + "loss": 0.1815, + "objective": 0.17680883407592773, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5562499761581421, + "regularize": 0.1069185882806778, + "step": 515 + }, + { + "dpo_loss": 0.6951934695243835, + "epoch": 0.9825224374114313, + "grad_norm": 109.03096603881598, + "learning_rate": 4.848771266540643e-07, + "logits": -1.4067871570587158, + "logps": -93.4749984741211, + "loss": 0.1818, + "objective": 0.18318508565425873, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.581250011920929, + "regularize": 0.11366574466228485, + "step": 520 + }, + { + "dpo_loss": 0.6849699020385742, + "epoch": 0.9919697685403873, + "grad_norm": 88.97267098704813, + "learning_rate": 4.896030245746692e-07, + "logits": -1.2617175579071045, + "logps": -93.6603775024414, + "loss": 0.1789, + "objective": 0.17417797446250916, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.10568100214004517, + "step": 525 + }, + { + "dpo_loss": 0.6823090314865112, + "epoch": 1.0014170996693434, + "grad_norm": 89.5976454923623, + "learning_rate": 4.943289224952741e-07, + "logits": -1.4235401153564453, + "logps": -92.53488159179688, + "loss": 0.1967, + "objective": 0.2006862610578537, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.53125, + "regularize": 0.13245537877082825, + "step": 530 + }, + { + "dpo_loss": 0.6965185403823853, + "epoch": 1.0108644307982995, + "grad_norm": 96.30445573525516, + "learning_rate": 4.99054820415879e-07, + "logits": -1.4111480712890625, + "logps": -93.36735534667969, + "loss": 0.1921, + "objective": 0.19494469463825226, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1252928525209427, + "step": 535 + }, + { + "dpo_loss": 0.6989037394523621, + "epoch": 1.0203117619272555, + "grad_norm": 97.20685088312915, + "learning_rate": 4.999991291705134e-07, + "logits": -1.384477138519287, + "logps": -93.18318176269531, + "loss": 0.2125, + "objective": 0.2088538110256195, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5062500238418579, + "regularize": 0.13896343111991882, + "step": 540 + }, + { + "dpo_loss": 0.6883914470672607, + "epoch": 1.0297590930562117, + "grad_norm": 90.69429715375348, + "learning_rate": 4.999955914361218e-07, + "logits": -1.380433201789856, + "logps": -93.4023208618164, + "loss": 0.2032, + "objective": 0.1992042511701584, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.13036508858203888, + "step": 545 + }, + { + "dpo_loss": 0.6875864267349243, + "epoch": 1.0392064241851677, + "grad_norm": 91.50992212902428, + "learning_rate": 4.999893324084622e-07, + "logits": -1.3838626146316528, + "logps": -91.31867980957031, + "loss": 0.1954, + "objective": 0.2028588354587555, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.13410018384456635, + "step": 550 + }, + { + "epoch": 1.0392064241851677, + "eval_dpo_loss": 0.6945993900299072, + "eval_logits": -1.3153595924377441, + "eval_logps": -98.2336196899414, + "eval_loss": 0.13877831399440765, + "eval_objective": 0.13831962645053864, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.522774338722229, + "eval_regularize": 0.06885971128940582, + "eval_runtime": 158.0034, + "eval_samples_per_second": 36.645, + "eval_steps_per_second": 3.057, + "step": 550 + }, + { + "dpo_loss": 0.6934968829154968, + "epoch": 1.0486537553141237, + "grad_norm": 100.42594403491987, + "learning_rate": 4.999803521556664e-07, + "logits": -1.3395583629608154, + "logps": -90.90914916992188, + "loss": 0.2073, + "objective": 0.1963643729686737, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.12701468169689178, + "step": 555 + }, + { + "dpo_loss": 0.6871433854103088, + "epoch": 1.0581010864430798, + "grad_norm": 103.31670555425323, + "learning_rate": 4.999686507754875e-07, + "logits": -1.367850661277771, + "logps": -91.52650451660156, + "loss": 0.2065, + "objective": 0.18664252758026123, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.48750001192092896, + "regularize": 0.11792820692062378, + "step": 560 + }, + { + "dpo_loss": 0.6820244789123535, + "epoch": 1.067548417572036, + "grad_norm": 109.55842755689235, + "learning_rate": 4.999542283952998e-07, + "logits": -1.3654028177261353, + "logps": -93.674560546875, + "loss": 0.2142, + "objective": 0.21235975623130798, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.1441573202610016, + "step": 565 + }, + { + "dpo_loss": 0.6821997761726379, + "epoch": 1.076995748700992, + "grad_norm": 87.6265285859963, + "learning_rate": 4.999370851720956e-07, + "logits": -1.356442928314209, + "logps": -92.73716735839844, + "loss": 0.2023, + "objective": 0.2152920961380005, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.581250011920929, + "regularize": 0.14707210659980774, + "step": 570 + }, + { + "dpo_loss": 0.6839498281478882, + "epoch": 1.086443079829948, + "grad_norm": 94.30475178073908, + "learning_rate": 4.999172212924856e-07, + "logits": -1.3030678033828735, + "logps": -92.93643951416016, + "loss": 0.2005, + "objective": 0.1996048092842102, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.1312098205089569, + "step": 575 + }, + { + "dpo_loss": 0.6901076436042786, + "epoch": 1.095890410958904, + "grad_norm": 94.21853528392039, + "learning_rate": 4.99894636972695e-07, + "logits": -1.310119390487671, + "logps": -92.42277526855469, + "loss": 0.2094, + "objective": 0.22037836909294128, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.6187499761581421, + "ranking_simple": 0.6187499761581421, + "regularize": 0.15136758983135223, + "step": 580 + }, + { + "dpo_loss": 0.6884266138076782, + "epoch": 1.10533774208786, + "grad_norm": 88.706538539552, + "learning_rate": 4.998693324585628e-07, + "logits": -1.304504632949829, + "logps": -93.57672119140625, + "loss": 0.2051, + "objective": 0.20268912613391876, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.5874999761581421, + "regularize": 0.1338464766740799, + "step": 585 + }, + { + "dpo_loss": 0.6907263398170471, + "epoch": 1.1147850732168163, + "grad_norm": 105.11352044037606, + "learning_rate": 4.998413080255375e-07, + "logits": -1.4110699892044067, + "logps": -92.94535827636719, + "loss": 0.2072, + "objective": 0.19032931327819824, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.12125667184591293, + "step": 590 + }, + { + "dpo_loss": 0.6772922277450562, + "epoch": 1.1242324043457723, + "grad_norm": 95.3174741961889, + "learning_rate": 4.998105639786754e-07, + "logits": -1.3170404434204102, + "logps": -91.73414611816406, + "loss": 0.2182, + "objective": 0.19899888336658478, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.1312696635723114, + "step": 595 + }, + { + "dpo_loss": 0.6846722364425659, + "epoch": 1.1336797354747283, + "grad_norm": 107.8425091882789, + "learning_rate": 4.997771006526367e-07, + "logits": -1.3956836462020874, + "logps": -93.02611541748047, + "loss": 0.2073, + "objective": 0.20752784609794617, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.13906064629554749, + "step": 600 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 0.693564236164093, + "eval_logits": -1.307108998298645, + "eval_logps": -98.72145080566406, + "eval_loss": 0.1374109834432602, + "eval_objective": 0.13693860173225403, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.522774338722229, + "eval_regularize": 0.06758218258619308, + "eval_runtime": 158.0564, + "eval_samples_per_second": 36.632, + "eval_steps_per_second": 3.056, + "step": 600 + }, + { + "dpo_loss": 0.6941686272621155, + "epoch": 1.1431270666036846, + "grad_norm": 84.49370124503615, + "learning_rate": 4.997409184116819e-07, + "logits": -1.3777289390563965, + "logps": -93.79177856445312, + "loss": 0.2128, + "objective": 0.2253679782152176, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.1559511125087738, + "step": 605 + }, + { + "dpo_loss": 0.6771566271781921, + "epoch": 1.1525743977326406, + "grad_norm": 88.09246493054597, + "learning_rate": 4.997020176496679e-07, + "logits": -1.3691322803497314, + "logps": -93.24571990966797, + "loss": 0.2237, + "objective": 0.21853280067443848, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.5, + "regularize": 0.1508171707391739, + "step": 610 + }, + { + "dpo_loss": 0.6988297700881958, + "epoch": 1.1620217288615966, + "grad_norm": 96.71294215621295, + "learning_rate": 4.996603987900437e-07, + "logits": -1.4297274351119995, + "logps": -91.73030853271484, + "loss": 0.2175, + "objective": 0.2077571451663971, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5249999761581421, + "regularize": 0.13787415623664856, + "step": 615 + }, + { + "dpo_loss": 0.6964720487594604, + "epoch": 1.1714690599905526, + "grad_norm": 96.89701629175714, + "learning_rate": 4.996160622858458e-07, + "logits": -1.3424708843231201, + "logps": -90.92454528808594, + "loss": 0.2191, + "objective": 0.21006684005260468, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.574999988079071, + "regularize": 0.1404196172952652, + "step": 620 + }, + { + "dpo_loss": 0.7060242295265198, + "epoch": 1.1809163911195086, + "grad_norm": 85.87331082123467, + "learning_rate": 4.995690086196932e-07, + "logits": -1.4066816568374634, + "logps": -93.66090393066406, + "loss": 0.2397, + "objective": 0.24251866340637207, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.17191624641418457, + "step": 625 + }, + { + "dpo_loss": 0.6906918287277222, + "epoch": 1.1903637222484649, + "grad_norm": 112.18287918083412, + "learning_rate": 4.995192383037823e-07, + "logits": -1.3011844158172607, + "logps": -90.5967025756836, + "loss": 0.2062, + "objective": 0.21339142322540283, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.512499988079071, + "regularize": 0.1443222463130951, + "step": 630 + }, + { + "dpo_loss": 0.7012017965316772, + "epoch": 1.1998110533774209, + "grad_norm": 91.04006298092757, + "learning_rate": 4.994667518798809e-07, + "logits": -1.3534437417984009, + "logps": -90.98788452148438, + "loss": 0.2146, + "objective": 0.21284246444702148, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.14272227883338928, + "step": 635 + }, + { + "dpo_loss": 0.6859739422798157, + "epoch": 1.209258384506377, + "grad_norm": 95.64760986526014, + "learning_rate": 4.994115499193233e-07, + "logits": -1.3624138832092285, + "logps": -90.53767395019531, + "loss": 0.2284, + "objective": 0.23482659459114075, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.16622920334339142, + "step": 640 + }, + { + "dpo_loss": 0.6897287368774414, + "epoch": 1.2187057156353331, + "grad_norm": 85.2353798998164, + "learning_rate": 4.993536330230027e-07, + "logits": -1.265354871749878, + "logps": -91.70381164550781, + "loss": 0.2109, + "objective": 0.2076033651828766, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.45625001192092896, + "regularize": 0.13863049447536469, + "step": 645 + }, + { + "dpo_loss": 0.6822940707206726, + "epoch": 1.2281530467642892, + "grad_norm": 94.6596625093207, + "learning_rate": 4.992930018213657e-07, + "logits": -1.286306619644165, + "logps": -92.8187484741211, + "loss": 0.2165, + "objective": 0.22189109027385712, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5625, + "regularize": 0.15366169810295105, + "step": 650 + }, + { + "epoch": 1.2281530467642892, + "eval_dpo_loss": 0.6915929317474365, + "eval_logits": -1.2926098108291626, + "eval_logps": -97.9261474609375, + "eval_loss": 0.14777907729148865, + "eval_objective": 0.14874477684497833, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5232919454574585, + "eval_regularize": 0.0795854777097702, + "eval_runtime": 157.6426, + "eval_samples_per_second": 36.729, + "eval_steps_per_second": 3.064, + "step": 650 + }, + { + "dpo_loss": 0.687442421913147, + "epoch": 1.2376003778932452, + "grad_norm": 104.63327142074685, + "learning_rate": 4.992296569744051e-07, + "logits": -1.2754095792770386, + "logps": -91.40235900878906, + "loss": 0.2313, + "objective": 0.24213269352912903, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.606249988079071, + "ranking_simple": 0.59375, + "regularize": 0.17338842153549194, + "step": 655 + }, + { + "dpo_loss": 0.6873584389686584, + "epoch": 1.2470477090222012, + "grad_norm": 92.75152345942683, + "learning_rate": 4.991635991716527e-07, + "logits": -1.2639495134353638, + "logps": -90.1284408569336, + "loss": 0.2221, + "objective": 0.2178734540939331, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.512499988079071, + "regularize": 0.14913764595985413, + "step": 660 + }, + { + "dpo_loss": 0.6985175609588623, + "epoch": 1.2564950401511572, + "grad_norm": 88.00328633474199, + "learning_rate": 4.990948291321719e-07, + "logits": -1.385095477104187, + "logps": -92.58069610595703, + "loss": 0.2272, + "objective": 0.23054857552051544, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5625, + "regularize": 0.1606968343257904, + "step": 665 + }, + { + "dpo_loss": 0.6855772137641907, + "epoch": 1.2659423712801134, + "grad_norm": 85.58324526077789, + "learning_rate": 4.990233476045493e-07, + "logits": -1.2797305583953857, + "logps": -89.69403839111328, + "loss": 0.2226, + "objective": 0.21690480411052704, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4749999940395355, + "regularize": 0.14834709465503693, + "step": 670 + }, + { + "dpo_loss": 0.6901682615280151, + "epoch": 1.2753897024090695, + "grad_norm": 89.94100156988536, + "learning_rate": 4.989491553668878e-07, + "logits": -1.4622950553894043, + "logps": -92.04087829589844, + "loss": 0.2133, + "objective": 0.21802285313606262, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.14900599420070648, + "step": 675 + }, + { + "dpo_loss": 0.684799313545227, + "epoch": 1.2848370335380255, + "grad_norm": 85.69424074902147, + "learning_rate": 4.988722532267968e-07, + "logits": -1.2466356754302979, + "logps": -90.41310119628906, + "loss": 0.2247, + "objective": 0.22920887172222137, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.44999998807907104, + "regularize": 0.16072896122932434, + "step": 680 + }, + { + "dpo_loss": 0.6969146132469177, + "epoch": 1.2942843646669817, + "grad_norm": 83.29941926709483, + "learning_rate": 4.987926420213843e-07, + "logits": -1.2692553997039795, + "logps": -90.69180297851562, + "loss": 0.2207, + "objective": 0.2164468765258789, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.1467554122209549, + "step": 685 + }, + { + "dpo_loss": 0.7128168940544128, + "epoch": 1.3037316957959377, + "grad_norm": 89.95838972455667, + "learning_rate": 4.987103226172473e-07, + "logits": -1.3931699991226196, + "logps": -91.45069885253906, + "loss": 0.2298, + "objective": 0.2287740260362625, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4437499940395355, + "regularize": 0.15749232470989227, + "step": 690 + }, + { + "dpo_loss": 0.6920118927955627, + "epoch": 1.3131790269248937, + "grad_norm": 92.07098509412019, + "learning_rate": 4.986252959104624e-07, + "logits": -1.3434112071990967, + "logps": -89.19305419921875, + "loss": 0.2318, + "objective": 0.2392226755619049, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.17002148926258087, + "step": 695 + }, + { + "dpo_loss": 0.6873451471328735, + "epoch": 1.3226263580538498, + "grad_norm": 79.45369435451617, + "learning_rate": 4.985375628265765e-07, + "logits": -1.3786038160324097, + "logps": -90.81690979003906, + "loss": 0.2333, + "objective": 0.2435322254896164, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.45625001192092896, + "regularize": 0.17479772865772247, + "step": 700 + }, + { + "epoch": 1.3226263580538498, + "eval_dpo_loss": 0.691523015499115, + "eval_logits": -1.292966365814209, + "eval_logps": -97.1070785522461, + "eval_loss": 0.14701269567012787, + "eval_objective": 0.14496096968650818, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.07580868154764175, + "eval_runtime": 159.9237, + "eval_samples_per_second": 36.205, + "eval_steps_per_second": 3.02, + "step": 700 + }, + { + "dpo_loss": 0.6949089765548706, + "epoch": 1.3320736891828058, + "grad_norm": 84.44151860295028, + "learning_rate": 4.984471243205964e-07, + "logits": -1.3508937358856201, + "logps": -87.85145568847656, + "loss": 0.2232, + "objective": 0.19557249546051025, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.12608161568641663, + "step": 705 + }, + { + "dpo_loss": 0.6862300038337708, + "epoch": 1.3415210203117618, + "grad_norm": 80.57908902050966, + "learning_rate": 4.983539813769778e-07, + "logits": -1.283381700515747, + "logps": -90.93116760253906, + "loss": 0.2168, + "objective": 0.2195473164319992, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.15092433989048004, + "step": 710 + }, + { + "dpo_loss": 0.7053779363632202, + "epoch": 1.350968351440718, + "grad_norm": 89.9139558928414, + "learning_rate": 4.98258135009616e-07, + "logits": -1.34377121925354, + "logps": -92.65654754638672, + "loss": 0.2232, + "objective": 0.22837159037590027, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.15783381462097168, + "step": 715 + }, + { + "dpo_loss": 0.7014961838722229, + "epoch": 1.360415682569674, + "grad_norm": 90.6743435807847, + "learning_rate": 4.981595862618335e-07, + "logits": -1.3795337677001953, + "logps": -89.71587371826172, + "loss": 0.2222, + "objective": 0.20193001627922058, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.13178040087223053, + "step": 720 + }, + { + "dpo_loss": 0.6928237080574036, + "epoch": 1.36986301369863, + "grad_norm": 84.96138592580893, + "learning_rate": 4.980583362063696e-07, + "logits": -1.3667652606964111, + "logps": -88.43769073486328, + "loss": 0.2381, + "objective": 0.22285565733909607, + "ranking_idealized": 0.44999998807907104, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4375, + "regularize": 0.15357330441474915, + "step": 725 + }, + { + "dpo_loss": 0.6958102583885193, + "epoch": 1.3793103448275863, + "grad_norm": 81.63702429187545, + "learning_rate": 4.97954385945368e-07, + "logits": -1.3288919925689697, + "logps": -93.19049072265625, + "loss": 0.2209, + "objective": 0.2079770565032959, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.1383960247039795, + "step": 730 + }, + { + "dpo_loss": 0.691160261631012, + "epoch": 1.3887576759565423, + "grad_norm": 88.03221762034345, + "learning_rate": 4.978477366103651e-07, + "logits": -1.3048386573791504, + "logps": -94.25810241699219, + "loss": 0.2502, + "objective": 0.25141897797584534, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.18230298161506653, + "step": 735 + }, + { + "dpo_loss": 0.6944637298583984, + "epoch": 1.3982050070854983, + "grad_norm": 87.28974817363158, + "learning_rate": 4.977383893622782e-07, + "logits": -1.3183424472808838, + "logps": -94.75113677978516, + "loss": 0.2423, + "objective": 0.26133081316947937, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4749999940395355, + "regularize": 0.19188445806503296, + "step": 740 + }, + { + "dpo_loss": 0.6745160818099976, + "epoch": 1.4076523382144543, + "grad_norm": 96.76417312133702, + "learning_rate": 4.976263453913917e-07, + "logits": -1.2408394813537598, + "logps": -93.50255584716797, + "loss": 0.2262, + "objective": 0.23426973819732666, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.45625001192092896, + "regularize": 0.16681811213493347, + "step": 745 + }, + { + "dpo_loss": 0.679398238658905, + "epoch": 1.4170996693434104, + "grad_norm": 83.00130223599811, + "learning_rate": 4.975116059173451e-07, + "logits": -1.2838294506072998, + "logps": -92.10679626464844, + "loss": 0.229, + "objective": 0.23130765557289124, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16336780786514282, + "step": 750 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 0.6928658485412598, + "eval_logits": -1.2689110040664673, + "eval_logps": -97.09227752685547, + "eval_loss": 0.17181387543678284, + "eval_objective": 0.17250551283359528, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.1032189130783081, + "eval_runtime": 157.4279, + "eval_samples_per_second": 36.779, + "eval_steps_per_second": 3.068, + "step": 750 + }, + { + "dpo_loss": 0.6830964684486389, + "epoch": 1.4265470004723666, + "grad_norm": 95.57749503310629, + "learning_rate": 4.973941721891196e-07, + "logits": -1.342911958694458, + "logps": -92.39205169677734, + "loss": 0.2388, + "objective": 0.2358742505311966, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5625, + "regularize": 0.16756460070610046, + "step": 755 + }, + { + "dpo_loss": 0.6861435174942017, + "epoch": 1.4359943316013226, + "grad_norm": 94.3425647575578, + "learning_rate": 4.972740454850243e-07, + "logits": -1.2989110946655273, + "logps": -91.51603698730469, + "loss": 0.2407, + "objective": 0.24070945382118225, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.46875, + "regularize": 0.17209510505199432, + "step": 760 + }, + { + "dpo_loss": 0.7200834155082703, + "epoch": 1.4454416627302786, + "grad_norm": 83.8026116090842, + "learning_rate": 4.971512271126819e-07, + "logits": -1.3317606449127197, + "logps": -92.68598937988281, + "loss": 0.252, + "objective": 0.24466517567634583, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.1726568043231964, + "step": 765 + }, + { + "dpo_loss": 0.6894422769546509, + "epoch": 1.4548889938592349, + "grad_norm": 88.18228914725576, + "learning_rate": 4.970257184090156e-07, + "logits": -1.3856732845306396, + "logps": -94.49053192138672, + "loss": 0.2368, + "objective": 0.2500324249267578, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.46875, + "regularize": 0.18108820915222168, + "step": 770 + }, + { + "dpo_loss": 0.6978266835212708, + "epoch": 1.4643363249881909, + "grad_norm": 84.42047599647562, + "learning_rate": 4.968975207402331e-07, + "logits": -1.299525260925293, + "logps": -90.47492218017578, + "loss": 0.2412, + "objective": 0.2401023656129837, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.48750001192092896, + "regularize": 0.17031969130039215, + "step": 775 + }, + { + "dpo_loss": 0.6908974051475525, + "epoch": 1.473783656117147, + "grad_norm": 93.16672039442666, + "learning_rate": 4.96766635501813e-07, + "logits": -1.2777698040008545, + "logps": -90.28379821777344, + "loss": 0.2397, + "objective": 0.23492303490638733, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.1658332794904709, + "step": 780 + }, + { + "dpo_loss": 0.7020236849784851, + "epoch": 1.483230987246103, + "grad_norm": 96.38647725910744, + "learning_rate": 4.966330641184889e-07, + "logits": -1.2058895826339722, + "logps": -89.30281829833984, + "loss": 0.2408, + "objective": 0.2349843531847, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.16478195786476135, + "step": 785 + }, + { + "dpo_loss": 0.6749367117881775, + "epoch": 1.492678318375059, + "grad_norm": 80.80313448250223, + "learning_rate": 4.964968080442341e-07, + "logits": -1.3213014602661133, + "logps": -89.11174011230469, + "loss": 0.237, + "objective": 0.23509947955608368, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48124998807907104, + "regularize": 0.16760580241680145, + "step": 790 + }, + { + "dpo_loss": 0.6985687613487244, + "epoch": 1.5021256495040152, + "grad_norm": 87.52434485748383, + "learning_rate": 4.963578687622455e-07, + "logits": -1.3071268796920776, + "logps": -92.3751449584961, + "loss": 0.2348, + "objective": 0.23419027030467987, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.16433341801166534, + "step": 795 + }, + { + "dpo_loss": 0.6958016157150269, + "epoch": 1.5115729806329712, + "grad_norm": 87.68870839452211, + "learning_rate": 4.962162477849281e-07, + "logits": -1.3120272159576416, + "logps": -90.5923843383789, + "loss": 0.2565, + "objective": 0.2664094567298889, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.19682928919792175, + "step": 800 + }, + { + "epoch": 1.5115729806329712, + "eval_dpo_loss": 0.6944370865821838, + "eval_logits": -1.2540355920791626, + "eval_logps": -97.26213073730469, + "eval_loss": 0.18168330192565918, + "eval_objective": 0.1830022782087326, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.1135585755109787, + "eval_runtime": 158.4981, + "eval_samples_per_second": 36.53, + "eval_steps_per_second": 3.047, + "step": 800 + }, + { + "dpo_loss": 0.7026247978210449, + "epoch": 1.5210203117619272, + "grad_norm": 81.65317643314856, + "learning_rate": 4.96071946653878e-07, + "logits": -1.3086163997650146, + "logps": -90.99162292480469, + "loss": 0.238, + "objective": 0.2365531474351883, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5375000238418579, + "regularize": 0.16629067063331604, + "step": 805 + }, + { + "dpo_loss": 0.6891599893569946, + "epoch": 1.5304676428908834, + "grad_norm": 82.12999785328469, + "learning_rate": 4.959249669398655e-07, + "logits": -1.2432034015655518, + "logps": -91.92169189453125, + "loss": 0.2353, + "objective": 0.2502974569797516, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1813814640045166, + "step": 810 + }, + { + "dpo_loss": 0.6978787779808044, + "epoch": 1.5399149740198395, + "grad_norm": 86.46569624354272, + "learning_rate": 4.957753102428184e-07, + "logits": -1.3116600513458252, + "logps": -91.28858947753906, + "loss": 0.2343, + "objective": 0.23753111064434052, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.6000000238418579, + "ranking_simple": 0.59375, + "regularize": 0.1677432358264923, + "step": 815 + }, + { + "dpo_loss": 0.6800965070724487, + "epoch": 1.5493623051487955, + "grad_norm": 86.12978638347721, + "learning_rate": 4.956229781918047e-07, + "logits": -1.3178225755691528, + "logps": -92.11973571777344, + "loss": 0.23, + "objective": 0.23273181915283203, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.16472215950489044, + "step": 820 + }, + { + "dpo_loss": 0.6855944395065308, + "epoch": 1.5588096362777515, + "grad_norm": 91.07443851138821, + "learning_rate": 4.954679724450142e-07, + "logits": -1.3200221061706543, + "logps": -91.27191162109375, + "loss": 0.2389, + "objective": 0.23568864166736603, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.16712923347949982, + "step": 825 + }, + { + "dpo_loss": 0.6887711882591248, + "epoch": 1.5682569674067075, + "grad_norm": 79.19312631115046, + "learning_rate": 4.953102946897411e-07, + "logits": -1.2187261581420898, + "logps": -90.7856216430664, + "loss": 0.2384, + "objective": 0.24422509968280792, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48750001192092896, + "regularize": 0.1753479689359665, + "step": 830 + }, + { + "dpo_loss": 0.6980452537536621, + "epoch": 1.5777042985356635, + "grad_norm": 94.31876516475398, + "learning_rate": 4.951499466423653e-07, + "logits": -1.2923799753189087, + "logps": -92.07442474365234, + "loss": 0.239, + "objective": 0.24555882811546326, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.17575430870056152, + "step": 835 + }, + { + "dpo_loss": 0.7045887112617493, + "epoch": 1.5871516296646198, + "grad_norm": 81.47411045744263, + "learning_rate": 4.949869300483338e-07, + "logits": -1.2757551670074463, + "logps": -90.0054702758789, + "loss": 0.2412, + "objective": 0.24159309267997742, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.17113421857357025, + "step": 840 + }, + { + "dpo_loss": 0.6911923885345459, + "epoch": 1.5965989607935758, + "grad_norm": 80.29449667582183, + "learning_rate": 4.948212466821419e-07, + "logits": -1.2297747135162354, + "logps": -91.9012680053711, + "loss": 0.2365, + "objective": 0.2445167601108551, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4312500059604645, + "ranking_simple": 0.44999998807907104, + "regularize": 0.17539751529693604, + "step": 845 + }, + { + "dpo_loss": 0.7042365074157715, + "epoch": 1.606046291922532, + "grad_norm": 86.21783815042541, + "learning_rate": 4.946528983473133e-07, + "logits": -1.2861218452453613, + "logps": -88.93782043457031, + "loss": 0.2479, + "objective": 0.24865977466106415, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.4312500059604645, + "ranking_simple": 0.4312500059604645, + "regularize": 0.17823612689971924, + "step": 850 + }, + { + "epoch": 1.606046291922532, + "eval_dpo_loss": 0.6945503950119019, + "eval_logits": -1.2707864046096802, + "eval_logps": -96.34225463867188, + "eval_loss": 0.18642590939998627, + "eval_objective": 0.18530656397342682, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.11585152894258499, + "eval_runtime": 159.8355, + "eval_samples_per_second": 36.225, + "eval_steps_per_second": 3.022, + "step": 850 + }, + { + "dpo_loss": 0.7014453411102295, + "epoch": 1.615493623051488, + "grad_norm": 94.62150643181256, + "learning_rate": 4.944818868763813e-07, + "logits": -1.446413278579712, + "logps": -92.63004302978516, + "loss": 0.2459, + "objective": 0.2486976683139801, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.581250011920929, + "regularize": 0.17855311930179596, + "step": 855 + }, + { + "dpo_loss": 0.7022618055343628, + "epoch": 1.624940954180444, + "grad_norm": 84.34142777503043, + "learning_rate": 4.943082141308679e-07, + "logits": -1.2727842330932617, + "logps": -88.67359161376953, + "loss": 0.2329, + "objective": 0.21688945591449738, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 0.14666327834129333, + "step": 860 + }, + { + "dpo_loss": 0.698417603969574, + "epoch": 1.6343882853094, + "grad_norm": 82.98354606550087, + "learning_rate": 4.941318820012645e-07, + "logits": -1.3707373142242432, + "logps": -89.36933898925781, + "loss": 0.2314, + "objective": 0.22971661388874054, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.159874826669693, + "step": 865 + }, + { + "dpo_loss": 0.6885509490966797, + "epoch": 1.643835616438356, + "grad_norm": 81.18325168892137, + "learning_rate": 4.939528924070107e-07, + "logits": -1.3568060398101807, + "logps": -90.24173736572266, + "loss": 0.2252, + "objective": 0.2341945618391037, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.16533946990966797, + "step": 870 + }, + { + "dpo_loss": 0.7120766639709473, + "epoch": 1.653282947567312, + "grad_norm": 89.83602487849788, + "learning_rate": 4.937712472964736e-07, + "logits": -1.3132705688476562, + "logps": -89.10626220703125, + "loss": 0.2492, + "objective": 0.2398935854434967, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48750001192092896, + "regularize": 0.1686859279870987, + "step": 875 + }, + { + "dpo_loss": 0.6932774186134338, + "epoch": 1.6627302786962683, + "grad_norm": 78.3424148808078, + "learning_rate": 4.935869486469266e-07, + "logits": -1.2213243246078491, + "logps": -90.08100891113281, + "loss": 0.2424, + "objective": 0.2528541684150696, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4312500059604645, + "regularize": 0.1835264414548874, + "step": 880 + }, + { + "dpo_loss": 0.7122358083724976, + "epoch": 1.6721776098252243, + "grad_norm": 81.83908311068858, + "learning_rate": 4.933999984645275e-07, + "logits": -1.3308547735214233, + "logps": -88.36769104003906, + "loss": 0.2362, + "objective": 0.23830123245716095, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.518750011920929, + "regularize": 0.16707763075828552, + "step": 885 + }, + { + "dpo_loss": 0.6975988745689392, + "epoch": 1.6816249409541806, + "grad_norm": 85.91803463556262, + "learning_rate": 4.932103987842975e-07, + "logits": -1.2521806955337524, + "logps": -88.85072326660156, + "loss": 0.2473, + "objective": 0.237627312541008, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16786742210388184, + "step": 890 + }, + { + "dpo_loss": 0.7044867277145386, + "epoch": 1.6910722720831366, + "grad_norm": 79.98253861185327, + "learning_rate": 4.930181516700982e-07, + "logits": -1.269334077835083, + "logps": -91.34991455078125, + "loss": 0.2509, + "objective": 0.2616916298866272, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.19124294817447662, + "step": 895 + }, + { + "dpo_loss": 0.6823757886886597, + "epoch": 1.7005196032120926, + "grad_norm": 81.00522927194797, + "learning_rate": 4.928232592146097e-07, + "logits": -1.3042701482772827, + "logps": -92.00179290771484, + "loss": 0.2586, + "objective": 0.26876914501190186, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.20053155720233917, + "step": 900 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 0.6944207549095154, + "eval_logits": -1.2622801065444946, + "eval_logps": -97.21571350097656, + "eval_loss": 0.18386565148830414, + "eval_objective": 0.18254661560058594, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5222567319869995, + "eval_regularize": 0.11310453712940216, + "eval_runtime": 157.6535, + "eval_samples_per_second": 36.726, + "eval_steps_per_second": 3.064, + "step": 900 + }, + { + "dpo_loss": 0.7131351828575134, + "epoch": 1.7099669343410486, + "grad_norm": 92.4488887165358, + "learning_rate": 4.926257235393077e-07, + "logits": -1.273389220237732, + "logps": -91.80742645263672, + "loss": 0.263, + "objective": 0.273505836725235, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.2021923065185547, + "step": 905 + }, + { + "dpo_loss": 0.6990777254104614, + "epoch": 1.7194142654700046, + "grad_norm": 89.78081376469598, + "learning_rate": 4.924255467944397e-07, + "logits": -1.21916925907135, + "logps": -93.84732818603516, + "loss": 0.2557, + "objective": 0.2540630102157593, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.550000011920929, + "regularize": 0.18415522575378418, + "step": 910 + }, + { + "dpo_loss": 0.7060036659240723, + "epoch": 1.7288615965989607, + "grad_norm": 84.34105627107965, + "learning_rate": 4.922227311590029e-07, + "logits": -1.328016996383667, + "logps": -89.12452697753906, + "loss": 0.251, + "objective": 0.24675559997558594, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5874999761581421, + "regularize": 0.17615528404712677, + "step": 915 + }, + { + "dpo_loss": 0.7035702466964722, + "epoch": 1.738308927727917, + "grad_norm": 101.23265714387522, + "learning_rate": 4.920172788407195e-07, + "logits": -1.3243465423583984, + "logps": -90.22145080566406, + "loss": 0.2595, + "objective": 0.2632547616958618, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.1928977072238922, + "step": 920 + }, + { + "dpo_loss": 0.704595685005188, + "epoch": 1.747756258856873, + "grad_norm": 86.12783856162052, + "learning_rate": 4.91809192076013e-07, + "logits": -1.3240232467651367, + "logps": -90.23241424560547, + "loss": 0.2428, + "objective": 0.23700585961341858, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1665463149547577, + "step": 925 + }, + { + "dpo_loss": 0.6912989020347595, + "epoch": 1.7572035899858292, + "grad_norm": 88.12461065279146, + "learning_rate": 4.915984731299838e-07, + "logits": -1.3094854354858398, + "logps": -87.85308837890625, + "loss": 0.2451, + "objective": 0.24931573867797852, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.18018585443496704, + "step": 930 + }, + { + "dpo_loss": 0.6917027831077576, + "epoch": 1.7666509211147852, + "grad_norm": 82.18255854656601, + "learning_rate": 4.913851242963846e-07, + "logits": -1.3129820823669434, + "logps": -89.41729736328125, + "loss": 0.2462, + "objective": 0.22966830432415009, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.518750011920929, + "regularize": 0.1604980230331421, + "step": 935 + }, + { + "dpo_loss": 0.6987249255180359, + "epoch": 1.7760982522437412, + "grad_norm": 84.73374743748703, + "learning_rate": 4.91169147897595e-07, + "logits": -1.2820937633514404, + "logps": -89.26692199707031, + "loss": 0.2313, + "objective": 0.2306392639875412, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.16076678037643433, + "step": 940 + }, + { + "dpo_loss": 0.6959508061408997, + "epoch": 1.7855455833726972, + "grad_norm": 83.20896646756478, + "learning_rate": 4.909505462845974e-07, + "logits": -1.3393566608428955, + "logps": -89.69905090332031, + "loss": 0.2387, + "objective": 0.23756399750709534, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.16796889901161194, + "step": 945 + }, + { + "dpo_loss": 0.6779564023017883, + "epoch": 1.7949929145016532, + "grad_norm": 84.28401023606136, + "learning_rate": 4.907293218369498e-07, + "logits": -1.368841290473938, + "logps": -88.40916442871094, + "loss": 0.2347, + "objective": 0.21868371963500977, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.1508881002664566, + "step": 950 + }, + { + "epoch": 1.7949929145016532, + "eval_dpo_loss": 0.6945188641548157, + "eval_logits": -1.2677661180496216, + "eval_logps": -94.84024810791016, + "eval_loss": 0.19953110814094543, + "eval_objective": 0.19893400371074677, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.12948210537433624, + "eval_runtime": 157.0713, + "eval_samples_per_second": 36.862, + "eval_steps_per_second": 3.075, + "step": 950 + }, + { + "dpo_loss": 0.6959952712059021, + "epoch": 1.8044402456306092, + "grad_norm": 82.19115094683073, + "learning_rate": 4.905054769627612e-07, + "logits": -1.3279144763946533, + "logps": -89.822021484375, + "loss": 0.2426, + "objective": 0.22453975677490234, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.53125, + "regularize": 0.15494023263454437, + "step": 955 + }, + { + "dpo_loss": 0.6977975964546204, + "epoch": 1.8138875767595655, + "grad_norm": 79.54284804882354, + "learning_rate": 4.902790140986649e-07, + "logits": -1.3207134008407593, + "logps": -89.81251525878906, + "loss": 0.2421, + "objective": 0.23721659183502197, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16743679344654083, + "step": 960 + }, + { + "dpo_loss": 0.700217604637146, + "epoch": 1.8233349078885215, + "grad_norm": 80.11794966712507, + "learning_rate": 4.900499357097915e-07, + "logits": -1.3014113903045654, + "logps": -89.26399230957031, + "loss": 0.2313, + "objective": 0.24520444869995117, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.17518267035484314, + "step": 965 + }, + { + "dpo_loss": 0.6830806732177734, + "epoch": 1.8327822390174777, + "grad_norm": 85.50728061889993, + "learning_rate": 4.898182442897432e-07, + "logits": -1.3395977020263672, + "logps": -90.12285614013672, + "loss": 0.2441, + "objective": 0.2601337432861328, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.574999988079071, + "regularize": 0.19182565808296204, + "step": 970 + }, + { + "dpo_loss": 0.7140229940414429, + "epoch": 1.8422295701464337, + "grad_norm": 92.90130732582263, + "learning_rate": 4.895839423605656e-07, + "logits": -1.364551305770874, + "logps": -90.04396057128906, + "loss": 0.2498, + "objective": 0.24496057629585266, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.17355826497077942, + "step": 975 + }, + { + "dpo_loss": 0.6796795725822449, + "epoch": 1.8516769012753898, + "grad_norm": 83.19455027012233, + "learning_rate": 4.893470324727208e-07, + "logits": -1.2256380319595337, + "logps": -87.5365982055664, + "loss": 0.2305, + "objective": 0.21892723441123962, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.53125, + "regularize": 0.15095928311347961, + "step": 980 + }, + { + "dpo_loss": 0.6931859254837036, + "epoch": 1.8611242324043458, + "grad_norm": 81.35496778915645, + "learning_rate": 4.891075172050591e-07, + "logits": -1.340255856513977, + "logps": -88.29389953613281, + "loss": 0.2303, + "objective": 0.22776278853416443, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.15844415128231049, + "step": 985 + }, + { + "dpo_loss": 0.7015528678894043, + "epoch": 1.8705715635333018, + "grad_norm": 77.84141782754642, + "learning_rate": 4.888653991647919e-07, + "logits": -1.3628294467926025, + "logps": -90.3727035522461, + "loss": 0.2384, + "objective": 0.2526736855506897, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.1825183928012848, + "step": 990 + }, + { + "dpo_loss": 0.6984178423881531, + "epoch": 1.8800188946622578, + "grad_norm": 84.78671948973349, + "learning_rate": 4.886206809874623e-07, + "logits": -1.2810890674591064, + "logps": -91.11246490478516, + "loss": 0.2554, + "objective": 0.25629252195358276, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.18645073473453522, + "step": 995 + }, + { + "dpo_loss": 0.7016985416412354, + "epoch": 1.8894662257912138, + "grad_norm": 77.84474982912333, + "learning_rate": 4.883733653369172e-07, + "logits": -1.3840316534042358, + "logps": -87.68888854980469, + "loss": 0.2414, + "objective": 0.23510241508483887, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.16493254899978638, + "step": 1000 + }, + { + "epoch": 1.8894662257912138, + "eval_dpo_loss": 0.692449152469635, + "eval_logits": -1.2579281330108643, + "eval_logps": -95.87931823730469, + "eval_loss": 0.18945148587226868, + "eval_objective": 0.19014763832092285, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5253623127937317, + "eval_regularize": 0.12090273201465607, + "eval_runtime": 157.7109, + "eval_samples_per_second": 36.713, + "eval_steps_per_second": 3.063, + "step": 1000 + }, + { + "dpo_loss": 0.6800759434700012, + "epoch": 1.89891355692017, + "grad_norm": 81.6394205444093, + "learning_rate": 4.881234549052775e-07, + "logits": -1.2515195608139038, + "logps": -88.47236633300781, + "loss": 0.2396, + "objective": 0.2439851462841034, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.17597754299640656, + "step": 1005 + }, + { + "dpo_loss": 0.6951671838760376, + "epoch": 1.908360888049126, + "grad_norm": 82.6142617114122, + "learning_rate": 4.878709524129096e-07, + "logits": -1.1769020557403564, + "logps": -88.99317169189453, + "loss": 0.233, + "objective": 0.2350684106349945, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16555170714855194, + "step": 1010 + }, + { + "dpo_loss": 0.704579770565033, + "epoch": 1.9178082191780823, + "grad_norm": 109.58609659876082, + "learning_rate": 4.876158606083952e-07, + "logits": -1.350940227508545, + "logps": -90.60091400146484, + "loss": 0.2504, + "objective": 0.26667410135269165, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.543749988079071, + "regularize": 0.1962161660194397, + "step": 1015 + }, + { + "dpo_loss": 0.684603214263916, + "epoch": 1.9272555503070383, + "grad_norm": 75.7008642804927, + "learning_rate": 4.873581822685019e-07, + "logits": -1.298457384109497, + "logps": -92.027099609375, + "loss": 0.2523, + "objective": 0.24609248340129852, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.1776321530342102, + "step": 1020 + }, + { + "dpo_loss": 0.6828368306159973, + "epoch": 1.9367028814359943, + "grad_norm": 78.59042762552068, + "learning_rate": 4.870979201981523e-07, + "logits": -1.2884193658828735, + "logps": -88.74574279785156, + "loss": 0.2481, + "objective": 0.23535391688346863, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.4625000059604645, + "regularize": 0.16707023978233337, + "step": 1025 + }, + { + "dpo_loss": 0.7131926417350769, + "epoch": 1.9461502125649504, + "grad_norm": 78.49738347088316, + "learning_rate": 4.86835077230394e-07, + "logits": -1.2955366373062134, + "logps": -88.70613861083984, + "loss": 0.2431, + "objective": 0.25340279936790466, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.18208353221416473, + "step": 1030 + }, + { + "dpo_loss": 0.6981402635574341, + "epoch": 1.9555975436939064, + "grad_norm": 80.29900254916205, + "learning_rate": 4.865696562263689e-07, + "logits": -1.2850407361984253, + "logps": -90.81657409667969, + "loss": 0.2442, + "objective": 0.24822275340557098, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5874999761581421, + "regularize": 0.17840874195098877, + "step": 1035 + }, + { + "dpo_loss": 0.6980287432670593, + "epoch": 1.9650448748228624, + "grad_norm": 83.18234069075524, + "learning_rate": 4.863016600752813e-07, + "logits": -1.2231897115707397, + "logps": -89.87462615966797, + "loss": 0.2368, + "objective": 0.24762320518493652, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.1778203248977661, + "step": 1040 + }, + { + "dpo_loss": 0.6901535987854004, + "epoch": 1.9744922059518186, + "grad_norm": 78.95679943145011, + "learning_rate": 4.860310916943672e-07, + "logits": -1.2009848356246948, + "logps": -90.7292709350586, + "loss": 0.2456, + "objective": 0.22756969928741455, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1585543304681778, + "step": 1045 + }, + { + "dpo_loss": 0.6983585357666016, + "epoch": 1.9839395370807746, + "grad_norm": 82.01184585459328, + "learning_rate": 4.857579540288622e-07, + "logits": -1.270129680633545, + "logps": -91.57951354980469, + "loss": 0.2433, + "objective": 0.2582865059375763, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.4749999940395355, + "regularize": 0.1884506642818451, + "step": 1050 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 0.6923243403434753, + "eval_logits": -1.2552448511123657, + "eval_logps": -95.79696655273438, + "eval_loss": 0.209737628698349, + "eval_objective": 0.20681917667388916, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5258799195289612, + "eval_regularize": 0.13758672773838043, + "eval_runtime": 163.6808, + "eval_samples_per_second": 35.374, + "eval_steps_per_second": 2.951, + "step": 1050 + }, + { + "dpo_loss": 0.6885973811149597, + "epoch": 1.9933868682097309, + "grad_norm": 88.44691340438358, + "learning_rate": 4.854822500519694e-07, + "logits": -1.1901785135269165, + "logps": -89.351806640625, + "loss": 0.2419, + "objective": 0.23133370280265808, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.16247394680976868, + "step": 1055 + }, + { + "dpo_loss": 0.7313130497932434, + "epoch": 2.002834199338687, + "grad_norm": 77.72279899826333, + "learning_rate": 4.852039827648274e-07, + "logits": -1.2519927024841309, + "logps": -90.05610656738281, + "loss": 0.257, + "objective": 0.25717490911483765, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.53125, + "regularize": 0.18404361605644226, + "step": 1060 + }, + { + "dpo_loss": 0.6962462067604065, + "epoch": 2.012281530467643, + "grad_norm": 81.63125239898291, + "learning_rate": 4.849231551964771e-07, + "logits": -1.2965519428253174, + "logps": -88.1960220336914, + "loss": 0.2471, + "objective": 0.23747217655181885, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.16784754395484924, + "step": 1065 + }, + { + "dpo_loss": 0.7073962688446045, + "epoch": 2.021728861596599, + "grad_norm": 77.69276192791024, + "learning_rate": 4.84639770403829e-07, + "logits": -1.2943260669708252, + "logps": -89.67262268066406, + "loss": 0.2268, + "objective": 0.22726266086101532, + "ranking_idealized": 0.643750011920929, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.581250011920929, + "regularize": 0.15652304887771606, + "step": 1070 + }, + { + "dpo_loss": 0.6832243204116821, + "epoch": 2.031176192725555, + "grad_norm": 83.4346543281612, + "learning_rate": 4.843538314716303e-07, + "logits": -1.2915871143341064, + "logps": -89.31999969482422, + "loss": 0.2427, + "objective": 0.24280743300914764, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.4937500059604645, + "regularize": 0.1744849979877472, + "step": 1075 + }, + { + "dpo_loss": 0.7055403590202332, + "epoch": 2.040623523854511, + "grad_norm": 77.27965034026705, + "learning_rate": 4.840653415124302e-07, + "logits": -1.2398439645767212, + "logps": -89.6719970703125, + "loss": 0.2294, + "objective": 0.2320195883512497, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.1614655703306198, + "step": 1080 + }, + { + "dpo_loss": 0.6916980743408203, + "epoch": 2.050070854983467, + "grad_norm": 77.59782281381496, + "learning_rate": 4.837743036665476e-07, + "logits": -1.2724249362945557, + "logps": -91.91465759277344, + "loss": 0.2537, + "objective": 0.2678667902946472, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.19869700074195862, + "step": 1085 + }, + { + "dpo_loss": 0.6935771107673645, + "epoch": 2.0595181861124234, + "grad_norm": 80.82666392609994, + "learning_rate": 4.834807211020356e-07, + "logits": -1.2286088466644287, + "logps": -93.31673431396484, + "loss": 0.2434, + "objective": 0.25225305557250977, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.18289534747600555, + "step": 1090 + }, + { + "dpo_loss": 0.6847450733184814, + "epoch": 2.0689655172413794, + "grad_norm": 77.40627853328184, + "learning_rate": 4.831845970146474e-07, + "logits": -1.3850970268249512, + "logps": -90.1637191772461, + "loss": 0.2472, + "objective": 0.23211999237537384, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.45625001192092896, + "regularize": 0.16364550590515137, + "step": 1095 + }, + { + "dpo_loss": 0.6987076997756958, + "epoch": 2.0784128483703355, + "grad_norm": 81.51363848488977, + "learning_rate": 4.82885934627802e-07, + "logits": -1.3133985996246338, + "logps": -91.41629028320312, + "loss": 0.2393, + "objective": 0.2526317238807678, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.18276092410087585, + "step": 1100 + }, + { + "epoch": 2.0784128483703355, + "eval_dpo_loss": 0.6961632370948792, + "eval_logits": -1.242217779159546, + "eval_logps": -96.93132019042969, + "eval_loss": 0.21558791399002075, + "eval_objective": 0.21485954523086548, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5263975262641907, + "eval_regularize": 0.14524322748184204, + "eval_runtime": 159.0028, + "eval_samples_per_second": 36.414, + "eval_steps_per_second": 3.038, + "step": 1100 + }, + { + "dpo_loss": 0.6975056529045105, + "epoch": 2.0878601794992915, + "grad_norm": 75.26532295370068, + "learning_rate": 4.825847371925484e-07, + "logits": -1.272645115852356, + "logps": -89.9799575805664, + "loss": 0.2312, + "objective": 0.2198951244354248, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1501445770263672, + "step": 1105 + }, + { + "dpo_loss": 0.6822449564933777, + "epoch": 2.0973075106282475, + "grad_norm": 76.11229862634485, + "learning_rate": 4.822810079875308e-07, + "logits": -1.2977901697158813, + "logps": -88.89505767822266, + "loss": 0.2413, + "objective": 0.2520579695701599, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.18383343517780304, + "step": 1110 + }, + { + "dpo_loss": 0.6917838454246521, + "epoch": 2.1067548417572035, + "grad_norm": 81.10051542367798, + "learning_rate": 4.819747503189522e-07, + "logits": -1.268223524093628, + "logps": -89.8685073852539, + "loss": 0.234, + "objective": 0.23825547099113464, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5625, + "regularize": 0.1690770834684372, + "step": 1115 + }, + { + "dpo_loss": 0.6925947070121765, + "epoch": 2.1162021728861595, + "grad_norm": 73.79299474366525, + "learning_rate": 4.816659675205392e-07, + "logits": -1.2154170274734497, + "logps": -89.65646362304688, + "loss": 0.2313, + "objective": 0.2250944823026657, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.15583501756191254, + "step": 1120 + }, + { + "dpo_loss": 0.6929166913032532, + "epoch": 2.1256495040151155, + "grad_norm": 79.44167597980932, + "learning_rate": 4.813546629535053e-07, + "logits": -1.3260807991027832, + "logps": -87.94298553466797, + "loss": 0.235, + "objective": 0.2192649394273758, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.44999998807907104, + "regularize": 0.1499732881784439, + "step": 1125 + }, + { + "dpo_loss": 0.6933780312538147, + "epoch": 2.135096835144072, + "grad_norm": 75.59134488100315, + "learning_rate": 4.810408400065144e-07, + "logits": -1.2588220834732056, + "logps": -90.21156311035156, + "loss": 0.2508, + "objective": 0.25041699409484863, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1810791939496994, + "step": 1130 + }, + { + "dpo_loss": 0.6917892694473267, + "epoch": 2.144544166273028, + "grad_norm": 78.62106956394454, + "learning_rate": 4.807245020956437e-07, + "logits": -1.3259994983673096, + "logps": -89.59319305419922, + "loss": 0.2293, + "objective": 0.23598890006542206, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16680999100208282, + "step": 1135 + }, + { + "dpo_loss": 0.7026151418685913, + "epoch": 2.153991497401984, + "grad_norm": 85.29348168581552, + "learning_rate": 4.804056526643471e-07, + "logits": -1.217145562171936, + "logps": -90.12454986572266, + "loss": 0.2412, + "objective": 0.22202345728874207, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.15176193416118622, + "step": 1140 + }, + { + "dpo_loss": 0.701118528842926, + "epoch": 2.16343882853094, + "grad_norm": 82.7848574013617, + "learning_rate": 4.800842951834168e-07, + "logits": -1.3022010326385498, + "logps": -90.82298278808594, + "loss": 0.2479, + "objective": 0.23917004466056824, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.543749988079071, + "regularize": 0.1690582036972046, + "step": 1145 + }, + { + "dpo_loss": 0.6887177228927612, + "epoch": 2.172886159659896, + "grad_norm": 81.51256071950469, + "learning_rate": 4.797604331509462e-07, + "logits": -1.2722337245941162, + "logps": -90.06324768066406, + "loss": 0.2476, + "objective": 0.24817052483558655, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.17929872870445251, + "step": 1150 + }, + { + "epoch": 2.172886159659896, + "eval_dpo_loss": 0.6958022713661194, + "eval_logits": -1.2484749555587769, + "eval_logps": -95.86177062988281, + "eval_loss": 0.21953806281089783, + "eval_objective": 0.21911680698394775, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.149536594748497, + "eval_runtime": 157.4146, + "eval_samples_per_second": 36.782, + "eval_steps_per_second": 3.068, + "step": 1150 + }, + { + "dpo_loss": 0.7213719487190247, + "epoch": 2.182333490788852, + "grad_norm": 85.26999836671722, + "learning_rate": 4.794340700922921e-07, + "logits": -1.289499044418335, + "logps": -88.68782806396484, + "loss": 0.2405, + "objective": 0.23491954803466797, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.16278235614299774, + "step": 1155 + }, + { + "dpo_loss": 0.7010141015052795, + "epoch": 2.191780821917808, + "grad_norm": 82.49417059894672, + "learning_rate": 4.791052095600351e-07, + "logits": -1.2551113367080688, + "logps": -91.86698150634766, + "loss": 0.2499, + "objective": 0.23400485515594482, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5562499761581421, + "regularize": 0.16390344500541687, + "step": 1160 + }, + { + "dpo_loss": 0.6905627250671387, + "epoch": 2.201228153046764, + "grad_norm": 76.1785158086783, + "learning_rate": 4.787738551339425e-07, + "logits": -1.3215975761413574, + "logps": -90.5303726196289, + "loss": 0.2336, + "objective": 0.24319115281105042, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.17413488030433655, + "step": 1165 + }, + { + "dpo_loss": 0.6864585876464844, + "epoch": 2.21067548417572, + "grad_norm": 90.3766310186037, + "learning_rate": 4.784400104209281e-07, + "logits": -1.2362306118011475, + "logps": -91.8830795288086, + "loss": 0.2504, + "objective": 0.24644558131694794, + "ranking_idealized": 0.4312500059604645, + "ranking_idealized_expo": 0.4312500059604645, + "ranking_simple": 0.4312500059604645, + "regularize": 0.1777997463941574, + "step": 1170 + }, + { + "dpo_loss": 0.6967033743858337, + "epoch": 2.2201228153046766, + "grad_norm": 81.5391367066229, + "learning_rate": 4.781036790550133e-07, + "logits": -1.2316911220550537, + "logps": -90.62294006347656, + "loss": 0.2378, + "objective": 0.23412732779979706, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.48750001192092896, + "regularize": 0.16445696353912354, + "step": 1175 + }, + { + "dpo_loss": 0.7060146331787109, + "epoch": 2.2295701464336326, + "grad_norm": 85.7375377050115, + "learning_rate": 4.777648646972879e-07, + "logits": -1.2484381198883057, + "logps": -94.7322998046875, + "loss": 0.2545, + "objective": 0.24841733276844025, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4437499940395355, + "regularize": 0.17781586945056915, + "step": 1180 + }, + { + "dpo_loss": 0.7063915729522705, + "epoch": 2.2390174775625886, + "grad_norm": 74.78902429353171, + "learning_rate": 4.774235710358699e-07, + "logits": -1.1517055034637451, + "logps": -91.82881927490234, + "loss": 0.2337, + "objective": 0.25040021538734436, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.17976108193397522, + "step": 1185 + }, + { + "dpo_loss": 0.6940587759017944, + "epoch": 2.2484648086915446, + "grad_norm": 79.15932210088977, + "learning_rate": 4.770798017858653e-07, + "logits": -1.2461541891098022, + "logps": -91.99650573730469, + "loss": 0.2465, + "objective": 0.24219770729541779, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.46875, + "regularize": 0.17279183864593506, + "step": 1190 + }, + { + "dpo_loss": 0.7148272395133972, + "epoch": 2.2579121398205007, + "grad_norm": 76.05933709488173, + "learning_rate": 4.7673356068932786e-07, + "logits": -1.22458815574646, + "logps": -93.11870574951172, + "loss": 0.2487, + "objective": 0.26240792870521545, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.59375, + "regularize": 0.19092519581317902, + "step": 1195 + }, + { + "dpo_loss": 0.7013410329818726, + "epoch": 2.2673594709494567, + "grad_norm": 77.60037125978356, + "learning_rate": 4.763848515152184e-07, + "logits": -1.3251944780349731, + "logps": -91.91200256347656, + "loss": 0.2443, + "objective": 0.23794111609458923, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.16780701279640198, + "step": 1200 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 0.6998235583305359, + "eval_logits": -1.224057912826538, + "eval_logps": -97.13621520996094, + "eval_loss": 0.23180466890335083, + "eval_objective": 0.23165498673915863, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5258799195289612, + "eval_regularize": 0.16167263686656952, + "eval_runtime": 157.8532, + "eval_samples_per_second": 36.68, + "eval_steps_per_second": 3.06, + "step": 1200 + }, + { + "dpo_loss": 0.6951106786727905, + "epoch": 2.2768068020784127, + "grad_norm": 80.22359104079362, + "learning_rate": 4.7603367805936347e-07, + "logits": -1.2757624387741089, + "logps": -91.18368530273438, + "loss": 0.2394, + "objective": 0.2431495636701584, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.1736384928226471, + "step": 1205 + }, + { + "dpo_loss": 0.6933736801147461, + "epoch": 2.286254133207369, + "grad_norm": 72.68834763111872, + "learning_rate": 4.7568004414441444e-07, + "logits": -1.2526779174804688, + "logps": -88.98243713378906, + "loss": 0.2351, + "objective": 0.22677263617515564, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.15743525326251984, + "step": 1210 + }, + { + "dpo_loss": 0.6766241788864136, + "epoch": 2.295701464336325, + "grad_norm": 79.38107843409844, + "learning_rate": 4.7532395361980544e-07, + "logits": -1.312524437904358, + "logps": -91.68379974365234, + "loss": 0.2463, + "objective": 0.23765353858470917, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5687500238418579, + "regularize": 0.16999110579490662, + "step": 1215 + }, + { + "dpo_loss": 0.6962811946868896, + "epoch": 2.305148795465281, + "grad_norm": 85.13780816275877, + "learning_rate": 4.749654103617119e-07, + "logits": -1.2287237644195557, + "logps": -92.60826110839844, + "loss": 0.2328, + "objective": 0.22278353571891785, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4437499940395355, + "regularize": 0.15315541625022888, + "step": 1220 + }, + { + "dpo_loss": 0.6947506666183472, + "epoch": 2.314596126594237, + "grad_norm": 78.24452291589246, + "learning_rate": 4.74604418273008e-07, + "logits": -1.3358386754989624, + "logps": -90.82719421386719, + "loss": 0.2346, + "objective": 0.228725865483284, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.15925081074237823, + "step": 1225 + }, + { + "dpo_loss": 0.7003086805343628, + "epoch": 2.324043457723193, + "grad_norm": 81.50256143225303, + "learning_rate": 4.742409812832244e-07, + "logits": -1.2636182308197021, + "logps": -90.55316925048828, + "loss": 0.2489, + "objective": 0.25676098465919495, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.18673010170459747, + "step": 1230 + }, + { + "dpo_loss": 0.6981230974197388, + "epoch": 2.3334907888521492, + "grad_norm": 80.31828285987623, + "learning_rate": 4.738751033485055e-07, + "logits": -1.239471197128296, + "logps": -89.82279205322266, + "loss": 0.2352, + "objective": 0.2329610139131546, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1631486713886261, + "step": 1235 + }, + { + "dpo_loss": 0.7123819589614868, + "epoch": 2.3429381199811052, + "grad_norm": 83.16849127640717, + "learning_rate": 4.7350678845156613e-07, + "logits": -1.2978795766830444, + "logps": -92.16786193847656, + "loss": 0.2402, + "objective": 0.2497522085905075, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.1785140037536621, + "step": 1240 + }, + { + "dpo_loss": 0.6852763891220093, + "epoch": 2.3523854511100613, + "grad_norm": 85.40390965968454, + "learning_rate": 4.7313604060164824e-07, + "logits": -1.2600769996643066, + "logps": -93.20845031738281, + "loss": 0.254, + "objective": 0.27139025926589966, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.543749988079071, + "regularize": 0.20286257565021515, + "step": 1245 + }, + { + "dpo_loss": 0.7078602910041809, + "epoch": 2.3618327822390173, + "grad_norm": 78.24810295158233, + "learning_rate": 4.7276286383447766e-07, + "logits": -1.2538678646087646, + "logps": -91.44749450683594, + "loss": 0.2337, + "objective": 0.23821282386779785, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.16742679476737976, + "step": 1250 + }, + { + "epoch": 2.3618327822390173, + "eval_dpo_loss": 0.6949954628944397, + "eval_logits": -1.23129403591156, + "eval_logps": -96.26293182373047, + "eval_loss": 0.2493569552898407, + "eval_objective": 0.25145968794822693, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5269151329994202, + "eval_regularize": 0.18196015059947968, + "eval_runtime": 158.1128, + "eval_samples_per_second": 36.619, + "eval_steps_per_second": 3.055, + "step": 1250 + }, + { + "dpo_loss": 0.6774023175239563, + "epoch": 2.3712801133679737, + "grad_norm": 73.61944794598868, + "learning_rate": 4.723872622122197e-07, + "logits": -1.222249150276184, + "logps": -92.29754638671875, + "loss": 0.2413, + "objective": 0.2529022693634033, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.1851620376110077, + "step": 1255 + }, + { + "dpo_loss": 0.6773152351379395, + "epoch": 2.3807274444969297, + "grad_norm": 75.01411541839315, + "learning_rate": 4.720092398234351e-07, + "logits": -1.1812235116958618, + "logps": -91.16864013671875, + "loss": 0.242, + "objective": 0.2524015009403229, + "ranking_idealized": 0.4375, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4375, + "regularize": 0.18466997146606445, + "step": 1260 + }, + { + "dpo_loss": 0.7058506011962891, + "epoch": 2.3901747756258858, + "grad_norm": 85.94887253603434, + "learning_rate": 4.716288007830356e-07, + "logits": -1.2233319282531738, + "logps": -91.88188171386719, + "loss": 0.2412, + "objective": 0.24302363395690918, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.1724385917186737, + "step": 1265 + }, + { + "dpo_loss": 0.6782275438308716, + "epoch": 2.3996221067548418, + "grad_norm": 84.19950426466906, + "learning_rate": 4.71245949232239e-07, + "logits": -1.353999376296997, + "logps": -92.9189682006836, + "loss": 0.2592, + "objective": 0.2692815661430359, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.20145881175994873, + "step": 1270 + }, + { + "dpo_loss": 0.6916183233261108, + "epoch": 2.409069437883798, + "grad_norm": 87.47559767434763, + "learning_rate": 4.708606893385241e-07, + "logits": -1.2686707973480225, + "logps": -89.87593078613281, + "loss": 0.2319, + "objective": 0.22510790824890137, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.15594607591629028, + "step": 1275 + }, + { + "dpo_loss": 0.6960471868515015, + "epoch": 2.418516769012754, + "grad_norm": 81.01535668345583, + "learning_rate": 4.7047302529558556e-07, + "logits": -1.2857545614242554, + "logps": -89.09248352050781, + "loss": 0.2388, + "objective": 0.22974196076393127, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.16013725101947784, + "step": 1280 + }, + { + "dpo_loss": 0.6761273145675659, + "epoch": 2.42796410014171, + "grad_norm": 74.11716901737486, + "learning_rate": 4.7008296132328805e-07, + "logits": -1.279343843460083, + "logps": -92.09466552734375, + "loss": 0.2444, + "objective": 0.2343457192182541, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.4625000059604645, + "regularize": 0.1667329967021942, + "step": 1285 + }, + { + "dpo_loss": 0.708074688911438, + "epoch": 2.4374114312706663, + "grad_norm": 77.08424586936, + "learning_rate": 4.6969050166762013e-07, + "logits": -1.232253909111023, + "logps": -89.49784851074219, + "loss": 0.2379, + "objective": 0.24165663123130798, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.17084915935993195, + "step": 1290 + }, + { + "dpo_loss": 0.7073941230773926, + "epoch": 2.4468587623996223, + "grad_norm": 77.54215896924674, + "learning_rate": 4.692956506006486e-07, + "logits": -1.2533094882965088, + "logps": -89.46601104736328, + "loss": 0.2512, + "objective": 0.25510329008102417, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.18436387181282043, + "step": 1295 + }, + { + "dpo_loss": 0.6897347569465637, + "epoch": 2.4563060935285783, + "grad_norm": 81.92124464995868, + "learning_rate": 4.688984124204712e-07, + "logits": -1.279669165611267, + "logps": -88.2325668334961, + "loss": 0.2264, + "objective": 0.22791297733783722, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.15893949568271637, + "step": 1300 + }, + { + "epoch": 2.4563060935285783, + "eval_dpo_loss": 0.6981492042541504, + "eval_logits": -1.2534995079040527, + "eval_logps": -94.45039367675781, + "eval_loss": 0.24725377559661865, + "eval_objective": 0.24559155106544495, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5222567319869995, + "eval_regularize": 0.1757766604423523, + "eval_runtime": 157.5273, + "eval_samples_per_second": 36.756, + "eval_steps_per_second": 3.066, + "step": 1300 + }, + { + "dpo_loss": 0.7108772397041321, + "epoch": 2.4657534246575343, + "grad_norm": 82.39715959123795, + "learning_rate": 4.684987914511708e-07, + "logits": -1.3086609840393066, + "logps": -89.1966323852539, + "loss": 0.2314, + "objective": 0.23325006663799286, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.46875, + "regularize": 0.16216234862804413, + "step": 1305 + }, + { + "dpo_loss": 0.6940029859542847, + "epoch": 2.4752007557864903, + "grad_norm": 77.69520048627098, + "learning_rate": 4.6809679204276735e-07, + "logits": -1.3097118139266968, + "logps": -88.96656799316406, + "loss": 0.2339, + "objective": 0.24012359976768494, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.581250011920929, + "regularize": 0.1707233041524887, + "step": 1310 + }, + { + "dpo_loss": 0.6831144094467163, + "epoch": 2.4846480869154464, + "grad_norm": 80.97079870828857, + "learning_rate": 4.6769241857117127e-07, + "logits": -1.223602294921875, + "logps": -88.93707275390625, + "loss": 0.2369, + "objective": 0.25154465436935425, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.18323318660259247, + "step": 1315 + }, + { + "dpo_loss": 0.7016611695289612, + "epoch": 2.4940954180444024, + "grad_norm": 76.56858236464322, + "learning_rate": 4.6728567543813524e-07, + "logits": -1.142772912979126, + "logps": -87.71903991699219, + "loss": 0.2368, + "objective": 0.2456255704164505, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.53125, + "regularize": 0.17545947432518005, + "step": 1320 + }, + { + "dpo_loss": 0.6933158040046692, + "epoch": 2.5035427491733584, + "grad_norm": 78.36609479465805, + "learning_rate": 4.6687656707120693e-07, + "logits": -1.2576102018356323, + "logps": -87.36424255371094, + "loss": 0.2364, + "objective": 0.2189684808254242, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1496368795633316, + "step": 1325 + }, + { + "dpo_loss": 0.7006167769432068, + "epoch": 2.5129900803023144, + "grad_norm": 80.0154519809813, + "learning_rate": 4.664650979236801e-07, + "logits": -1.2859307527542114, + "logps": -87.9940185546875, + "loss": 0.2438, + "objective": 0.23237256705760956, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.1623109132051468, + "step": 1330 + }, + { + "dpo_loss": 0.6959272623062134, + "epoch": 2.5224374114312704, + "grad_norm": 85.35335471365272, + "learning_rate": 4.660512724745467e-07, + "logits": -1.27793288230896, + "logps": -87.02171325683594, + "loss": 0.2368, + "objective": 0.2255460023880005, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.45625001192092896, + "regularize": 0.15595325827598572, + "step": 1335 + }, + { + "dpo_loss": 0.6886266469955444, + "epoch": 2.531884742560227, + "grad_norm": 78.40692183070935, + "learning_rate": 4.656350952284478e-07, + "logits": -1.1710107326507568, + "logps": -89.88359069824219, + "loss": 0.2381, + "objective": 0.2395767718553543, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.17071406543254852, + "step": 1340 + }, + { + "dpo_loss": 0.6858821511268616, + "epoch": 2.541332073689183, + "grad_norm": 81.74921881509104, + "learning_rate": 4.652165707156246e-07, + "logits": -1.2932147979736328, + "logps": -90.06592559814453, + "loss": 0.2398, + "objective": 0.24520452320575714, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.17661629617214203, + "step": 1345 + }, + { + "dpo_loss": 0.6906551122665405, + "epoch": 2.550779404818139, + "grad_norm": 78.20617287848113, + "learning_rate": 4.6479570349186913e-07, + "logits": -1.212925672531128, + "logps": -88.88258361816406, + "loss": 0.2398, + "objective": 0.222016841173172, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.15295132994651794, + "step": 1350 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 0.7027136087417603, + "eval_logits": -1.2349364757537842, + "eval_logps": -96.20651245117188, + "eval_loss": 0.24674533307552338, + "eval_objective": 0.24622555077075958, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.1759541779756546, + "eval_runtime": 158.2341, + "eval_samples_per_second": 36.591, + "eval_steps_per_second": 3.052, + "step": 1350 + }, + { + "dpo_loss": 0.6958358883857727, + "epoch": 2.560226735947095, + "grad_norm": 75.25806832805698, + "learning_rate": 4.643724981384749e-07, + "logits": -1.2679038047790527, + "logps": -91.00750732421875, + "loss": 0.2358, + "objective": 0.23495018482208252, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.16536661982536316, + "step": 1355 + }, + { + "dpo_loss": 0.6821821331977844, + "epoch": 2.569674067076051, + "grad_norm": 77.03521477955098, + "learning_rate": 4.6394695926218656e-07, + "logits": -1.2167763710021973, + "logps": -89.78504180908203, + "loss": 0.2247, + "objective": 0.2250729501247406, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.574999988079071, + "regularize": 0.1568547487258911, + "step": 1360 + }, + { + "dpo_loss": 0.692377507686615, + "epoch": 2.579121398205007, + "grad_norm": 77.31482925879361, + "learning_rate": 4.635190914951502e-07, + "logits": -1.2918310165405273, + "logps": -91.2199935913086, + "loss": 0.2385, + "objective": 0.2363658845424652, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.44999998807907104, + "regularize": 0.16712811589241028, + "step": 1365 + }, + { + "dpo_loss": 0.6883034110069275, + "epoch": 2.5885687293339634, + "grad_norm": 81.58418573926582, + "learning_rate": 4.6308889949486256e-07, + "logits": -1.392380952835083, + "logps": -91.57077026367188, + "loss": 0.2318, + "objective": 0.23946337401866913, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.17063304781913757, + "step": 1370 + }, + { + "dpo_loss": 0.7015324234962463, + "epoch": 2.5980160604629194, + "grad_norm": 72.99558544647365, + "learning_rate": 4.6265638794412067e-07, + "logits": -1.193394660949707, + "logps": -89.87769317626953, + "loss": 0.2282, + "objective": 0.2171776294708252, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4749999940395355, + "regularize": 0.14702437818050385, + "step": 1375 + }, + { + "dpo_loss": 0.6938923597335815, + "epoch": 2.6074633915918755, + "grad_norm": 77.25092850818334, + "learning_rate": 4.622215615509705e-07, + "logits": -1.2323808670043945, + "logps": -87.7343978881836, + "loss": 0.2284, + "objective": 0.2326710969209671, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.1632818579673767, + "step": 1380 + }, + { + "dpo_loss": 0.6813127398490906, + "epoch": 2.6169107227208315, + "grad_norm": 81.808009092451, + "learning_rate": 4.6178442504865623e-07, + "logits": -1.3131296634674072, + "logps": -89.78125, + "loss": 0.2463, + "objective": 0.24866609275341034, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.18053480982780457, + "step": 1385 + }, + { + "dpo_loss": 0.6854914426803589, + "epoch": 2.6263580538497875, + "grad_norm": 82.80323980610244, + "learning_rate": 4.6134498319556815e-07, + "logits": -1.1897776126861572, + "logps": -89.03984832763672, + "loss": 0.2411, + "objective": 0.22924157977104187, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.16069245338439941, + "step": 1390 + }, + { + "dpo_loss": 0.6865602135658264, + "epoch": 2.6358053849787435, + "grad_norm": 74.25016710243104, + "learning_rate": 4.6090324077519127e-07, + "logits": -1.2304832935333252, + "logps": -89.58457946777344, + "loss": 0.2412, + "objective": 0.23623552918434143, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.1675795018672943, + "step": 1395 + }, + { + "dpo_loss": 0.6881572008132935, + "epoch": 2.6452527161076995, + "grad_norm": 73.86263634495297, + "learning_rate": 4.6045920259605306e-07, + "logits": -1.2435492277145386, + "logps": -87.33223724365234, + "loss": 0.2346, + "objective": 0.23662078380584717, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5562499761581421, + "regularize": 0.1678050458431244, + "step": 1400 + }, + { + "epoch": 2.6452527161076995, + "eval_dpo_loss": 0.7001627683639526, + "eval_logits": -1.2562015056610107, + "eval_logps": -94.65913391113281, + "eval_loss": 0.2565145790576935, + "eval_objective": 0.25667497515678406, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5212215185165405, + "eval_regularize": 0.18665869534015656, + "eval_runtime": 158.5988, + "eval_samples_per_second": 36.507, + "eval_steps_per_second": 3.045, + "step": 1400 + }, + { + "dpo_loss": 0.689062237739563, + "epoch": 2.6547000472366555, + "grad_norm": 78.01273938173455, + "learning_rate": 4.600128734916713e-07, + "logits": -1.2227166891098022, + "logps": -89.48971557617188, + "loss": 0.2422, + "objective": 0.2384214699268341, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5687500238418579, + "regularize": 0.1695152223110199, + "step": 1405 + }, + { + "dpo_loss": 0.6784166693687439, + "epoch": 2.6641473783656116, + "grad_norm": 74.9551460062836, + "learning_rate": 4.595642583205011e-07, + "logits": -1.321109652519226, + "logps": -90.31436920166016, + "loss": 0.2429, + "objective": 0.23836779594421387, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.17052613198757172, + "step": 1410 + }, + { + "dpo_loss": 0.6802228689193726, + "epoch": 2.6735947094945676, + "grad_norm": 75.504272279789, + "learning_rate": 4.5911336196588237e-07, + "logits": -1.294541597366333, + "logps": -88.68775939941406, + "loss": 0.2384, + "objective": 0.2413848638534546, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1733625829219818, + "step": 1415 + }, + { + "dpo_loss": 0.6930629014968872, + "epoch": 2.6830420406235236, + "grad_norm": 72.22345932978276, + "learning_rate": 4.586601893359865e-07, + "logits": -1.230058193206787, + "logps": -88.73162841796875, + "loss": 0.2386, + "objective": 0.2470715045928955, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.17776522040367126, + "step": 1420 + }, + { + "dpo_loss": 0.7063760757446289, + "epoch": 2.69248937175248, + "grad_norm": 81.14829878526038, + "learning_rate": 4.58204745363763e-07, + "logits": -1.2439881563186646, + "logps": -88.5297622680664, + "loss": 0.2369, + "objective": 0.2180163860321045, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.14737875759601593, + "step": 1425 + }, + { + "dpo_loss": 0.6979953646659851, + "epoch": 2.701936702881436, + "grad_norm": 72.60233904196467, + "learning_rate": 4.577470350068858e-07, + "logits": -1.2875802516937256, + "logps": -87.95441436767578, + "loss": 0.2324, + "objective": 0.23016265034675598, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.1603630930185318, + "step": 1430 + }, + { + "dpo_loss": 0.6804190278053284, + "epoch": 2.711384034010392, + "grad_norm": 73.85158200914744, + "learning_rate": 4.5728706324769914e-07, + "logits": -1.2435171604156494, + "logps": -89.67928314208984, + "loss": 0.2336, + "objective": 0.24185959994792938, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.1738176941871643, + "step": 1435 + }, + { + "dpo_loss": 0.6941691040992737, + "epoch": 2.720831365139348, + "grad_norm": 76.60837269004416, + "learning_rate": 4.5682483509316367e-07, + "logits": -1.2151108980178833, + "logps": -88.1698989868164, + "loss": 0.245, + "objective": 0.24525687098503113, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.53125, + "regularize": 0.17583994567394257, + "step": 1440 + }, + { + "dpo_loss": 0.6980563998222351, + "epoch": 2.730278696268304, + "grad_norm": 75.43164801037526, + "learning_rate": 4.5636035557480144e-07, + "logits": -1.2762441635131836, + "logps": -88.98656463623047, + "loss": 0.2434, + "objective": 0.23745755851268768, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.1676519215106964, + "step": 1445 + }, + { + "dpo_loss": 0.7027378082275391, + "epoch": 2.73972602739726, + "grad_norm": 75.21672455179534, + "learning_rate": 4.558936297486415e-07, + "logits": -1.306553602218628, + "logps": -90.38543701171875, + "loss": 0.242, + "objective": 0.24809806048870087, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.17782428860664368, + "step": 1450 + }, + { + "epoch": 2.73972602739726, + "eval_dpo_loss": 0.7014594078063965, + "eval_logits": -1.214097261428833, + "eval_logps": -94.65546417236328, + "eval_loss": 0.2640208899974823, + "eval_objective": 0.26405468583106995, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.1939087212085724, + "eval_runtime": 159.1959, + "eval_samples_per_second": 36.37, + "eval_steps_per_second": 3.034, + "step": 1450 + }, + { + "dpo_loss": 0.6996809244155884, + "epoch": 2.7491733585262166, + "grad_norm": 76.00464187434298, + "learning_rate": 4.5542466269516503e-07, + "logits": -1.1624560356140137, + "logps": -88.36128234863281, + "loss": 0.2355, + "objective": 0.231034517288208, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.1610664278268814, + "step": 1455 + }, + { + "dpo_loss": 0.6926349401473999, + "epoch": 2.7586206896551726, + "grad_norm": 77.37875288927383, + "learning_rate": 4.5495345951924944e-07, + "logits": -1.146475076675415, + "logps": -88.392333984375, + "loss": 0.2214, + "objective": 0.22251549363136292, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5562499761581421, + "regularize": 0.15325197577476501, + "step": 1460 + }, + { + "dpo_loss": 0.6899169087409973, + "epoch": 2.7680680207841286, + "grad_norm": 77.90428885902477, + "learning_rate": 4.544800253501134e-07, + "logits": -1.310323715209961, + "logps": -88.71553802490234, + "loss": 0.2326, + "objective": 0.23641350865364075, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16742180287837982, + "step": 1465 + }, + { + "dpo_loss": 0.7080496549606323, + "epoch": 2.7775153519130846, + "grad_norm": 73.39837293853236, + "learning_rate": 4.540043653412606e-07, + "logits": -1.247336983680725, + "logps": -89.10823059082031, + "loss": 0.2301, + "objective": 0.2354445904493332, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.44999998807907104, + "regularize": 0.16463959217071533, + "step": 1470 + }, + { + "dpo_loss": 0.6979535818099976, + "epoch": 2.7869626830420406, + "grad_norm": 75.71179419593179, + "learning_rate": 4.5352648467042384e-07, + "logits": -1.1792867183685303, + "logps": -88.86894226074219, + "loss": 0.234, + "objective": 0.22246937453746796, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48124998807907104, + "regularize": 0.15267404913902283, + "step": 1475 + }, + { + "dpo_loss": 0.678043782711029, + "epoch": 2.7964100141709967, + "grad_norm": 92.17559432395329, + "learning_rate": 4.5304638853950866e-07, + "logits": -1.198436975479126, + "logps": -88.4832992553711, + "loss": 0.2211, + "objective": 0.2273280918598175, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.15952369570732117, + "step": 1480 + }, + { + "dpo_loss": 0.6995588541030884, + "epoch": 2.8058573452999527, + "grad_norm": 75.6219150180191, + "learning_rate": 4.525640821745368e-07, + "logits": -1.2838484048843384, + "logps": -90.87583923339844, + "loss": 0.2283, + "objective": 0.23600324988365173, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4749999940395355, + "regularize": 0.1660473495721817, + "step": 1485 + }, + { + "dpo_loss": 0.6786238551139832, + "epoch": 2.8153046764289087, + "grad_norm": 74.59167039994438, + "learning_rate": 4.52079570825589e-07, + "logits": -1.2329105138778687, + "logps": -86.96043395996094, + "loss": 0.2333, + "objective": 0.23706336319446564, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.16920097172260284, + "step": 1490 + }, + { + "dpo_loss": 0.7082782983779907, + "epoch": 2.8247520075578647, + "grad_norm": 83.74958128471566, + "learning_rate": 4.515928597667481e-07, + "logits": -1.3093531131744385, + "logps": -90.46369171142578, + "loss": 0.2388, + "objective": 0.26155704259872437, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.19072921574115753, + "step": 1495 + }, + { + "dpo_loss": 0.7037413716316223, + "epoch": 2.8341993386868207, + "grad_norm": 71.00748372107614, + "learning_rate": 4.5110395429604183e-07, + "logits": -1.2782231569290161, + "logps": -87.0984115600586, + "loss": 0.2372, + "objective": 0.2213272601366043, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.15095312893390656, + "step": 1500 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 0.7027208805084229, + "eval_logits": -1.2472012042999268, + "eval_logps": -94.92886352539062, + "eval_loss": 0.2747000455856323, + "eval_objective": 0.2726268768310547, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2023547887802124, + "eval_runtime": 162.0193, + "eval_samples_per_second": 35.736, + "eval_steps_per_second": 2.981, + "step": 1500 + }, + { + "dpo_loss": 0.691096305847168, + "epoch": 2.843646669815777, + "grad_norm": 75.52449443233994, + "learning_rate": 4.5061285973538456e-07, + "logits": -1.2361116409301758, + "logps": -87.37306213378906, + "loss": 0.2267, + "objective": 0.2288275510072708, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.15971791744232178, + "step": 1505 + }, + { + "dpo_loss": 0.6853073835372925, + "epoch": 2.853094000944733, + "grad_norm": 75.06432079116681, + "learning_rate": 4.5011958143051987e-07, + "logits": -1.3150286674499512, + "logps": -87.7445297241211, + "loss": 0.2295, + "objective": 0.2292981892824173, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.16076745092868805, + "step": 1510 + }, + { + "dpo_loss": 0.6877030730247498, + "epoch": 2.862541332073689, + "grad_norm": 71.09686183807273, + "learning_rate": 4.496241247509621e-07, + "logits": -1.3267923593521118, + "logps": -88.32781982421875, + "loss": 0.2355, + "objective": 0.2435312271118164, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.17476090788841248, + "step": 1515 + }, + { + "dpo_loss": 0.7023721933364868, + "epoch": 2.8719886632026452, + "grad_norm": 84.59529154677602, + "learning_rate": 4.4912649508993827e-07, + "logits": -1.3224318027496338, + "logps": -87.6534194946289, + "loss": 0.2331, + "objective": 0.22857289016246796, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.543749988079071, + "regularize": 0.15833568572998047, + "step": 1520 + }, + { + "dpo_loss": 0.7165266275405884, + "epoch": 2.8814359943316012, + "grad_norm": 74.94683227752313, + "learning_rate": 4.486266978643286e-07, + "logits": -1.1609947681427002, + "logps": -86.48796081542969, + "loss": 0.2349, + "objective": 0.2357398271560669, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.16408716142177582, + "step": 1525 + }, + { + "dpo_loss": 0.6916718482971191, + "epoch": 2.8908833254605573, + "grad_norm": 71.49489799909206, + "learning_rate": 4.481247385146086e-07, + "logits": -1.1969927549362183, + "logps": -87.98089599609375, + "loss": 0.2218, + "objective": 0.21987979114055634, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.15071257948875427, + "step": 1530 + }, + { + "dpo_loss": 0.7046507596969604, + "epoch": 2.9003306565895137, + "grad_norm": 72.52640605772251, + "learning_rate": 4.476206225047889e-07, + "logits": -1.2879106998443604, + "logps": -88.4914321899414, + "loss": 0.2265, + "objective": 0.23516707122325897, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16470198333263397, + "step": 1535 + }, + { + "dpo_loss": 0.6844728589057922, + "epoch": 2.9097779877184697, + "grad_norm": 73.91357746088326, + "learning_rate": 4.4711435532235624e-07, + "logits": -1.2798715829849243, + "logps": -89.1636962890625, + "loss": 0.2236, + "objective": 0.21921619772911072, + "ranking_idealized": 0.45625001192092896, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.44999998807907104, + "regularize": 0.15076890587806702, + "step": 1540 + }, + { + "dpo_loss": 0.6933543086051941, + "epoch": 2.9192253188474258, + "grad_norm": 72.93335086466251, + "learning_rate": 4.4660594247821384e-07, + "logits": -1.2931110858917236, + "logps": -88.17526245117188, + "loss": 0.2147, + "objective": 0.20991010963916779, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.14057467877864838, + "step": 1545 + }, + { + "dpo_loss": 0.6930429339408875, + "epoch": 2.9286726499763818, + "grad_norm": 81.90726634302081, + "learning_rate": 4.460953895066209e-07, + "logits": -1.2895206212997437, + "logps": -90.56971740722656, + "loss": 0.2133, + "objective": 0.2167246788740158, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.14742037653923035, + "step": 1550 + }, + { + "epoch": 2.9286726499763818, + "eval_dpo_loss": 0.7006378769874573, + "eval_logits": -1.2344614267349243, + "eval_logps": -95.19911193847656, + "eval_loss": 0.2529480755329132, + "eval_objective": 0.251154363155365, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.1810905784368515, + "eval_runtime": 157.1881, + "eval_samples_per_second": 36.835, + "eval_steps_per_second": 3.073, + "step": 1550 + }, + { + "dpo_loss": 0.7084794044494629, + "epoch": 2.938119981105338, + "grad_norm": 81.37590112985177, + "learning_rate": 4.4558270196513306e-07, + "logits": -1.3027656078338623, + "logps": -89.90327453613281, + "loss": 0.226, + "objective": 0.2371251881122589, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16627724468708038, + "step": 1555 + }, + { + "dpo_loss": 0.6968989968299866, + "epoch": 2.947567312234294, + "grad_norm": 74.4110488665179, + "learning_rate": 4.450678854345412e-07, + "logits": -1.3281409740447998, + "logps": -89.05107116699219, + "loss": 0.2284, + "objective": 0.2369905412197113, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.16730067133903503, + "step": 1560 + }, + { + "dpo_loss": 0.6882658004760742, + "epoch": 2.95701464336325, + "grad_norm": 74.9308740448357, + "learning_rate": 4.445509455188113e-07, + "logits": -1.2605291604995728, + "logps": -91.87565612792969, + "loss": 0.2284, + "objective": 0.22609011828899384, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.15726353228092194, + "step": 1565 + }, + { + "dpo_loss": 0.6850372552871704, + "epoch": 2.966461974492206, + "grad_norm": 75.73765777029178, + "learning_rate": 4.44031887845023e-07, + "logits": -1.2321079969406128, + "logps": -88.75463104248047, + "loss": 0.229, + "objective": 0.2340477705001831, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1655440479516983, + "step": 1570 + }, + { + "dpo_loss": 0.6979643702507019, + "epoch": 2.975909305621162, + "grad_norm": 75.89769576525151, + "learning_rate": 4.4351071806330856e-07, + "logits": -1.2928818464279175, + "logps": -87.43384552001953, + "loss": 0.2337, + "objective": 0.22396059334278107, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.1541641652584076, + "step": 1575 + }, + { + "dpo_loss": 0.6913705468177795, + "epoch": 2.985356636750118, + "grad_norm": 79.25695866196861, + "learning_rate": 4.429874418467913e-07, + "logits": -1.309133768081665, + "logps": -86.86026000976562, + "loss": 0.2402, + "objective": 0.23291504383087158, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.581250011920929, + "regularize": 0.16377797722816467, + "step": 1580 + }, + { + "dpo_loss": 0.703238844871521, + "epoch": 2.9948039678790743, + "grad_norm": 79.54613475110943, + "learning_rate": 4.4246206489152375e-07, + "logits": -1.2111042737960815, + "logps": -88.62483978271484, + "loss": 0.2274, + "objective": 0.23201140761375427, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.16168752312660217, + "step": 1585 + }, + { + "dpo_loss": 0.6940716505050659, + "epoch": 3.0042512990080303, + "grad_norm": 75.03175294996458, + "learning_rate": 4.41934592916426e-07, + "logits": -1.2947633266448975, + "logps": -87.53929138183594, + "loss": 0.2249, + "objective": 0.2187323272228241, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.1493251621723175, + "step": 1590 + }, + { + "dpo_loss": 0.693493664264679, + "epoch": 3.0136986301369864, + "grad_norm": 73.25790878410778, + "learning_rate": 4.414050316632229e-07, + "logits": -1.3149116039276123, + "logps": -89.59700012207031, + "loss": 0.2326, + "objective": 0.2274354249238968, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4749999940395355, + "regularize": 0.15808603167533875, + "step": 1595 + }, + { + "dpo_loss": 0.6876460313796997, + "epoch": 3.0231459612659424, + "grad_norm": 70.71771437549256, + "learning_rate": 4.408733868963821e-07, + "logits": -1.2373759746551514, + "logps": -89.33880615234375, + "loss": 0.2292, + "objective": 0.2364555299282074, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.1676909476518631, + "step": 1600 + }, + { + "epoch": 3.0231459612659424, + "eval_dpo_loss": 0.7038150429725647, + "eval_logits": -1.2436572313308716, + "eval_logps": -93.63340759277344, + "eval_loss": 0.28403371572494507, + "eval_objective": 0.2860759198665619, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.21569444239139557, + "eval_runtime": 157.8424, + "eval_samples_per_second": 36.682, + "eval_steps_per_second": 3.06, + "step": 1600 + }, + { + "dpo_loss": 0.6990983486175537, + "epoch": 3.0325932923948984, + "grad_norm": 71.79146138509809, + "learning_rate": 4.403396644030509e-07, + "logits": -1.225997805595398, + "logps": -86.30064392089844, + "loss": 0.2438, + "objective": 0.2267124354839325, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.15680257976055145, + "step": 1605 + }, + { + "dpo_loss": 0.6825001239776611, + "epoch": 3.0420406235238544, + "grad_norm": 84.9864448074978, + "learning_rate": 4.398038699929935e-07, + "logits": -1.2466906309127808, + "logps": -88.25871276855469, + "loss": 0.2262, + "objective": 0.21322885155677795, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5562499761581421, + "regularize": 0.1449788361787796, + "step": 1610 + }, + { + "dpo_loss": 0.7017780542373657, + "epoch": 3.0514879546528104, + "grad_norm": 71.05456640227479, + "learning_rate": 4.392660094985276e-07, + "logits": -1.2647464275360107, + "logps": -88.5762710571289, + "loss": 0.2331, + "objective": 0.23695608973503113, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.1667782962322235, + "step": 1615 + }, + { + "dpo_loss": 0.6860125660896301, + "epoch": 3.0609352857817664, + "grad_norm": 73.51188299769646, + "learning_rate": 4.38726088774461e-07, + "logits": -1.3439459800720215, + "logps": -88.92823791503906, + "loss": 0.2232, + "objective": 0.21951046586036682, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.1509092003107071, + "step": 1620 + }, + { + "dpo_loss": 0.6869057416915894, + "epoch": 3.070382616910723, + "grad_norm": 81.04643217611545, + "learning_rate": 4.3818411369802795e-07, + "logits": -1.281882882118225, + "logps": -88.72843933105469, + "loss": 0.222, + "objective": 0.22023515403270721, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.15154457092285156, + "step": 1625 + }, + { + "dpo_loss": 0.6761130094528198, + "epoch": 3.079829948039679, + "grad_norm": 77.28947505018488, + "learning_rate": 4.3764009016882484e-07, + "logits": -1.2218523025512695, + "logps": -88.16899108886719, + "loss": 0.2295, + "objective": 0.22817304730415344, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.1605617254972458, + "step": 1630 + }, + { + "dpo_loss": 0.6863864064216614, + "epoch": 3.089277279168635, + "grad_norm": 80.79575367937582, + "learning_rate": 4.370940241087466e-07, + "logits": -1.2691013813018799, + "logps": -85.53553771972656, + "loss": 0.2408, + "objective": 0.22911615669727325, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.518750011920929, + "regularize": 0.16047748923301697, + "step": 1635 + }, + { + "dpo_loss": 0.6985718011856079, + "epoch": 3.098724610297591, + "grad_norm": 76.70049039283718, + "learning_rate": 4.3654592146192137e-07, + "logits": -1.193575382232666, + "logps": -88.6602554321289, + "loss": 0.2323, + "objective": 0.22806735336780548, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1582101434469223, + "step": 1640 + }, + { + "dpo_loss": 0.6904804706573486, + "epoch": 3.108171941426547, + "grad_norm": 73.8243508955083, + "learning_rate": 4.3599578819464664e-07, + "logits": -1.2254277467727661, + "logps": -88.09333801269531, + "loss": 0.2229, + "objective": 0.2212776392698288, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.15222959220409393, + "step": 1645 + }, + { + "dpo_loss": 0.6988715529441833, + "epoch": 3.117619272555503, + "grad_norm": 70.58207807986498, + "learning_rate": 4.3544363029532394e-07, + "logits": -1.288346767425537, + "logps": -88.28787231445312, + "loss": 0.2227, + "objective": 0.22656592726707458, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.4749999940395355, + "regularize": 0.15667878091335297, + "step": 1650 + }, + { + "epoch": 3.117619272555503, + "eval_dpo_loss": 0.7025489807128906, + "eval_logits": -1.233197569847107, + "eval_logps": -93.4762954711914, + "eval_loss": 0.2854098975658417, + "eval_objective": 0.28514111042022705, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.52173912525177, + "eval_regularize": 0.21488620340824127, + "eval_runtime": 159.1208, + "eval_samples_per_second": 36.387, + "eval_steps_per_second": 3.035, + "step": 1650 + }, + { + "dpo_loss": 0.6795647144317627, + "epoch": 3.127066603684459, + "grad_norm": 77.06489456346192, + "learning_rate": 4.3488945377439344e-07, + "logits": -1.2286312580108643, + "logps": -87.36393737792969, + "loss": 0.2162, + "objective": 0.21354565024375916, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.14558915793895721, + "step": 1655 + }, + { + "dpo_loss": 0.6925145387649536, + "epoch": 3.136513934813415, + "grad_norm": 67.06324257323814, + "learning_rate": 4.3433326466426884e-07, + "logits": -1.2775371074676514, + "logps": -88.46916198730469, + "loss": 0.2086, + "objective": 0.1956256926059723, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.1263742297887802, + "step": 1660 + }, + { + "dpo_loss": 0.6909645795822144, + "epoch": 3.1459612659423715, + "grad_norm": 77.02733659224373, + "learning_rate": 4.337750690192717e-07, + "logits": -1.1966722011566162, + "logps": -88.89064025878906, + "loss": 0.2323, + "objective": 0.23447394371032715, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.16537746787071228, + "step": 1665 + }, + { + "dpo_loss": 0.6765434741973877, + "epoch": 3.1554085970713275, + "grad_norm": 76.79340939576753, + "learning_rate": 4.3321487291556537e-07, + "logits": -1.2806079387664795, + "logps": -88.34075927734375, + "loss": 0.2341, + "objective": 0.22429628670215607, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.15664193034172058, + "step": 1670 + }, + { + "dpo_loss": 0.6903152465820312, + "epoch": 3.1648559282002835, + "grad_norm": 71.06959581471867, + "learning_rate": 4.326526824510888e-07, + "logits": -1.1770598888397217, + "logps": -87.21501922607422, + "loss": 0.2203, + "objective": 0.22811034321784973, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.5874999761581421, + "regularize": 0.15907880663871765, + "step": 1675 + }, + { + "dpo_loss": 0.6848000288009644, + "epoch": 3.1743032593292395, + "grad_norm": 71.53335354112947, + "learning_rate": 4.3208850374549066e-07, + "logits": -1.2417142391204834, + "logps": -85.82376861572266, + "loss": 0.2254, + "objective": 0.22943010926246643, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16095009446144104, + "step": 1680 + }, + { + "dpo_loss": 0.6956906914710999, + "epoch": 3.1837505904581955, + "grad_norm": 68.21329452472561, + "learning_rate": 4.3152234294006185e-07, + "logits": -1.2465617656707764, + "logps": -87.61761474609375, + "loss": 0.2211, + "objective": 0.22904673218727112, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5, + "regularize": 0.15947763621807098, + "step": 1685 + }, + { + "dpo_loss": 0.6885348558425903, + "epoch": 3.1931979215871515, + "grad_norm": 72.44090594485864, + "learning_rate": 4.309542061976695e-07, + "logits": -1.2819534540176392, + "logps": -89.31121063232422, + "loss": 0.2324, + "objective": 0.23005850613117218, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.16120502352714539, + "step": 1690 + }, + { + "dpo_loss": 0.6953254342079163, + "epoch": 3.2026452527161076, + "grad_norm": 80.57191405511507, + "learning_rate": 4.303840997026895e-07, + "logits": -1.3103854656219482, + "logps": -89.79373168945312, + "loss": 0.2321, + "objective": 0.24063341319561005, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.17110088467597961, + "step": 1695 + }, + { + "dpo_loss": 0.6881821751594543, + "epoch": 3.2120925838450636, + "grad_norm": 71.42605462474872, + "learning_rate": 4.298120296609391e-07, + "logits": -1.2566523551940918, + "logps": -88.62003326416016, + "loss": 0.2123, + "objective": 0.205051988363266, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48124998807907104, + "regularize": 0.13623377680778503, + "step": 1700 + }, + { + "epoch": 3.2120925838450636, + "eval_dpo_loss": 0.7008152604103088, + "eval_logits": -1.2311402559280396, + "eval_logps": -95.69064331054688, + "eval_loss": 0.275156170129776, + "eval_objective": 0.27562105655670166, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5232919454574585, + "eval_regularize": 0.2055395096540451, + "eval_runtime": 157.5719, + "eval_samples_per_second": 36.745, + "eval_steps_per_second": 3.065, + "step": 1700 + }, + { + "dpo_loss": 0.7004303932189941, + "epoch": 3.22153991497402, + "grad_norm": 78.61435190569276, + "learning_rate": 4.292380022996094e-07, + "logits": -1.2356593608856201, + "logps": -88.43986511230469, + "loss": 0.2149, + "objective": 0.2104000598192215, + "ranking_idealized": 0.44999998807907104, + "ranking_idealized_expo": 0.4124999940395355, + "ranking_simple": 0.42500001192092896, + "regularize": 0.14035701751708984, + "step": 1705 + }, + { + "dpo_loss": 0.6839348673820496, + "epoch": 3.230987246102976, + "grad_norm": 72.2767944523041, + "learning_rate": 4.2866202386719806e-07, + "logits": -1.186471939086914, + "logps": -89.38927459716797, + "loss": 0.218, + "objective": 0.20395776629447937, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.13556428253650665, + "step": 1710 + }, + { + "dpo_loss": 0.6864286661148071, + "epoch": 3.240434577231932, + "grad_norm": 77.24847322414985, + "learning_rate": 4.2808410063344025e-07, + "logits": -1.2315725088119507, + "logps": -90.61546325683594, + "loss": 0.2224, + "objective": 0.22162394225597382, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.15298107266426086, + "step": 1715 + }, + { + "dpo_loss": 0.6974970102310181, + "epoch": 3.249881908360888, + "grad_norm": 76.03703557994152, + "learning_rate": 4.2750423888924156e-07, + "logits": -1.2265657186508179, + "logps": -90.89115905761719, + "loss": 0.2318, + "objective": 0.2390907108783722, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.16934101283550262, + "step": 1720 + }, + { + "dpo_loss": 0.6898042559623718, + "epoch": 3.259329239489844, + "grad_norm": 69.31924665387766, + "learning_rate": 4.2692244494660854e-07, + "logits": -1.256665587425232, + "logps": -87.42665100097656, + "loss": 0.2255, + "objective": 0.21517562866210938, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.550000011920929, + "regularize": 0.1461952179670334, + "step": 1725 + }, + { + "dpo_loss": 0.6960927248001099, + "epoch": 3.2687765706188, + "grad_norm": 74.63257715863116, + "learning_rate": 4.263387251385808e-07, + "logits": -1.2075088024139404, + "logps": -88.08040618896484, + "loss": 0.2275, + "objective": 0.2128901183605194, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.14328083395957947, + "step": 1730 + }, + { + "dpo_loss": 0.6903256177902222, + "epoch": 3.278223901747756, + "grad_norm": 75.40242294372213, + "learning_rate": 4.2575308581916147e-07, + "logits": -1.2131913900375366, + "logps": -92.84107971191406, + "loss": 0.2352, + "objective": 0.2373063564300537, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.16827379167079926, + "step": 1735 + }, + { + "dpo_loss": 0.6846415400505066, + "epoch": 3.287671232876712, + "grad_norm": 76.10083653558631, + "learning_rate": 4.251655333632482e-07, + "logits": -1.273949384689331, + "logps": -90.49128723144531, + "loss": 0.2297, + "objective": 0.21975067257881165, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48750001192092896, + "regularize": 0.1512865275144577, + "step": 1740 + }, + { + "dpo_loss": 0.6826716065406799, + "epoch": 3.297118564005668, + "grad_norm": 74.85403338341007, + "learning_rate": 4.2457607416656417e-07, + "logits": -1.2469581365585327, + "logps": -90.03531646728516, + "loss": 0.2201, + "objective": 0.2303483486175537, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.16208119690418243, + "step": 1745 + }, + { + "dpo_loss": 0.688482403755188, + "epoch": 3.3065658951346246, + "grad_norm": 69.73525123761033, + "learning_rate": 4.239847146455877e-07, + "logits": -1.254630208015442, + "logps": -89.3567886352539, + "loss": 0.218, + "objective": 0.23068399727344513, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.16183575987815857, + "step": 1750 + }, + { + "epoch": 3.3065658951346246, + "eval_dpo_loss": 0.7037237882614136, + "eval_logits": -1.2166752815246582, + "eval_logps": -95.90421295166016, + "eval_loss": 0.279991090297699, + "eval_objective": 0.27828940749168396, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.20791704952716827, + "eval_runtime": 158.1426, + "eval_samples_per_second": 36.613, + "eval_steps_per_second": 3.054, + "step": 1750 + }, + { + "dpo_loss": 0.7040959596633911, + "epoch": 3.3160132262635806, + "grad_norm": 79.0729106207761, + "learning_rate": 4.233914612374832e-07, + "logits": -1.2124968767166138, + "logps": -91.5573959350586, + "loss": 0.2265, + "objective": 0.23880453407764435, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.16839495301246643, + "step": 1755 + }, + { + "dpo_loss": 0.6873825788497925, + "epoch": 3.3254605573925367, + "grad_norm": 79.43729294619003, + "learning_rate": 4.227963204000304e-07, + "logits": -1.2346994876861572, + "logps": -90.03487396240234, + "loss": 0.2216, + "objective": 0.2150363028049469, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.14629802107810974, + "step": 1760 + }, + { + "dpo_loss": 0.7011836767196655, + "epoch": 3.3349078885214927, + "grad_norm": 73.36113823463724, + "learning_rate": 4.221992986115548e-07, + "logits": -1.309787392616272, + "logps": -89.97632598876953, + "loss": 0.2261, + "objective": 0.23244591057300568, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.41874998807907104, + "ranking_simple": 0.42500001192092896, + "regularize": 0.16232752799987793, + "step": 1765 + }, + { + "dpo_loss": 0.6768354177474976, + "epoch": 3.3443552196504487, + "grad_norm": 79.18185589393623, + "learning_rate": 4.2160040237085627e-07, + "logits": -1.3097981214523315, + "logps": -93.37566375732422, + "loss": 0.2236, + "objective": 0.23846200108528137, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.17077843844890594, + "step": 1770 + }, + { + "dpo_loss": 0.6806414127349854, + "epoch": 3.3538025507794047, + "grad_norm": 76.07180693015962, + "learning_rate": 4.2099963819713896e-07, + "logits": -1.2354665994644165, + "logps": -92.22715759277344, + "loss": 0.2161, + "objective": 0.22700223326683044, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.15893810987472534, + "step": 1775 + }, + { + "dpo_loss": 0.6920955777168274, + "epoch": 3.3632498819083607, + "grad_norm": 78.86169700611478, + "learning_rate": 4.203970126299404e-07, + "logits": -1.1942596435546875, + "logps": -89.15409088134766, + "loss": 0.236, + "objective": 0.22877366840839386, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.15956410765647888, + "step": 1780 + }, + { + "dpo_loss": 0.7036693692207336, + "epoch": 3.372697213037317, + "grad_norm": 75.60890885363388, + "learning_rate": 4.1979253222905984e-07, + "logits": -1.3075898885726929, + "logps": -91.54581451416016, + "loss": 0.2256, + "objective": 0.2090461701154709, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.13867922127246857, + "step": 1785 + }, + { + "dpo_loss": 0.684840977191925, + "epoch": 3.382144544166273, + "grad_norm": 74.14098312646254, + "learning_rate": 4.19186203574487e-07, + "logits": -1.1911137104034424, + "logps": -91.33808135986328, + "loss": 0.2194, + "objective": 0.2209484875202179, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.1524643748998642, + "step": 1790 + }, + { + "dpo_loss": 0.7015503644943237, + "epoch": 3.391591875295229, + "grad_norm": 75.93493261152722, + "learning_rate": 4.185780332663306e-07, + "logits": -1.3089752197265625, + "logps": -90.48160552978516, + "loss": 0.2183, + "objective": 0.21567769348621368, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.14552268385887146, + "step": 1795 + }, + { + "dpo_loss": 0.7126961350440979, + "epoch": 3.4010392064241852, + "grad_norm": 75.44154451389085, + "learning_rate": 4.1796802792474655e-07, + "logits": -1.282238245010376, + "logps": -89.86241912841797, + "loss": 0.2086, + "objective": 0.21801860630512238, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.53125, + "regularize": 0.14674898982048035, + "step": 1800 + }, + { + "epoch": 3.4010392064241852, + "eval_dpo_loss": 0.7027232646942139, + "eval_logits": -1.2183377742767334, + "eval_logps": -95.69831085205078, + "eval_loss": 0.29446694254875183, + "eval_objective": 0.2932322025299072, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5232919454574585, + "eval_regularize": 0.22295984625816345, + "eval_runtime": 157.9813, + "eval_samples_per_second": 36.65, + "eval_steps_per_second": 3.057, + "step": 1800 + }, + { + "dpo_loss": 0.6810536980628967, + "epoch": 3.4104865375531412, + "grad_norm": 71.63965639419007, + "learning_rate": 4.1735619418986554e-07, + "logits": -1.2107884883880615, + "logps": -90.13643646240234, + "loss": 0.2135, + "objective": 0.20621566474437714, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5687500238418579, + "regularize": 0.13811030983924866, + "step": 1805 + }, + { + "dpo_loss": 0.6831521987915039, + "epoch": 3.4199338686820973, + "grad_norm": 72.26883405440417, + "learning_rate": 4.1674253872172126e-07, + "logits": -1.2936310768127441, + "logps": -90.55397033691406, + "loss": 0.2152, + "objective": 0.21073338389396667, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4749999940395355, + "regularize": 0.14241817593574524, + "step": 1810 + }, + { + "dpo_loss": 0.6987210512161255, + "epoch": 3.4293811998110533, + "grad_norm": 76.83410855661971, + "learning_rate": 4.1612706820017735e-07, + "logits": -1.214812994003296, + "logps": -90.34785461425781, + "loss": 0.2151, + "objective": 0.21492168307304382, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.4937500059604645, + "regularize": 0.145049586892128, + "step": 1815 + }, + { + "dpo_loss": 0.6953445672988892, + "epoch": 3.4388285309400093, + "grad_norm": 73.25854640302302, + "learning_rate": 4.1550978932485516e-07, + "logits": -1.2482576370239258, + "logps": -90.07608795166016, + "loss": 0.2118, + "objective": 0.2104315310716629, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.14089706540107727, + "step": 1820 + }, + { + "dpo_loss": 0.6868658065795898, + "epoch": 3.4482758620689653, + "grad_norm": 70.67689663662213, + "learning_rate": 4.1489070881506053e-07, + "logits": -1.217498779296875, + "logps": -90.81259155273438, + "loss": 0.2224, + "objective": 0.21237485110759735, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.512499988079071, + "regularize": 0.14368824660778046, + "step": 1825 + }, + { + "dpo_loss": 0.7178629636764526, + "epoch": 3.4577231931979218, + "grad_norm": 74.0418276231077, + "learning_rate": 4.142698334097109e-07, + "logits": -1.2087634801864624, + "logps": -89.52534484863281, + "loss": 0.2271, + "objective": 0.24181203544139862, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.17002572119235992, + "step": 1830 + }, + { + "dpo_loss": 0.6907800436019897, + "epoch": 3.467170524326878, + "grad_norm": 74.29390968359742, + "learning_rate": 4.1364716986726147e-07, + "logits": -1.2132540941238403, + "logps": -87.84049987792969, + "loss": 0.2119, + "objective": 0.22763434052467346, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.4749999940395355, + "regularize": 0.15855631232261658, + "step": 1835 + }, + { + "dpo_loss": 0.6834974884986877, + "epoch": 3.476617855455834, + "grad_norm": 70.530010249313, + "learning_rate": 4.130227249656324e-07, + "logits": -1.290542483329773, + "logps": -89.50587463378906, + "loss": 0.2227, + "objective": 0.22068659961223602, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.15233686566352844, + "step": 1840 + }, + { + "dpo_loss": 0.6878833770751953, + "epoch": 3.48606518658479, + "grad_norm": 66.897938998051, + "learning_rate": 4.1239650550213435e-07, + "logits": -1.2337359189987183, + "logps": -87.03044128417969, + "loss": 0.2064, + "objective": 0.22119465470314026, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.15240630507469177, + "step": 1845 + }, + { + "dpo_loss": 0.7014085650444031, + "epoch": 3.495512517713746, + "grad_norm": 70.40529049471338, + "learning_rate": 4.1176851829339465e-07, + "logits": -1.3215398788452148, + "logps": -87.68231201171875, + "loss": 0.216, + "objective": 0.21836349368095398, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.1482226401567459, + "step": 1850 + }, + { + "epoch": 3.495512517713746, + "eval_dpo_loss": 0.702814519405365, + "eval_logits": -1.22354257106781, + "eval_logps": -93.07842254638672, + "eval_loss": 0.28953778743743896, + "eval_objective": 0.28734228014945984, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5212215185165405, + "eval_regularize": 0.21706083416938782, + "eval_runtime": 157.9047, + "eval_samples_per_second": 36.668, + "eval_steps_per_second": 3.059, + "step": 1850 + }, + { + "dpo_loss": 0.6787363290786743, + "epoch": 3.504959848842702, + "grad_norm": 73.96509104787444, + "learning_rate": 4.111387701752834e-07, + "logits": -1.1543210744857788, + "logps": -88.16014099121094, + "loss": 0.2149, + "objective": 0.23128505051136017, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.574999988079071, + "regularize": 0.16341140866279602, + "step": 1855 + }, + { + "dpo_loss": 0.6856968998908997, + "epoch": 3.514407179971658, + "grad_norm": 68.14542483518404, + "learning_rate": 4.1050726800283886e-07, + "logits": -1.2676688432693481, + "logps": -85.95804595947266, + "loss": 0.2151, + "objective": 0.20490717887878418, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.4625000059604645, + "regularize": 0.13633747398853302, + "step": 1860 + }, + { + "dpo_loss": 0.6999458074569702, + "epoch": 3.5238545111006143, + "grad_norm": 74.92773908196327, + "learning_rate": 4.0987401865019246e-07, + "logits": -1.2837023735046387, + "logps": -87.63172149658203, + "loss": 0.2158, + "objective": 0.21518392860889435, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.1451893448829651, + "step": 1865 + }, + { + "dpo_loss": 0.6849756836891174, + "epoch": 3.5333018422295703, + "grad_norm": 71.18170455631198, + "learning_rate": 4.092390290104946e-07, + "logits": -1.2384998798370361, + "logps": -89.23136901855469, + "loss": 0.2155, + "objective": 0.22379860281944275, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1553010642528534, + "step": 1870 + }, + { + "dpo_loss": 0.6910787224769592, + "epoch": 3.5427491733585263, + "grad_norm": 70.20472742023536, + "learning_rate": 4.086023059958393e-07, + "logits": -1.22196364402771, + "logps": -87.11416625976562, + "loss": 0.2314, + "objective": 0.23368553817272186, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.1645776778459549, + "step": 1875 + }, + { + "dpo_loss": 0.6986586451530457, + "epoch": 3.5521965044874824, + "grad_norm": 68.61044209679825, + "learning_rate": 4.0796385653718916e-07, + "logits": -1.2331111431121826, + "logps": -87.99723815917969, + "loss": 0.2188, + "objective": 0.23793819546699524, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.46875, + "regularize": 0.16807231307029724, + "step": 1880 + }, + { + "dpo_loss": 0.6858953237533569, + "epoch": 3.5616438356164384, + "grad_norm": 75.74083195147854, + "learning_rate": 4.073236875842995e-07, + "logits": -1.2994650602340698, + "logps": -87.44940185546875, + "loss": 0.2144, + "objective": 0.22179841995239258, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.1532088816165924, + "step": 1885 + }, + { + "dpo_loss": 0.6975304484367371, + "epoch": 3.5710911667453944, + "grad_norm": 74.25528642912279, + "learning_rate": 4.06681806105643e-07, + "logits": -1.2362072467803955, + "logps": -88.1231689453125, + "loss": 0.2207, + "objective": 0.21597766876220703, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.14622461795806885, + "step": 1890 + }, + { + "dpo_loss": 0.6833354234695435, + "epoch": 3.5805384978743504, + "grad_norm": 71.48737502356961, + "learning_rate": 4.060382190883338e-07, + "logits": -1.2433570623397827, + "logps": -87.79743194580078, + "loss": 0.2115, + "objective": 0.23286516964435577, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.1645316183567047, + "step": 1895 + }, + { + "dpo_loss": 0.7012842297554016, + "epoch": 3.5899858290033064, + "grad_norm": 70.89129174284837, + "learning_rate": 4.053929335380516e-07, + "logits": -1.219341516494751, + "logps": -88.85891723632812, + "loss": 0.2182, + "objective": 0.19823125004768372, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.12810282409191132, + "step": 1900 + }, + { + "epoch": 3.5899858290033064, + "eval_dpo_loss": 0.7018768191337585, + "eval_logits": -1.2137829065322876, + "eval_logps": -95.2384033203125, + "eval_loss": 0.29732540249824524, + "eval_objective": 0.2976545989513397, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.520703911781311, + "eval_regularize": 0.2274669110774994, + "eval_runtime": 157.4719, + "eval_samples_per_second": 36.768, + "eval_steps_per_second": 3.067, + "step": 1900 + }, + { + "dpo_loss": 0.7078115344047546, + "epoch": 3.5994331601322624, + "grad_norm": 66.4607315520783, + "learning_rate": 4.047459564789653e-07, + "logits": -1.29160737991333, + "logps": -89.04077911376953, + "loss": 0.2096, + "objective": 0.20699355006217957, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48750001192092896, + "regularize": 0.13621237874031067, + "step": 1905 + }, + { + "dpo_loss": 0.6720970869064331, + "epoch": 3.6088804912612185, + "grad_norm": 71.70509436040507, + "learning_rate": 4.040972949536561e-07, + "logits": -1.1920585632324219, + "logps": -89.29122161865234, + "loss": 0.2159, + "objective": 0.20662541687488556, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.13941572606563568, + "step": 1910 + }, + { + "dpo_loss": 0.6973280310630798, + "epoch": 3.618327822390175, + "grad_norm": 71.88875462075627, + "learning_rate": 4.0344695602304157e-07, + "logits": -1.2554800510406494, + "logps": -90.129150390625, + "loss": 0.211, + "objective": 0.2120811492204666, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.14234834909439087, + "step": 1915 + }, + { + "dpo_loss": 0.6800395846366882, + "epoch": 3.627775153519131, + "grad_norm": 74.42477707252212, + "learning_rate": 4.0279494676629844e-07, + "logits": -1.2180017232894897, + "logps": -88.39971160888672, + "loss": 0.2152, + "objective": 0.21058933436870575, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.44999998807907104, + "regularize": 0.1425853818655014, + "step": 1920 + }, + { + "dpo_loss": 0.6923831701278687, + "epoch": 3.637222484648087, + "grad_norm": 73.34941427015573, + "learning_rate": 4.021412742807854e-07, + "logits": -1.1889969110488892, + "logps": -86.75862121582031, + "loss": 0.2262, + "objective": 0.21546685695648193, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.14622853696346283, + "step": 1925 + }, + { + "dpo_loss": 0.6955797076225281, + "epoch": 3.646669815777043, + "grad_norm": 73.03460385154673, + "learning_rate": 4.01485945681966e-07, + "logits": -1.1690335273742676, + "logps": -90.11813354492188, + "loss": 0.2297, + "objective": 0.2290828675031662, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.15952490270137787, + "step": 1930 + }, + { + "dpo_loss": 0.6710019111633301, + "epoch": 3.656117146905999, + "grad_norm": 74.1516440000823, + "learning_rate": 4.0082896810333144e-07, + "logits": -1.1948826313018799, + "logps": -86.81245422363281, + "loss": 0.2282, + "objective": 0.234673410654068, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5, + "regularize": 0.1675732135772705, + "step": 1935 + }, + { + "dpo_loss": 0.6779486536979675, + "epoch": 3.665564478034955, + "grad_norm": 73.38625711928812, + "learning_rate": 4.001703486963223e-07, + "logits": -1.2244865894317627, + "logps": -88.3202896118164, + "loss": 0.2144, + "objective": 0.21537482738494873, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.48750001192092896, + "regularize": 0.14757993817329407, + "step": 1940 + }, + { + "dpo_loss": 0.6879987716674805, + "epoch": 3.6750118091639115, + "grad_norm": 71.15714667512788, + "learning_rate": 3.9951009463025125e-07, + "logits": -1.2248257398605347, + "logps": -85.404541015625, + "loss": 0.212, + "objective": 0.20684568583965302, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1380458027124405, + "step": 1945 + }, + { + "dpo_loss": 0.6955716013908386, + "epoch": 3.6844591402928675, + "grad_norm": 69.03202990753444, + "learning_rate": 3.988482130922249e-07, + "logits": -1.2241899967193604, + "logps": -87.9198226928711, + "loss": 0.2097, + "objective": 0.21265700459480286, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.543749988079071, + "regularize": 0.143099844455719, + "step": 1950 + }, + { + "epoch": 3.6844591402928675, + "eval_dpo_loss": 0.7046319842338562, + "eval_logits": -1.2111308574676514, + "eval_logps": -93.49398040771484, + "eval_loss": 0.3022589683532715, + "eval_objective": 0.29996559023857117, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.52173912525177, + "eval_regularize": 0.2295023649930954, + "eval_runtime": 160.3461, + "eval_samples_per_second": 36.109, + "eval_steps_per_second": 3.012, + "step": 1950 + }, + { + "dpo_loss": 0.6958789229393005, + "epoch": 3.6939064714218235, + "grad_norm": 68.69646746773823, + "learning_rate": 3.981847112870654e-07, + "logits": -1.222076177597046, + "logps": -87.19930267333984, + "loss": 0.214, + "objective": 0.22020037472248077, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4625000059604645, + "regularize": 0.1506124883890152, + "step": 1955 + }, + { + "dpo_loss": 0.6883679032325745, + "epoch": 3.7033538025507795, + "grad_norm": 74.56605724250167, + "learning_rate": 3.9751959643723215e-07, + "logits": -1.209473729133606, + "logps": -89.5036849975586, + "loss": 0.2277, + "objective": 0.2188255488872528, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4749999940395355, + "regularize": 0.14998875558376312, + "step": 1960 + }, + { + "dpo_loss": 0.6816436052322388, + "epoch": 3.7128011336797355, + "grad_norm": 73.39352968814994, + "learning_rate": 3.9685287578274284e-07, + "logits": -1.2119733095169067, + "logps": -87.68373107910156, + "loss": 0.2122, + "objective": 0.22281642258167267, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.15465207397937775, + "step": 1965 + }, + { + "dpo_loss": 0.6812302470207214, + "epoch": 3.7222484648086915, + "grad_norm": 73.99180921100901, + "learning_rate": 3.961845565810954e-07, + "logits": -1.2347557544708252, + "logps": -87.96427917480469, + "loss": 0.2161, + "objective": 0.20007386803627014, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.46875, + "regularize": 0.13195081055164337, + "step": 1970 + }, + { + "dpo_loss": 0.6964179277420044, + "epoch": 3.7316957959376476, + "grad_norm": 72.84292160439442, + "learning_rate": 3.9551464610718815e-07, + "logits": -1.2395890951156616, + "logps": -87.82969665527344, + "loss": 0.2033, + "objective": 0.19435453414916992, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.12471272796392441, + "step": 1975 + }, + { + "dpo_loss": 0.6921079158782959, + "epoch": 3.7411431270666036, + "grad_norm": 66.93544436043771, + "learning_rate": 3.9484315165324123e-07, + "logits": -1.3096370697021484, + "logps": -89.51728820800781, + "loss": 0.2112, + "objective": 0.22608831524848938, + "ranking_idealized": 0.6312500238418579, + "ranking_idealized_expo": 0.6000000238418579, + "ranking_simple": 0.59375, + "regularize": 0.1568775177001953, + "step": 1980 + }, + { + "dpo_loss": 0.6992356777191162, + "epoch": 3.7505904581955596, + "grad_norm": 70.59081285310172, + "learning_rate": 3.941700805287168e-07, + "logits": -1.2646957635879517, + "logps": -88.4402084350586, + "loss": 0.2132, + "objective": 0.21550500392913818, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.48750001192092896, + "regularize": 0.1455814391374588, + "step": 1985 + }, + { + "dpo_loss": 0.695839524269104, + "epoch": 3.7600377893245156, + "grad_norm": 75.7270912521511, + "learning_rate": 3.9349544006023976e-07, + "logits": -1.186018466949463, + "logps": -88.2295150756836, + "loss": 0.2182, + "objective": 0.219081312417984, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4749999940395355, + "regularize": 0.1494973599910736, + "step": 1990 + }, + { + "dpo_loss": 0.6939408779144287, + "epoch": 3.769485120453472, + "grad_norm": 69.76456500792314, + "learning_rate": 3.928192375915179e-07, + "logits": -1.260425329208374, + "logps": -88.10365295410156, + "loss": 0.2134, + "objective": 0.22218754887580872, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.15279348194599152, + "step": 1995 + }, + { + "dpo_loss": 0.6960574388504028, + "epoch": 3.778932451582428, + "grad_norm": 67.23828756636841, + "learning_rate": 3.9214148048326203e-07, + "logits": -1.2658439874649048, + "logps": -89.23229217529297, + "loss": 0.2076, + "objective": 0.2040717601776123, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.1344660073518753, + "step": 2000 + }, + { + "epoch": 3.778932451582428, + "eval_dpo_loss": 0.7034488320350647, + "eval_logits": -1.2337108850479126, + "eval_logps": -93.09392547607422, + "eval_loss": 0.3083517551422119, + "eval_objective": 0.3067256808280945, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.23638078570365906, + "eval_runtime": 157.9882, + "eval_samples_per_second": 36.648, + "eval_steps_per_second": 3.057, + "step": 2000 + }, + { + "dpo_loss": 0.6904383301734924, + "epoch": 3.788379782711384, + "grad_norm": 70.81937143215949, + "learning_rate": 3.914621761131054e-07, + "logits": -1.3027160167694092, + "logps": -87.96573638916016, + "loss": 0.2112, + "objective": 0.2065231055021286, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5625, + "regularize": 0.1374792605638504, + "step": 2005 + }, + { + "dpo_loss": 0.6968480944633484, + "epoch": 3.79782711384034, + "grad_norm": 72.90625556362093, + "learning_rate": 3.907813318755243e-07, + "logits": -1.2627811431884766, + "logps": -86.23448181152344, + "loss": 0.2151, + "objective": 0.22150051593780518, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 0.15181571245193481, + "step": 2010 + }, + { + "dpo_loss": 0.6842600703239441, + "epoch": 3.807274444969296, + "grad_norm": 73.31719528122754, + "learning_rate": 3.9009895518175665e-07, + "logits": -1.215425968170166, + "logps": -87.07940673828125, + "loss": 0.2091, + "objective": 0.2033880203962326, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1349620223045349, + "step": 2015 + }, + { + "dpo_loss": 0.6808702349662781, + "epoch": 3.816721776098252, + "grad_norm": 74.57110438173568, + "learning_rate": 3.894150534597219e-07, + "logits": -1.236825704574585, + "logps": -87.09656524658203, + "loss": 0.2082, + "objective": 0.22032049298286438, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.543749988079071, + "regularize": 0.15223348140716553, + "step": 2020 + }, + { + "dpo_loss": 0.7069395780563354, + "epoch": 3.826169107227208, + "grad_norm": 68.4209155562685, + "learning_rate": 3.887296341539399e-07, + "logits": -1.2843348979949951, + "logps": -86.17149353027344, + "loss": 0.2156, + "objective": 0.22795970737934113, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.48750001192092896, + "regularize": 0.15726572275161743, + "step": 2025 + }, + { + "dpo_loss": 0.6793303489685059, + "epoch": 3.8356164383561646, + "grad_norm": 75.1413449223689, + "learning_rate": 3.880427047254501e-07, + "logits": -1.2218055725097656, + "logps": -86.53273010253906, + "loss": 0.2093, + "objective": 0.2060566246509552, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48750001192092896, + "regularize": 0.13812358677387238, + "step": 2030 + }, + { + "dpo_loss": 0.6906328201293945, + "epoch": 3.8450637694851206, + "grad_norm": 68.98202095352073, + "learning_rate": 3.8735427265172994e-07, + "logits": -1.2316806316375732, + "logps": -87.60394287109375, + "loss": 0.2064, + "objective": 0.20692899823188782, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.13786569237709045, + "step": 2035 + }, + { + "dpo_loss": 0.6925719976425171, + "epoch": 3.8545111006140766, + "grad_norm": 70.40093898247615, + "learning_rate": 3.8666434542661384e-07, + "logits": -1.2409096956253052, + "logps": -85.93083190917969, + "loss": 0.2128, + "objective": 0.21727947890758514, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.14802229404449463, + "step": 2040 + }, + { + "dpo_loss": 0.6921749114990234, + "epoch": 3.8639584317430327, + "grad_norm": 70.45551281696774, + "learning_rate": 3.859729305602116e-07, + "logits": -1.2216551303863525, + "logps": -86.43045043945312, + "loss": 0.2027, + "objective": 0.2071777880191803, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.137960284948349, + "step": 2045 + }, + { + "dpo_loss": 0.6951195597648621, + "epoch": 3.8734057628719887, + "grad_norm": 84.99479776157791, + "learning_rate": 3.852800355788263e-07, + "logits": -1.257700800895691, + "logps": -88.80815124511719, + "loss": 0.2099, + "objective": 0.21275556087493896, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.14324359595775604, + "step": 2050 + }, + { + "epoch": 3.8734057628719887, + "eval_dpo_loss": 0.7044119238853455, + "eval_logits": -1.2279720306396484, + "eval_logps": -93.17267608642578, + "eval_loss": 0.29618221521377563, + "eval_objective": 0.29535168409347534, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5212215185165405, + "eval_regularize": 0.22491048276424408, + "eval_runtime": 158.2417, + "eval_samples_per_second": 36.59, + "eval_steps_per_second": 3.052, + "step": 2050 + }, + { + "dpo_loss": 0.6845670938491821, + "epoch": 3.8828530940009447, + "grad_norm": 72.36813730314171, + "learning_rate": 3.845856680248729e-07, + "logits": -1.26739501953125, + "logps": -88.06434631347656, + "loss": 0.2088, + "objective": 0.2010469138622284, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.132590189576149, + "step": 2055 + }, + { + "dpo_loss": 0.6887670755386353, + "epoch": 3.8923004251299007, + "grad_norm": 66.16332670938284, + "learning_rate": 3.8388983545679546e-07, + "logits": -1.178450345993042, + "logps": -86.03468322753906, + "loss": 0.2002, + "objective": 0.20420917868614197, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.13533246517181396, + "step": 2060 + }, + { + "dpo_loss": 0.6974113583564758, + "epoch": 3.9017477562588567, + "grad_norm": 67.85463149312577, + "learning_rate": 3.831925454489857e-07, + "logits": -1.2947609424591064, + "logps": -88.52572631835938, + "loss": 0.2062, + "objective": 0.2186674177646637, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1489262729883194, + "step": 2065 + }, + { + "dpo_loss": 0.6959481835365295, + "epoch": 3.9111950873878127, + "grad_norm": 70.49600282460919, + "learning_rate": 3.824938055916998e-07, + "logits": -1.2146751880645752, + "logps": -85.87166595458984, + "loss": 0.2144, + "objective": 0.21881794929504395, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.59375, + "regularize": 0.14922314882278442, + "step": 2070 + }, + { + "dpo_loss": 0.7003825306892395, + "epoch": 3.9206424185167688, + "grad_norm": 68.41116746766701, + "learning_rate": 3.8179362349097624e-07, + "logits": -1.2074339389801025, + "logps": -88.32437133789062, + "loss": 0.2036, + "objective": 0.20541544258594513, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.4937500059604645, + "regularize": 0.1353771686553955, + "step": 2075 + }, + { + "dpo_loss": 0.6963819265365601, + "epoch": 3.930089749645725, + "grad_norm": 64.33739691711719, + "learning_rate": 3.810920067685525e-07, + "logits": -1.2292484045028687, + "logps": -87.91120147705078, + "loss": 0.1955, + "objective": 0.20319481194019318, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.4437499940395355, + "regularize": 0.13355661928653717, + "step": 2080 + }, + { + "dpo_loss": 0.6952013969421387, + "epoch": 3.9395370807746812, + "grad_norm": 68.36853706112721, + "learning_rate": 3.8038896306178304e-07, + "logits": -1.244873046875, + "logps": -88.29363250732422, + "loss": 0.1996, + "objective": 0.19693097472190857, + "ranking_idealized": 0.44999998807907104, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4375, + "regularize": 0.12741082906723022, + "step": 2085 + }, + { + "dpo_loss": 0.6985955238342285, + "epoch": 3.9489844119036372, + "grad_norm": 72.63484898585152, + "learning_rate": 3.79684500023555e-07, + "logits": -1.3063652515411377, + "logps": -88.39873504638672, + "loss": 0.1975, + "objective": 0.18639475107192993, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.11653516441583633, + "step": 2090 + }, + { + "dpo_loss": 0.6845866441726685, + "epoch": 3.9584317430325933, + "grad_norm": 75.90816419725637, + "learning_rate": 3.7897862532220594e-07, + "logits": -1.1452820301055908, + "logps": -88.32160949707031, + "loss": 0.2137, + "objective": 0.22087030112743378, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.15241165459156036, + "step": 2095 + }, + { + "dpo_loss": 0.7027496099472046, + "epoch": 3.9678790741615493, + "grad_norm": 65.23926199909047, + "learning_rate": 3.7827134664143944e-07, + "logits": -1.2793314456939697, + "logps": -87.33671569824219, + "loss": 0.2001, + "objective": 0.20044174790382385, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.13016676902770996, + "step": 2100 + }, + { + "epoch": 3.9678790741615493, + "eval_dpo_loss": 0.7062909007072449, + "eval_logits": -1.2079051733016968, + "eval_logps": -93.9210433959961, + "eval_loss": 0.31389835476875305, + "eval_objective": 0.31231164932250977, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5186335444450378, + "eval_regularize": 0.24168255925178528, + "eval_runtime": 157.946, + "eval_samples_per_second": 36.658, + "eval_steps_per_second": 3.058, + "step": 2100 + }, + { + "dpo_loss": 0.694995641708374, + "epoch": 3.9773264052905053, + "grad_norm": 70.6276970104338, + "learning_rate": 3.7756267168024216e-07, + "logits": -1.1883465051651, + "logps": -87.5169448852539, + "loss": 0.2032, + "objective": 0.20670051872730255, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.1372009664773941, + "step": 2105 + }, + { + "dpo_loss": 0.682449996471405, + "epoch": 3.9867737364194618, + "grad_norm": 73.84783896819107, + "learning_rate": 3.7685260815279985e-07, + "logits": -1.191896677017212, + "logps": -87.0994644165039, + "loss": 0.2025, + "objective": 0.20286063849925995, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.13461564481258392, + "step": 2110 + }, + { + "dpo_loss": 0.6851739287376404, + "epoch": 3.9962210675484178, + "grad_norm": 72.15356901954534, + "learning_rate": 3.7614116378841304e-07, + "logits": -1.1415256261825562, + "logps": -87.24793243408203, + "loss": 0.1994, + "objective": 0.18393009901046753, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.11541268974542618, + "step": 2115 + }, + { + "dpo_loss": 0.6915415525436401, + "epoch": 4.005668398677374, + "grad_norm": 69.87138624592257, + "learning_rate": 3.754283463314134e-07, + "logits": -1.2651426792144775, + "logps": -87.78424072265625, + "loss": 0.2082, + "objective": 0.2031985968351364, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.13404445350170135, + "step": 2120 + }, + { + "dpo_loss": 0.6839936375617981, + "epoch": 4.01511572980633, + "grad_norm": 65.95346613098634, + "learning_rate": 3.747141635410792e-07, + "logits": -1.2902952432632446, + "logps": -89.2015380859375, + "loss": 0.2008, + "objective": 0.1982060968875885, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.1298067271709442, + "step": 2125 + }, + { + "dpo_loss": 0.6753361821174622, + "epoch": 4.024563060935286, + "grad_norm": 68.66890119551623, + "learning_rate": 3.739986231915508e-07, + "logits": -1.2251384258270264, + "logps": -90.5170669555664, + "loss": 0.1999, + "objective": 0.2000185251235962, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.4625000059604645, + "regularize": 0.13248488306999207, + "step": 2130 + }, + { + "dpo_loss": 0.6820967197418213, + "epoch": 4.034010392064242, + "grad_norm": 67.84715622068745, + "learning_rate": 3.7328173307174597e-07, + "logits": -1.2330976724624634, + "logps": -89.2086410522461, + "loss": 0.2065, + "objective": 0.20815534889698029, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.13994565606117249, + "step": 2135 + }, + { + "dpo_loss": 0.6867682337760925, + "epoch": 4.043457723193198, + "grad_norm": 71.14832059721319, + "learning_rate": 3.725635009852755e-07, + "logits": -1.1594212055206299, + "logps": -87.94404602050781, + "loss": 0.2092, + "objective": 0.21153667569160461, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.14285986125469208, + "step": 2140 + }, + { + "dpo_loss": 0.7056056261062622, + "epoch": 4.052905054322154, + "grad_norm": 68.73986524633295, + "learning_rate": 3.718439347503578e-07, + "logits": -1.1812673807144165, + "logps": -87.79216766357422, + "loss": 0.2071, + "objective": 0.2009545862674713, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.13039401173591614, + "step": 2145 + }, + { + "dpo_loss": 0.6813782453536987, + "epoch": 4.06235238545111, + "grad_norm": 68.82741945060675, + "learning_rate": 3.7112304219973394e-07, + "logits": -1.1798770427703857, + "logps": -91.46490478515625, + "loss": 0.2082, + "objective": 0.21020595729351044, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.48124998807907104, + "regularize": 0.14206811785697937, + "step": 2150 + }, + { + "epoch": 4.06235238545111, + "eval_dpo_loss": 0.7036926746368408, + "eval_logits": -1.2147855758666992, + "eval_logps": -93.67681884765625, + "eval_loss": 0.311942994594574, + "eval_objective": 0.3123517334461212, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.52173912525177, + "eval_regularize": 0.24198248982429504, + "eval_runtime": 158.0386, + "eval_samples_per_second": 36.637, + "eval_steps_per_second": 3.056, + "step": 2150 + }, + { + "dpo_loss": 0.6815483570098877, + "epoch": 4.071799716580066, + "grad_norm": 74.77954309287684, + "learning_rate": 3.7040083118058243e-07, + "logits": -1.1801261901855469, + "logps": -88.95193481445312, + "loss": 0.2014, + "objective": 0.20084276795387268, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.13268791139125824, + "step": 2155 + }, + { + "dpo_loss": 0.6933214664459229, + "epoch": 4.081247047709022, + "grad_norm": 68.95274496215063, + "learning_rate": 3.69677309554434e-07, + "logits": -1.2751498222351074, + "logps": -89.47291564941406, + "loss": 0.19, + "objective": 0.17904631793498993, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.10971417278051376, + "step": 2160 + }, + { + "dpo_loss": 0.6934170126914978, + "epoch": 4.090694378837978, + "grad_norm": 70.3491425212816, + "learning_rate": 3.689524851970855e-07, + "logits": -1.1636548042297363, + "logps": -88.32524871826172, + "loss": 0.2041, + "objective": 0.20672444999217987, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5625, + "regularize": 0.13738274574279785, + "step": 2165 + }, + { + "dpo_loss": 0.6837751865386963, + "epoch": 4.100141709966934, + "grad_norm": 76.80389850526132, + "learning_rate": 3.682263659985148e-07, + "logits": -1.2340781688690186, + "logps": -89.10033416748047, + "loss": 0.2021, + "objective": 0.196573406457901, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.518750011920929, + "regularize": 0.1281958818435669, + "step": 2170 + }, + { + "dpo_loss": 0.7011173963546753, + "epoch": 4.109589041095891, + "grad_norm": 69.35694017187159, + "learning_rate": 3.674989598627943e-07, + "logits": -1.1500046253204346, + "logps": -90.71090698242188, + "loss": 0.2032, + "objective": 0.20607486367225647, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.13596312701702118, + "step": 2175 + }, + { + "dpo_loss": 0.6869430541992188, + "epoch": 4.119036372224847, + "grad_norm": 69.15222854312827, + "learning_rate": 3.6677027470800534e-07, + "logits": -1.2302781343460083, + "logps": -88.79737854003906, + "loss": 0.2057, + "objective": 0.22396686673164368, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.15527252852916718, + "step": 2180 + }, + { + "dpo_loss": 0.6947880983352661, + "epoch": 4.128483703353803, + "grad_norm": 65.82727901584528, + "learning_rate": 3.660403184661518e-07, + "logits": -1.2679330110549927, + "logps": -89.92203521728516, + "loss": 0.1949, + "objective": 0.21502026915550232, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.1455414742231369, + "step": 2185 + }, + { + "dpo_loss": 0.6966627836227417, + "epoch": 4.137931034482759, + "grad_norm": 69.60605335663098, + "learning_rate": 3.653090990830739e-07, + "logits": -1.2208318710327148, + "logps": -89.22146606445312, + "loss": 0.2042, + "objective": 0.19856971502304077, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.512499988079071, + "regularize": 0.12890341877937317, + "step": 2190 + }, + { + "dpo_loss": 0.6860800385475159, + "epoch": 4.147378365611715, + "grad_norm": 70.06283430115266, + "learning_rate": 3.645766245183615e-07, + "logits": -1.1613614559173584, + "logps": -89.69837951660156, + "loss": 0.2059, + "objective": 0.194590762257576, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.550000011920929, + "regularize": 0.12598276138305664, + "step": 2195 + }, + { + "dpo_loss": 0.6867005228996277, + "epoch": 4.156825696740671, + "grad_norm": 67.94271181770046, + "learning_rate": 3.6384290274526766e-07, + "logits": -1.181180715560913, + "logps": -90.18212890625, + "loss": 0.1914, + "objective": 0.19875545799732208, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.13008537888526917, + "step": 2200 + }, + { + "epoch": 4.156825696740671, + "eval_dpo_loss": 0.7032468318939209, + "eval_logits": -1.217943549156189, + "eval_logps": -94.57373046875, + "eval_loss": 0.31386545300483704, + "eval_objective": 0.3137575387954712, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.24343283474445343, + "eval_runtime": 157.6862, + "eval_samples_per_second": 36.718, + "eval_steps_per_second": 3.063, + "step": 2200 + }, + { + "dpo_loss": 0.6863278150558472, + "epoch": 4.166273027869627, + "grad_norm": 70.79608630866144, + "learning_rate": 3.6310794175062156e-07, + "logits": -1.2975205183029175, + "logps": -87.41703033447266, + "loss": 0.1941, + "objective": 0.18734896183013916, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4937500059604645, + "regularize": 0.11871619522571564, + "step": 2205 + }, + { + "dpo_loss": 0.6975613832473755, + "epoch": 4.175720358998583, + "grad_norm": 77.54815208083042, + "learning_rate": 3.62371749534742e-07, + "logits": -1.221123456954956, + "logps": -90.33836364746094, + "loss": 0.2056, + "objective": 0.20308546721935272, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.13332930207252502, + "step": 2210 + }, + { + "dpo_loss": 0.6789113283157349, + "epoch": 4.185167690127539, + "grad_norm": 75.631051881529, + "learning_rate": 3.6163433411135e-07, + "logits": -1.1596550941467285, + "logps": -88.44036102294922, + "loss": 0.2012, + "objective": 0.20634588599205017, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5562499761581421, + "regularize": 0.13845475018024445, + "step": 2215 + }, + { + "dpo_loss": 0.6980286836624146, + "epoch": 4.194615021256495, + "grad_norm": 66.30888177673542, + "learning_rate": 3.6089570350748167e-07, + "logits": -1.157200813293457, + "logps": -89.20658111572266, + "loss": 0.1931, + "objective": 0.187965989112854, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.11816313117742538, + "step": 2220 + }, + { + "dpo_loss": 0.6964994668960571, + "epoch": 4.204062352385451, + "grad_norm": 69.89753056570058, + "learning_rate": 3.601558657634006e-07, + "logits": -1.2606405019760132, + "logps": -89.00025939941406, + "loss": 0.201, + "objective": 0.18100889027118683, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.11135894060134888, + "step": 2225 + }, + { + "dpo_loss": 0.6851469874382019, + "epoch": 4.213509683514407, + "grad_norm": 73.28546857906582, + "learning_rate": 3.594148289325108e-07, + "logits": -1.2857199907302856, + "logps": -87.86104583740234, + "loss": 0.198, + "objective": 0.19738662242889404, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.4375, + "regularize": 0.12887193262577057, + "step": 2230 + }, + { + "dpo_loss": 0.6863463521003723, + "epoch": 4.222957014643363, + "grad_norm": 69.20984946205131, + "learning_rate": 3.586726010812687e-07, + "logits": -1.2645803689956665, + "logps": -87.96028137207031, + "loss": 0.1942, + "objective": 0.1901693493127823, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5062500238418579, + "regularize": 0.12153474241495132, + "step": 2235 + }, + { + "dpo_loss": 0.691120982170105, + "epoch": 4.232404345772319, + "grad_norm": 80.25805309412497, + "learning_rate": 3.5792919028909537e-07, + "logits": -1.3437623977661133, + "logps": -87.26004791259766, + "loss": 0.1951, + "objective": 0.19092616438865662, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.1218140497803688, + "step": 2240 + }, + { + "dpo_loss": 0.6999267935752869, + "epoch": 4.241851676901275, + "grad_norm": 74.39106991623802, + "learning_rate": 3.571846046482886e-07, + "logits": -1.2108876705169678, + "logps": -88.40174102783203, + "loss": 0.2089, + "objective": 0.21814508736133575, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.14815238118171692, + "step": 2245 + }, + { + "dpo_loss": 0.6846331357955933, + "epoch": 4.251299008030231, + "grad_norm": 72.48586230381743, + "learning_rate": 3.564388522639349e-07, + "logits": -1.2497327327728271, + "logps": -89.22590637207031, + "loss": 0.2026, + "objective": 0.20532198250293732, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.13685865700244904, + "step": 2250 + }, + { + "epoch": 4.251299008030231, + "eval_dpo_loss": 0.7034680843353271, + "eval_logits": -1.2043732404708862, + "eval_logps": -93.22201538085938, + "eval_loss": 0.31791311502456665, + "eval_objective": 0.31768348813056946, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2473367154598236, + "eval_runtime": 157.3903, + "eval_samples_per_second": 36.788, + "eval_steps_per_second": 3.069, + "step": 2250 + }, + { + "dpo_loss": 0.6916313171386719, + "epoch": 4.260746339159187, + "grad_norm": 84.41450589555723, + "learning_rate": 3.556919412538212e-07, + "logits": -1.2482655048370361, + "logps": -88.18809509277344, + "loss": 0.1989, + "objective": 0.19382549822330475, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.1246623545885086, + "step": 2255 + }, + { + "dpo_loss": 0.6791214346885681, + "epoch": 4.270193670288144, + "grad_norm": 70.00311355913053, + "learning_rate": 3.549438797483465e-07, + "logits": -1.157104730606079, + "logps": -88.4947738647461, + "loss": 0.2055, + "objective": 0.20276562869548798, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.13485348224639893, + "step": 2260 + }, + { + "dpo_loss": 0.673402726650238, + "epoch": 4.2796410014171, + "grad_norm": 65.09377603534938, + "learning_rate": 3.5419467589043337e-07, + "logits": -1.128641128540039, + "logps": -88.3416519165039, + "loss": 0.2032, + "objective": 0.22014395892620087, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.53125, + "regularize": 0.1528036892414093, + "step": 2265 + }, + { + "dpo_loss": 0.6994603872299194, + "epoch": 4.289088332546056, + "grad_norm": 70.89538912066566, + "learning_rate": 3.5344433783543927e-07, + "logits": -1.2988563776016235, + "logps": -90.05706024169922, + "loss": 0.2001, + "objective": 0.19624318182468414, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.12629713118076324, + "step": 2270 + }, + { + "dpo_loss": 0.6809098124504089, + "epoch": 4.298535663675012, + "grad_norm": 68.30870403869821, + "learning_rate": 3.526928737510678e-07, + "logits": -1.1839253902435303, + "logps": -87.8452377319336, + "loss": 0.1948, + "objective": 0.199173703789711, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5375000238418579, + "regularize": 0.13108272850513458, + "step": 2275 + }, + { + "dpo_loss": 0.7003562450408936, + "epoch": 4.307982994803968, + "grad_norm": 72.49712985866785, + "learning_rate": 3.519402918172798e-07, + "logits": -1.1665685176849365, + "logps": -89.95878601074219, + "loss": 0.1994, + "objective": 0.19495268166065216, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.543749988079071, + "regularize": 0.12491704523563385, + "step": 2280 + }, + { + "dpo_loss": 0.6910146474838257, + "epoch": 4.317430325932924, + "grad_norm": 68.88559864938857, + "learning_rate": 3.511866002262044e-07, + "logits": -1.1572844982147217, + "logps": -88.31980895996094, + "loss": 0.1855, + "objective": 0.18461644649505615, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.11551499366760254, + "step": 2285 + }, + { + "dpo_loss": 0.6953611969947815, + "epoch": 4.32687765706188, + "grad_norm": 72.10111152081335, + "learning_rate": 3.504318071820496e-07, + "logits": -1.2646400928497314, + "logps": -90.25203704833984, + "loss": 0.1979, + "objective": 0.20109014213085175, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.1315540075302124, + "step": 2290 + }, + { + "dpo_loss": 0.6853431463241577, + "epoch": 4.336324988190836, + "grad_norm": 74.46522176713073, + "learning_rate": 3.4967592090101326e-07, + "logits": -1.2266438007354736, + "logps": -90.8377456665039, + "loss": 0.2014, + "objective": 0.2001165896654129, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.13158227503299713, + "step": 2295 + }, + { + "dpo_loss": 0.6854650974273682, + "epoch": 4.345772319319792, + "grad_norm": 67.16113769652895, + "learning_rate": 3.489189496111936e-07, + "logits": -1.1785436868667603, + "logps": -88.6437759399414, + "loss": 0.1908, + "objective": 0.19480295479297638, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5249999761581421, + "regularize": 0.12625643610954285, + "step": 2300 + }, + { + "epoch": 4.345772319319792, + "eval_dpo_loss": 0.702228844165802, + "eval_logits": -1.2116835117340088, + "eval_logps": -94.31507110595703, + "eval_loss": 0.3066709339618683, + "eval_objective": 0.3084842562675476, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5232919454574585, + "eval_regularize": 0.2382613569498062, + "eval_runtime": 157.4043, + "eval_samples_per_second": 36.784, + "eval_steps_per_second": 3.069, + "step": 2300 + }, + { + "dpo_loss": 0.6954362988471985, + "epoch": 4.355219650448748, + "grad_norm": 72.34445643960167, + "learning_rate": 3.481609015524991e-07, + "logits": -1.2407293319702148, + "logps": -89.92005157470703, + "loss": 0.1938, + "objective": 0.19215351343154907, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 0.12260987609624863, + "step": 2305 + }, + { + "dpo_loss": 0.6934856176376343, + "epoch": 4.364666981577704, + "grad_norm": 78.51664449321001, + "learning_rate": 3.474017849765599e-07, + "logits": -1.2324168682098389, + "logps": -87.94395446777344, + "loss": 0.1936, + "objective": 0.1962895691394806, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5562499761581421, + "regularize": 0.1269409954547882, + "step": 2310 + }, + { + "dpo_loss": 0.6825065016746521, + "epoch": 4.37411431270666, + "grad_norm": 69.72656724525673, + "learning_rate": 3.466416081466369e-07, + "logits": -1.2817673683166504, + "logps": -89.84367370605469, + "loss": 0.1833, + "objective": 0.1848260760307312, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.11657543480396271, + "step": 2315 + }, + { + "dpo_loss": 0.6836963891983032, + "epoch": 4.383561643835616, + "grad_norm": 65.28015042658268, + "learning_rate": 3.458803793375324e-07, + "logits": -1.2592960596084595, + "logps": -88.8481674194336, + "loss": 0.1879, + "objective": 0.1888824999332428, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.12051286548376083, + "step": 2320 + }, + { + "dpo_loss": 0.6949820518493652, + "epoch": 4.393008974964572, + "grad_norm": 67.61356747192421, + "learning_rate": 3.451181068354998e-07, + "logits": -1.1924588680267334, + "logps": -87.05536651611328, + "loss": 0.1871, + "objective": 0.1929215043783188, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4749999940395355, + "regularize": 0.12342329323291779, + "step": 2325 + }, + { + "dpo_loss": 0.6871247887611389, + "epoch": 4.402456306093528, + "grad_norm": 68.57928876300109, + "learning_rate": 3.4435479893815355e-07, + "logits": -1.2161206007003784, + "logps": -87.45741271972656, + "loss": 0.194, + "objective": 0.19727759063243866, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.512499988079071, + "regularize": 0.12856510281562805, + "step": 2330 + }, + { + "dpo_loss": 0.696916937828064, + "epoch": 4.411903637222484, + "grad_norm": 64.66076626257149, + "learning_rate": 3.435904639543789e-07, + "logits": -1.2180455923080444, + "logps": -86.78954315185547, + "loss": 0.195, + "objective": 0.19943666458129883, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.1297449767589569, + "step": 2335 + }, + { + "dpo_loss": 0.6831547617912292, + "epoch": 4.42135096835144, + "grad_norm": 76.92527405322872, + "learning_rate": 3.428251102042409e-07, + "logits": -1.2242004871368408, + "logps": -87.0202865600586, + "loss": 0.1862, + "objective": 0.18791408836841583, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.11959860473871231, + "step": 2340 + }, + { + "dpo_loss": 0.6820409893989563, + "epoch": 4.430798299480397, + "grad_norm": 65.42855768283592, + "learning_rate": 3.4205874601889464e-07, + "logits": -1.2483327388763428, + "logps": -85.98213195800781, + "loss": 0.1889, + "objective": 0.1903569996356964, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.12215292453765869, + "step": 2345 + }, + { + "dpo_loss": 0.6897372007369995, + "epoch": 4.440245630609353, + "grad_norm": 67.23230975440292, + "learning_rate": 3.414449327628279e-07, + "logits": -1.2460027933120728, + "logps": -86.58792877197266, + "loss": 0.1931, + "objective": 0.18547531962394714, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.11650160700082779, + "step": 2350 + }, + { + "epoch": 4.440245630609353, + "eval_dpo_loss": 0.7057645320892334, + "eval_logits": -1.206606388092041, + "eval_logps": -93.412353515625, + "eval_loss": 0.3241415023803711, + "eval_objective": 0.32362499833106995, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5222567319869995, + "eval_regularize": 0.25304850935935974, + "eval_runtime": 157.4891, + "eval_samples_per_second": 36.764, + "eval_steps_per_second": 3.067, + "step": 2350 + }, + { + "dpo_loss": 0.6985042691230774, + "epoch": 4.449692961738309, + "grad_norm": 68.68278228628853, + "learning_rate": 3.406767708236679e-07, + "logits": -1.237305998802185, + "logps": -87.68280029296875, + "loss": 0.1871, + "objective": 0.18133942782878876, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.45625001192092896, + "regularize": 0.11148901283740997, + "step": 2355 + }, + { + "dpo_loss": 0.7006803750991821, + "epoch": 4.459140292867265, + "grad_norm": 64.88989596384992, + "learning_rate": 3.39907621834758e-07, + "logits": -1.2617131471633911, + "logps": -91.06695556640625, + "loss": 0.2024, + "objective": 0.20287685096263885, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.13280881941318512, + "step": 2360 + }, + { + "dpo_loss": 0.6917591691017151, + "epoch": 4.468587623996221, + "grad_norm": 72.92629822125772, + "learning_rate": 3.391374941685656e-07, + "logits": -1.15776789188385, + "logps": -89.98072052001953, + "loss": 0.1974, + "objective": 0.20786471664905548, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.5874999761581421, + "regularize": 0.13868877291679382, + "step": 2365 + }, + { + "dpo_loss": 0.6847442388534546, + "epoch": 4.478034955125177, + "grad_norm": 72.59186575547133, + "learning_rate": 3.3836639620821164e-07, + "logits": -1.2330033779144287, + "logps": -87.796875, + "loss": 0.1977, + "objective": 0.19836851954460144, + "ranking_idealized": 0.4437499940395355, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 0.12989410758018494, + "step": 2370 + }, + { + "dpo_loss": 0.6927186250686646, + "epoch": 4.487482286254133, + "grad_norm": 73.8071218413957, + "learning_rate": 3.3759433634737875e-07, + "logits": -1.2164292335510254, + "logps": -88.21554565429688, + "loss": 0.1954, + "objective": 0.1947944313287735, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.12552256882190704, + "step": 2375 + }, + { + "dpo_loss": 0.6942063570022583, + "epoch": 4.496929617383089, + "grad_norm": 69.69165122272695, + "learning_rate": 3.3682132299022037e-07, + "logits": -1.171820878982544, + "logps": -88.99403381347656, + "loss": 0.1888, + "objective": 0.18754351139068604, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.518750011920929, + "regularize": 0.1181228905916214, + "step": 2380 + }, + { + "dpo_loss": 0.6920589208602905, + "epoch": 4.506376948512045, + "grad_norm": 76.6672759543614, + "learning_rate": 3.360473645512691e-07, + "logits": -1.1179250478744507, + "logps": -89.265869140625, + "loss": 0.1898, + "objective": 0.1791161596775055, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.1099102646112442, + "step": 2385 + }, + { + "dpo_loss": 0.6984947919845581, + "epoch": 4.515824279641001, + "grad_norm": 64.60478702112378, + "learning_rate": 3.3527246945534503e-07, + "logits": -1.2097110748291016, + "logps": -88.06912994384766, + "loss": 0.1893, + "objective": 0.2067548781633377, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4749999940395355, + "regularize": 0.13690538704395294, + "step": 2390 + }, + { + "dpo_loss": 0.6983093619346619, + "epoch": 4.525271610769957, + "grad_norm": 68.74712299235686, + "learning_rate": 3.3449664613746423e-07, + "logits": -1.1768536567687988, + "logps": -89.36795806884766, + "loss": 0.1867, + "objective": 0.1928076446056366, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.53125, + "regularize": 0.12297670543193817, + "step": 2395 + }, + { + "dpo_loss": 0.6859962344169617, + "epoch": 4.534718941898913, + "grad_norm": 69.57898877547159, + "learning_rate": 3.337199030427465e-07, + "logits": -1.221806526184082, + "logps": -89.45655822753906, + "loss": 0.195, + "objective": 0.19366537034511566, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5562499761581421, + "regularize": 0.12506575882434845, + "step": 2400 + }, + { + "epoch": 4.534718941898913, + "eval_dpo_loss": 0.7035441994667053, + "eval_logits": -1.2061595916748047, + "eval_logps": -94.24186706542969, + "eval_loss": 0.31109294295310974, + "eval_objective": 0.31131625175476074, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.52173912525177, + "eval_regularize": 0.24096183478832245, + "eval_runtime": 157.5383, + "eval_samples_per_second": 36.753, + "eval_steps_per_second": 3.066, + "step": 2400 + }, + { + "dpo_loss": 0.6786166429519653, + "epoch": 4.544166273027869, + "grad_norm": 76.04442874927135, + "learning_rate": 3.329422486263242e-07, + "logits": -1.2290821075439453, + "logps": -90.7866439819336, + "loss": 0.1908, + "objective": 0.188669353723526, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.1208076924085617, + "step": 2405 + }, + { + "dpo_loss": 0.6833987236022949, + "epoch": 4.553613604156825, + "grad_norm": 69.9120836600003, + "learning_rate": 3.321636913532494e-07, + "logits": -1.184873104095459, + "logps": -87.97506713867188, + "loss": 0.1921, + "objective": 0.201020747423172, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.4124999940395355, + "ranking_simple": 0.41874998807907104, + "regularize": 0.13268086314201355, + "step": 2410 + }, + { + "dpo_loss": 0.688135027885437, + "epoch": 4.563060935285781, + "grad_norm": 71.7826426033307, + "learning_rate": 3.3138423969840214e-07, + "logits": -1.273437261581421, + "logps": -89.36498260498047, + "loss": 0.1885, + "objective": 0.19135358929634094, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.48750001192092896, + "regularize": 0.12254010140895844, + "step": 2415 + }, + { + "dpo_loss": 0.6903122663497925, + "epoch": 4.572508266414738, + "grad_norm": 72.09130382817256, + "learning_rate": 3.3060390214639834e-07, + "logits": -1.1458829641342163, + "logps": -86.36959075927734, + "loss": 0.1962, + "objective": 0.1846756488084793, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.44999998807907104, + "regularize": 0.1156444177031517, + "step": 2420 + }, + { + "dpo_loss": 0.6769827008247375, + "epoch": 4.581955597543693, + "grad_norm": 75.79810035141197, + "learning_rate": 3.29822687191497e-07, + "logits": -1.2757484912872314, + "logps": -88.91954040527344, + "loss": 0.1881, + "objective": 0.19819346070289612, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 0.13049522042274475, + "step": 2425 + }, + { + "dpo_loss": 0.7002481818199158, + "epoch": 4.59140292867265, + "grad_norm": 72.47474751481887, + "learning_rate": 3.2904060333750817e-07, + "logits": -1.3062492609024048, + "logps": -87.92079162597656, + "loss": 0.1892, + "objective": 0.18215011060237885, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.11212529242038727, + "step": 2430 + }, + { + "dpo_loss": 0.7003153562545776, + "epoch": 4.600850259801606, + "grad_norm": 69.56974706989557, + "learning_rate": 3.282576590977e-07, + "logits": -1.2487523555755615, + "logps": -87.8376235961914, + "loss": 0.19, + "objective": 0.18513301014900208, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.581250011920929, + "regularize": 0.11510147899389267, + "step": 2435 + }, + { + "dpo_loss": 0.6834243535995483, + "epoch": 4.610297590930562, + "grad_norm": 68.29661178867353, + "learning_rate": 3.2747386299470625e-07, + "logits": -1.1830635070800781, + "logps": -89.48603820800781, + "loss": 0.1829, + "objective": 0.1890353560447693, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.6187499761581421, + "ranking_simple": 0.612500011920929, + "regularize": 0.12069292366504669, + "step": 2440 + }, + { + "dpo_loss": 0.6962495446205139, + "epoch": 4.619744922059518, + "grad_norm": 67.06846784717705, + "learning_rate": 3.2668922356043385e-07, + "logits": -1.1794134378433228, + "logps": -88.9986801147461, + "loss": 0.1937, + "objective": 0.1998957395553589, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.606249988079071, + "ranking_simple": 0.606249988079071, + "regularize": 0.1302708089351654, + "step": 2445 + }, + { + "dpo_loss": 0.6916170120239258, + "epoch": 4.629192253188474, + "grad_norm": 69.63472871808763, + "learning_rate": 3.2590374933596934e-07, + "logits": -1.212580919265747, + "logps": -88.09070587158203, + "loss": 0.1947, + "objective": 0.19347377121448517, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.581250011920929, + "regularize": 0.12431205809116364, + "step": 2450 + }, + { + "epoch": 4.629192253188474, + "eval_dpo_loss": 0.7066665291786194, + "eval_logits": -1.1955713033676147, + "eval_logps": -93.6715316772461, + "eval_loss": 0.3311574161052704, + "eval_objective": 0.3316709101200104, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.522774338722229, + "eval_regularize": 0.2610042691230774, + "eval_runtime": 158.4938, + "eval_samples_per_second": 36.531, + "eval_steps_per_second": 3.047, + "step": 2450 + }, + { + "dpo_loss": 0.6930611729621887, + "epoch": 4.63863958431743, + "grad_norm": 68.9826360071532, + "learning_rate": 3.251174488714863e-07, + "logits": -1.1573054790496826, + "logps": -87.21949768066406, + "loss": 0.1898, + "objective": 0.1879061758518219, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 0.11860009282827377, + "step": 2455 + }, + { + "dpo_loss": 0.6947464346885681, + "epoch": 4.648086915446386, + "grad_norm": 69.37793486917724, + "learning_rate": 3.2433033072615237e-07, + "logits": -1.285698413848877, + "logps": -88.887939453125, + "loss": 0.1954, + "objective": 0.20384371280670166, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.13436904549598694, + "step": 2460 + }, + { + "dpo_loss": 0.6881365180015564, + "epoch": 4.657534246575342, + "grad_norm": 76.27390048138683, + "learning_rate": 3.2354240346803587e-07, + "logits": -1.3076812028884888, + "logps": -88.25764465332031, + "loss": 0.189, + "objective": 0.19885292649269104, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.1300393044948578, + "step": 2465 + }, + { + "dpo_loss": 0.6903897523880005, + "epoch": 4.6669815777042984, + "grad_norm": 68.2251787946993, + "learning_rate": 3.227536756740127e-07, + "logits": -1.1892402172088623, + "logps": -86.03571319580078, + "loss": 0.1829, + "objective": 0.18689945340156555, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.11786048114299774, + "step": 2470 + }, + { + "dpo_loss": 0.6937160491943359, + "epoch": 4.6764289088332545, + "grad_norm": 63.17076907075886, + "learning_rate": 3.219641559296726e-07, + "logits": -1.1472514867782593, + "logps": -86.34375762939453, + "loss": 0.1906, + "objective": 0.18023869395256042, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.11086708307266235, + "step": 2475 + }, + { + "dpo_loss": 0.6965736150741577, + "epoch": 4.6858762399622105, + "grad_norm": 64.71300253439841, + "learning_rate": 3.2117385282922636e-07, + "logits": -1.2553402185440063, + "logps": -87.43341827392578, + "loss": 0.1767, + "objective": 0.16977284848690033, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.10011549293994904, + "step": 2480 + }, + { + "dpo_loss": 0.6819189786911011, + "epoch": 4.6953235710911665, + "grad_norm": 69.46266935910216, + "learning_rate": 3.2038277497541177e-07, + "logits": -1.2258949279785156, + "logps": -87.9898910522461, + "loss": 0.1924, + "objective": 0.19613954424858093, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.12794767320156097, + "step": 2485 + }, + { + "dpo_loss": 0.6897075772285461, + "epoch": 4.7047709022201225, + "grad_norm": 69.39400619680565, + "learning_rate": 3.195909309793998e-07, + "logits": -1.1648635864257812, + "logps": -87.18431091308594, + "loss": 0.1797, + "objective": 0.1739298552274704, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.10495909303426743, + "step": 2490 + }, + { + "dpo_loss": 0.690790593624115, + "epoch": 4.7142182333490785, + "grad_norm": 75.1319131589367, + "learning_rate": 3.187983294607016e-07, + "logits": -1.1756292581558228, + "logps": -88.00474548339844, + "loss": 0.1948, + "objective": 0.2014426738023758, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.13236361742019653, + "step": 2495 + }, + { + "dpo_loss": 0.6772107481956482, + "epoch": 4.7236655644780345, + "grad_norm": 70.41393937300322, + "learning_rate": 3.1800497904707393e-07, + "logits": -1.1602323055267334, + "logps": -87.19842529296875, + "loss": 0.1837, + "objective": 0.1814550906419754, + "ranking_idealized": 0.4124999940395355, + "ranking_idealized_expo": 0.40625, + "ranking_simple": 0.40625, + "regularize": 0.11373400688171387, + "step": 2500 + }, + { + "epoch": 4.7236655644780345, + "eval_dpo_loss": 0.7076886296272278, + "eval_logits": -1.2040798664093018, + "eval_logps": -93.6178970336914, + "eval_loss": 0.32894209027290344, + "eval_objective": 0.3304065465927124, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5222567319869995, + "eval_regularize": 0.2596376836299896, + "eval_runtime": 157.472, + "eval_samples_per_second": 36.768, + "eval_steps_per_second": 3.067, + "step": 2500 + }, + { + "dpo_loss": 0.6912451982498169, + "epoch": 4.733112895606991, + "grad_norm": 63.32014936670936, + "learning_rate": 3.1721088837442563e-07, + "logits": -1.177161455154419, + "logps": -89.26594543457031, + "loss": 0.1772, + "objective": 0.1801118403673172, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.11098729074001312, + "step": 2505 + }, + { + "dpo_loss": 0.6862069368362427, + "epoch": 4.7425602267359475, + "grad_norm": 69.92808489362727, + "learning_rate": 3.1641606608672357e-07, + "logits": -1.2042205333709717, + "logps": -89.10171508789062, + "loss": 0.1854, + "objective": 0.18454326689243317, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.45625001192092896, + "regularize": 0.11592257022857666, + "step": 2510 + }, + { + "dpo_loss": 0.6823688745498657, + "epoch": 4.7520075578649035, + "grad_norm": 68.15826407150226, + "learning_rate": 3.1562052083589843e-07, + "logits": -1.1668407917022705, + "logps": -89.30625915527344, + "loss": 0.1939, + "objective": 0.19946186244487762, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.13122497498989105, + "step": 2515 + }, + { + "dpo_loss": 0.6842549443244934, + "epoch": 4.7614548889938595, + "grad_norm": 68.69748953052088, + "learning_rate": 3.1482426128175075e-07, + "logits": -1.1746580600738525, + "logps": -86.11698913574219, + "loss": 0.1809, + "objective": 0.17297717928886414, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.574999988079071, + "regularize": 0.10455168783664703, + "step": 2520 + }, + { + "dpo_loss": 0.6946545243263245, + "epoch": 4.7709022201228155, + "grad_norm": 73.811610364557, + "learning_rate": 3.1402729609185633e-07, + "logits": -1.2330520153045654, + "logps": -87.93450927734375, + "loss": 0.1861, + "objective": 0.19090914726257324, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.12144370377063751, + "step": 2525 + }, + { + "dpo_loss": 0.697222888469696, + "epoch": 4.7803495512517715, + "grad_norm": 70.51840780607554, + "learning_rate": 3.132296339414723e-07, + "logits": -1.2591397762298584, + "logps": -86.95104217529297, + "loss": 0.1856, + "objective": 0.1920265108346939, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.4937500059604645, + "regularize": 0.12230421602725983, + "step": 2530 + }, + { + "dpo_loss": 0.6979044675827026, + "epoch": 4.7897968823807275, + "grad_norm": 69.26217964070176, + "learning_rate": 3.124312835134422e-07, + "logits": -1.2490675449371338, + "logps": -87.75285339355469, + "loss": 0.1804, + "objective": 0.18386594951152802, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.11407549679279327, + "step": 2535 + }, + { + "dpo_loss": 0.6813572645187378, + "epoch": 4.7992442135096836, + "grad_norm": 65.90857751588676, + "learning_rate": 3.1163225349810197e-07, + "logits": -1.1830145120620728, + "logps": -86.79169464111328, + "loss": 0.1827, + "objective": 0.1742682158946991, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.10613247007131577, + "step": 2540 + }, + { + "dpo_loss": 0.6973792314529419, + "epoch": 4.80869154463864, + "grad_norm": 70.5678238381887, + "learning_rate": 3.10832552593185e-07, + "logits": -1.1652858257293701, + "logps": -86.21405029296875, + "loss": 0.1766, + "objective": 0.17289616167545319, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.10315822064876556, + "step": 2545 + }, + { + "dpo_loss": 0.6942178010940552, + "epoch": 4.818138875767596, + "grad_norm": 69.54271350405952, + "learning_rate": 3.100321895037274e-07, + "logits": -1.2170817852020264, + "logps": -86.79609680175781, + "loss": 0.1751, + "objective": 0.1728823482990265, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.10346055030822754, + "step": 2550 + }, + { + "epoch": 4.818138875767596, + "eval_dpo_loss": 0.7060064077377319, + "eval_logits": -1.1993141174316406, + "eval_logps": -93.47087860107422, + "eval_loss": 0.32536742091178894, + "eval_objective": 0.32468801736831665, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5212215185165405, + "eval_regularize": 0.25408732891082764, + "eval_runtime": 157.7178, + "eval_samples_per_second": 36.711, + "eval_steps_per_second": 3.062, + "step": 2550 + }, + { + "dpo_loss": 0.6955153346061707, + "epoch": 4.827586206896552, + "grad_norm": 69.02890069280883, + "learning_rate": 3.092311729419737e-07, + "logits": -1.2395265102386475, + "logps": -88.45282745361328, + "loss": 0.1858, + "objective": 0.17923080921173096, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.612500011920929, + "ranking_simple": 0.612500011920929, + "regularize": 0.10967928171157837, + "step": 2555 + }, + { + "dpo_loss": 0.6916204690933228, + "epoch": 4.837033538025508, + "grad_norm": 69.16848391532415, + "learning_rate": 3.0842951162728157e-07, + "logits": -1.1788740158081055, + "logps": -86.39543914794922, + "loss": 0.1829, + "objective": 0.17950484156608582, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5062500238418579, + "regularize": 0.11034281551837921, + "step": 2560 + }, + { + "dpo_loss": 0.693740725517273, + "epoch": 4.846480869154464, + "grad_norm": 68.5339569030316, + "learning_rate": 3.0762721428602695e-07, + "logits": -1.2411420345306396, + "logps": -86.57717895507812, + "loss": 0.1871, + "objective": 0.1871481090784073, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5625, + "regularize": 0.11777403205633163, + "step": 2565 + }, + { + "dpo_loss": 0.6866006851196289, + "epoch": 4.85592820028342, + "grad_norm": 73.61264970416494, + "learning_rate": 3.068242896515093e-07, + "logits": -1.1418663263320923, + "logps": -86.59040069580078, + "loss": 0.1737, + "objective": 0.16396993398666382, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.518750011920929, + "regularize": 0.09530988335609436, + "step": 2570 + }, + { + "dpo_loss": 0.6778737306594849, + "epoch": 4.865375531412376, + "grad_norm": 70.3681521743756, + "learning_rate": 3.060207464638564e-07, + "logits": -1.3109599351882935, + "logps": -87.58135223388672, + "loss": 0.1818, + "objective": 0.1783079206943512, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.11052054166793823, + "step": 2575 + }, + { + "dpo_loss": 0.698595404624939, + "epoch": 4.874822862541333, + "grad_norm": 66.61584272656816, + "learning_rate": 3.052165934699291e-07, + "logits": -1.1655247211456299, + "logps": -87.35575866699219, + "loss": 0.179, + "objective": 0.18281424045562744, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.11295469850301743, + "step": 2580 + }, + { + "dpo_loss": 0.6869684457778931, + "epoch": 4.884270193670288, + "grad_norm": 64.53729015678043, + "learning_rate": 3.044118394232263e-07, + "logits": -1.1486632823944092, + "logps": -85.10508728027344, + "loss": 0.1744, + "objective": 0.1802261918783188, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.11152936518192291, + "step": 2585 + }, + { + "dpo_loss": 0.6830800175666809, + "epoch": 4.893717524799245, + "grad_norm": 64.62619594802779, + "learning_rate": 3.0360649308378965e-07, + "logits": -1.2419785261154175, + "logps": -87.8184814453125, + "loss": 0.1761, + "objective": 0.1736673265695572, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.10535933077335358, + "step": 2590 + }, + { + "dpo_loss": 0.6906985640525818, + "epoch": 4.903164855928201, + "grad_norm": 70.30080047514623, + "learning_rate": 3.0280056321810793e-07, + "logits": -1.2014786005020142, + "logps": -86.3695297241211, + "loss": 0.1827, + "objective": 0.17873261868953705, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4437499940395355, + "regularize": 0.10966275632381439, + "step": 2595 + }, + { + "dpo_loss": 0.6820223927497864, + "epoch": 4.912612187057157, + "grad_norm": 65.23956981989254, + "learning_rate": 3.019940585990219e-07, + "logits": -1.3367946147918701, + "logps": -88.90962982177734, + "loss": 0.1717, + "objective": 0.17627449333667755, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.10807224363088608, + "step": 2600 + }, + { + "epoch": 4.912612187057157, + "eval_dpo_loss": 0.7049936652183533, + "eval_logits": -1.2077727317810059, + "eval_logps": -94.28855895996094, + "eval_loss": 0.32867535948753357, + "eval_objective": 0.32916179299354553, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.520703911781311, + "eval_regularize": 0.2586624324321747, + "eval_runtime": 157.7677, + "eval_samples_per_second": 36.7, + "eval_steps_per_second": 3.061, + "step": 2600 + }, + { + "dpo_loss": 0.6886630058288574, + "epoch": 4.922059518186113, + "grad_norm": 72.59462116847205, + "learning_rate": 3.013484469806475e-07, + "logits": -1.2272846698760986, + "logps": -89.29021453857422, + "loss": 0.1751, + "objective": 0.17364418506622314, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.10477789491415024, + "step": 2605 + }, + { + "dpo_loss": 0.6812406778335571, + "epoch": 4.931506849315069, + "grad_norm": 67.46210838128205, + "learning_rate": 3.0054092993290186e-07, + "logits": -1.1636372804641724, + "logps": -87.42491149902344, + "loss": 0.1778, + "objective": 0.17332686483860016, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.10520277917385101, + "step": 2610 + }, + { + "dpo_loss": 0.688554584980011, + "epoch": 4.940954180444025, + "grad_norm": 73.04114552356964, + "learning_rate": 2.997328627286852e-07, + "logits": -1.2463303804397583, + "logps": -86.92112731933594, + "loss": 0.1727, + "objective": 0.17535845935344696, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5249999761581421, + "regularize": 0.10650300979614258, + "step": 2615 + }, + { + "dpo_loss": 0.6917858719825745, + "epoch": 4.950401511572981, + "grad_norm": 64.2146343951538, + "learning_rate": 2.9892425416410385e-07, + "logits": -1.2629753351211548, + "logps": -88.22035217285156, + "loss": 0.1788, + "objective": 0.1773729771375656, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48124998807907104, + "regularize": 0.10819437354803085, + "step": 2620 + }, + { + "dpo_loss": 0.699199914932251, + "epoch": 4.959848842701937, + "grad_norm": 70.39578465923876, + "learning_rate": 2.9811511304115715e-07, + "logits": -1.1896778345108032, + "logps": -89.34644317626953, + "loss": 0.1783, + "objective": 0.18884512782096863, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.11892513185739517, + "step": 2625 + }, + { + "dpo_loss": 0.6905206441879272, + "epoch": 4.969296173830893, + "grad_norm": 69.32265365835673, + "learning_rate": 2.9730544816764175e-07, + "logits": -1.1768453121185303, + "logps": -89.61418151855469, + "loss": 0.1782, + "objective": 0.18465279042720795, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.11560072749853134, + "step": 2630 + }, + { + "dpo_loss": 0.6918953657150269, + "epoch": 4.978743504959849, + "grad_norm": 72.72456109597668, + "learning_rate": 2.9649526835705517e-07, + "logits": -1.255723476409912, + "logps": -87.71105194091797, + "loss": 0.1797, + "objective": 0.18056915700435638, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.543749988079071, + "regularize": 0.11137963831424713, + "step": 2635 + }, + { + "dpo_loss": 0.6785508990287781, + "epoch": 4.988190836088805, + "grad_norm": 71.85403127935078, + "learning_rate": 2.9568458242850053e-07, + "logits": -1.1688531637191772, + "logps": -88.31727600097656, + "loss": 0.1755, + "objective": 0.18157070875167847, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.581250011920929, + "regularize": 0.11371561139822006, + "step": 2640 + }, + { + "dpo_loss": 0.6890773773193359, + "epoch": 4.997638167217761, + "grad_norm": 67.62187141331893, + "learning_rate": 2.9487339920659005e-07, + "logits": -1.3337209224700928, + "logps": -88.54768371582031, + "loss": 0.1825, + "objective": 0.17994090914726257, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.11103316396474838, + "step": 2645 + }, + { + "dpo_loss": 0.6844527721405029, + "epoch": 5.007085498346717, + "grad_norm": 70.1158522407508, + "learning_rate": 2.9406172752134914e-07, + "logits": -1.1873667240142822, + "logps": -85.24104309082031, + "loss": 0.1761, + "objective": 0.18002694845199585, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.11158166825771332, + "step": 2650 + }, + { + "epoch": 5.007085498346717, + "eval_dpo_loss": 0.7060586214065552, + "eval_logits": -1.205496907234192, + "eval_logps": -93.62104034423828, + "eval_loss": 0.3257303535938263, + "eval_objective": 0.3239457905292511, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.52173912525177, + "eval_regularize": 0.253339946269989, + "eval_runtime": 157.8318, + "eval_samples_per_second": 36.685, + "eval_steps_per_second": 3.06, + "step": 2650 + }, + { + "dpo_loss": 0.6852763295173645, + "epoch": 5.016532829475673, + "grad_norm": 71.3277288070423, + "learning_rate": 2.932495762081205e-07, + "logits": -1.1914253234863281, + "logps": -89.50672912597656, + "loss": 0.1789, + "objective": 0.1824033111333847, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.11387567222118378, + "step": 2655 + }, + { + "dpo_loss": 0.6922017335891724, + "epoch": 5.025980160604629, + "grad_norm": 70.69002903393073, + "learning_rate": 2.924369541074674e-07, + "logits": -1.1777479648590088, + "logps": -87.5579833984375, + "loss": 0.1814, + "objective": 0.18187366425991058, + "ranking_idealized": 0.45625001192092896, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 0.11265347898006439, + "step": 2660 + }, + { + "dpo_loss": 0.6896432638168335, + "epoch": 5.035427491733585, + "grad_norm": 69.459829412544, + "learning_rate": 2.916238700650783e-07, + "logits": -1.2779085636138916, + "logps": -86.95489501953125, + "loss": 0.1774, + "objective": 0.1773608922958374, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.10839656740427017, + "step": 2665 + }, + { + "dpo_loss": 0.6889906525611877, + "epoch": 5.044874822862542, + "grad_norm": 66.60019086580874, + "learning_rate": 2.908103329316697e-07, + "logits": -1.1906511783599854, + "logps": -87.39493560791016, + "loss": 0.171, + "objective": 0.1724904477596283, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.10359140485525131, + "step": 2670 + }, + { + "dpo_loss": 0.6954147219657898, + "epoch": 5.054322153991498, + "grad_norm": 69.47070958379493, + "learning_rate": 2.8999635156289027e-07, + "logits": -1.275161862373352, + "logps": -87.87858581542969, + "loss": 0.1752, + "objective": 0.18025653064250946, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.11071505397558212, + "step": 2675 + }, + { + "dpo_loss": 0.6879990696907043, + "epoch": 5.063769485120454, + "grad_norm": 63.420675987238475, + "learning_rate": 2.8918193481922425e-07, + "logits": -1.1934435367584229, + "logps": -86.3830795288086, + "loss": 0.1634, + "objective": 0.16555842757225037, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.48750001192092896, + "regularize": 0.09675852209329605, + "step": 2680 + }, + { + "dpo_loss": 0.6914385557174683, + "epoch": 5.07321681624941, + "grad_norm": 70.46171647986608, + "learning_rate": 2.883670915658952e-07, + "logits": -1.1908118724822998, + "logps": -88.31812286376953, + "loss": 0.1767, + "objective": 0.17208442091941833, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.46875, + "regularize": 0.10294058173894882, + "step": 2685 + }, + { + "dpo_loss": 0.6840112209320068, + "epoch": 5.082664147378366, + "grad_norm": 66.86402457294675, + "learning_rate": 2.8755183067276955e-07, + "logits": -1.1934614181518555, + "logps": -88.08988952636719, + "loss": 0.1767, + "objective": 0.17896486818790436, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.1105637326836586, + "step": 2690 + }, + { + "dpo_loss": 0.6855770349502563, + "epoch": 5.092111478507322, + "grad_norm": 65.28727826104723, + "learning_rate": 2.8673616101425946e-07, + "logits": -1.1915392875671387, + "logps": -87.99581146240234, + "loss": 0.1719, + "objective": 0.17346800863742828, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.6000000238418579, + "regularize": 0.10491027683019638, + "step": 2695 + }, + { + "dpo_loss": 0.6846620440483093, + "epoch": 5.101558809636278, + "grad_norm": 67.24661354657333, + "learning_rate": 2.85920091469227e-07, + "logits": -1.2413551807403564, + "logps": -87.92008972167969, + "loss": 0.1692, + "objective": 0.17094926536083221, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.4937500059604645, + "regularize": 0.10248307138681412, + "step": 2700 + }, + { + "epoch": 5.101558809636278, + "eval_dpo_loss": 0.7072012424468994, + "eval_logits": -1.2062513828277588, + "eval_logps": -93.01093292236328, + "eval_loss": 0.33959639072418213, + "eval_objective": 0.3377668559551239, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5222567319869995, + "eval_regularize": 0.267046719789505, + "eval_runtime": 158.2175, + "eval_samples_per_second": 36.595, + "eval_steps_per_second": 3.053, + "step": 2700 + }, + { + "dpo_loss": 0.6864423751831055, + "epoch": 5.111006140765234, + "grad_norm": 68.97419904948683, + "learning_rate": 2.8510363092088725e-07, + "logits": -1.2034504413604736, + "logps": -88.89812469482422, + "loss": 0.1761, + "objective": 0.17688480019569397, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5375000238418579, + "regularize": 0.10824055969715118, + "step": 2705 + }, + { + "dpo_loss": 0.696429967880249, + "epoch": 5.12045347189419, + "grad_norm": 66.60888495594311, + "learning_rate": 2.8428678825671097e-07, + "logits": -1.2295548915863037, + "logps": -88.55594635009766, + "loss": 0.1749, + "objective": 0.1697665899991989, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5687500238418579, + "regularize": 0.10012359917163849, + "step": 2710 + }, + { + "dpo_loss": 0.6905234456062317, + "epoch": 5.129900803023146, + "grad_norm": 67.47223601522833, + "learning_rate": 2.83469572368329e-07, + "logits": -1.206521987915039, + "logps": -90.2388916015625, + "loss": 0.1675, + "objective": 0.1722148209810257, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.10316245257854462, + "step": 2715 + }, + { + "dpo_loss": 0.6935364007949829, + "epoch": 5.139348134152102, + "grad_norm": 65.87343663255949, + "learning_rate": 2.8265199215143444e-07, + "logits": -1.2320802211761475, + "logps": -88.0063705444336, + "loss": 0.1752, + "objective": 0.18072409927845, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4375, + "regularize": 0.11137046664953232, + "step": 2720 + }, + { + "dpo_loss": 0.6894115209579468, + "epoch": 5.148795465281058, + "grad_norm": 70.65549570326904, + "learning_rate": 2.818340565056864e-07, + "logits": -1.2492519617080688, + "logps": -87.02153778076172, + "loss": 0.172, + "objective": 0.18207569420337677, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 0.11313454061746597, + "step": 2725 + }, + { + "dpo_loss": 0.6915232539176941, + "epoch": 5.158242796410014, + "grad_norm": 64.75353275942425, + "learning_rate": 2.810157743346131e-07, + "logits": -1.1639604568481445, + "logps": -86.55883026123047, + "loss": 0.1667, + "objective": 0.17392504215240479, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5874999761581421, + "regularize": 0.10477272421121597, + "step": 2730 + }, + { + "dpo_loss": 0.6925600171089172, + "epoch": 5.16769012753897, + "grad_norm": 68.39462262880215, + "learning_rate": 2.801971545455144e-07, + "logits": -1.2426865100860596, + "logps": -86.77247619628906, + "loss": 0.1793, + "objective": 0.18478263914585114, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.11552663892507553, + "step": 2735 + }, + { + "dpo_loss": 0.6853238344192505, + "epoch": 5.177137458667926, + "grad_norm": 63.78064347024799, + "learning_rate": 2.7937820604936583e-07, + "logits": -1.2348917722702026, + "logps": -88.9598388671875, + "loss": 0.1679, + "objective": 0.17504046857357025, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.543749988079071, + "regularize": 0.10650809109210968, + "step": 2740 + }, + { + "dpo_loss": 0.693293571472168, + "epoch": 5.186584789796882, + "grad_norm": 66.4897429476268, + "learning_rate": 2.785589377607205e-07, + "logits": -1.23585045337677, + "logps": -89.13365936279297, + "loss": 0.1648, + "objective": 0.164475217461586, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.09514583647251129, + "step": 2745 + }, + { + "dpo_loss": 0.6954046487808228, + "epoch": 5.196032120925839, + "grad_norm": 68.61846330346695, + "learning_rate": 2.7773935859761287e-07, + "logits": -1.1489399671554565, + "logps": -89.15138244628906, + "loss": 0.1676, + "objective": 0.17117753624916077, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.512499988079071, + "regularize": 0.10163706541061401, + "step": 2750 + }, + { + "epoch": 5.196032120925839, + "eval_dpo_loss": 0.7083860039710999, + "eval_logits": -1.1977908611297607, + "eval_logps": -93.95914459228516, + "eval_loss": 0.340188592672348, + "eval_objective": 0.3383520543575287, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2675134837627411, + "eval_runtime": 162.1543, + "eval_samples_per_second": 35.707, + "eval_steps_per_second": 2.979, + "step": 2750 + }, + { + "dpo_loss": 0.6867315769195557, + "epoch": 5.205479452054795, + "grad_norm": 68.40852812072636, + "learning_rate": 2.769194774814613e-07, + "logits": -1.1747679710388184, + "logps": -88.01338958740234, + "loss": 0.1723, + "objective": 0.17285940051078796, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.10418625175952911, + "step": 2755 + }, + { + "dpo_loss": 0.7033735513687134, + "epoch": 5.214926783183751, + "grad_norm": 71.86539992150202, + "learning_rate": 2.7609930333697094e-07, + "logits": -1.1481572389602661, + "logps": -89.25263977050781, + "loss": 0.1727, + "objective": 0.1682891547679901, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.09795178472995758, + "step": 2760 + }, + { + "dpo_loss": 0.6900596022605896, + "epoch": 5.224374114312707, + "grad_norm": 65.34349513478105, + "learning_rate": 2.7527884509203686e-07, + "logits": -1.1207449436187744, + "logps": -88.28308868408203, + "loss": 0.1599, + "objective": 0.14985518157482147, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.08084921538829803, + "step": 2765 + }, + { + "dpo_loss": 0.6909135580062866, + "epoch": 5.233821445441663, + "grad_norm": 71.32727022935785, + "learning_rate": 2.7445811167764644e-07, + "logits": -1.1768852472305298, + "logps": -90.43959045410156, + "loss": 0.1741, + "objective": 0.1675560474395752, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5375000238418579, + "regularize": 0.0984647125005722, + "step": 2770 + }, + { + "dpo_loss": 0.6930040121078491, + "epoch": 5.243268776570619, + "grad_norm": 68.91942074445643, + "learning_rate": 2.736371120277825e-07, + "logits": -1.2606618404388428, + "logps": -89.94786071777344, + "loss": 0.1732, + "objective": 0.1796959936618805, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.11039556562900543, + "step": 2775 + }, + { + "dpo_loss": 0.6956884860992432, + "epoch": 5.252716107699575, + "grad_norm": 75.06042603871374, + "learning_rate": 2.72815855079326e-07, + "logits": -1.1935651302337646, + "logps": -86.52474212646484, + "loss": 0.169, + "objective": 0.1645137369632721, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.59375, + "regularize": 0.09494488686323166, + "step": 2780 + }, + { + "dpo_loss": 0.690733790397644, + "epoch": 5.262163438828531, + "grad_norm": 76.75556002327785, + "learning_rate": 2.719943497719585e-07, + "logits": -1.2216724157333374, + "logps": -89.10572814941406, + "loss": 0.1631, + "objective": 0.15377160906791687, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.08469823747873306, + "step": 2785 + }, + { + "dpo_loss": 0.6916483640670776, + "epoch": 5.271610769957487, + "grad_norm": 75.0828149872277, + "learning_rate": 2.711726050480653e-07, + "logits": -1.1828960180282593, + "logps": -90.04365539550781, + "loss": 0.1778, + "objective": 0.1791430115699768, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.10997817665338516, + "step": 2790 + }, + { + "dpo_loss": 0.6929569244384766, + "epoch": 5.281058101086443, + "grad_norm": 67.80140559185041, + "learning_rate": 2.703506298526374e-07, + "logits": -1.246824860572815, + "logps": -87.59806060791016, + "loss": 0.1671, + "objective": 0.17096053063869476, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.53125, + "regularize": 0.10166484117507935, + "step": 2795 + }, + { + "dpo_loss": 0.6785708069801331, + "epoch": 5.290505432215399, + "grad_norm": 66.61830847165874, + "learning_rate": 2.6952843313317506e-07, + "logits": -1.168810486793518, + "logps": -87.19744873046875, + "loss": 0.1743, + "objective": 0.1652020812034607, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.0973450094461441, + "step": 2800 + }, + { + "epoch": 5.290505432215399, + "eval_dpo_loss": 0.7076205611228943, + "eval_logits": -1.1971802711486816, + "eval_logps": -93.91000366210938, + "eval_loss": 0.33709466457366943, + "eval_objective": 0.3350926339626312, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.52173912525177, + "eval_regularize": 0.26433053612709045, + "eval_runtime": 158.1327, + "eval_samples_per_second": 36.615, + "eval_steps_per_second": 3.054, + "step": 2800 + }, + { + "dpo_loss": 0.6910735368728638, + "epoch": 5.299952763344355, + "grad_norm": 70.67384600159596, + "learning_rate": 2.687060238395898e-07, + "logits": -1.2108272314071655, + "logps": -87.95372009277344, + "loss": 0.1681, + "objective": 0.15209585428237915, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.08298849314451218, + "step": 2805 + }, + { + "dpo_loss": 0.6852573752403259, + "epoch": 5.309400094473311, + "grad_norm": 70.07310569800453, + "learning_rate": 2.6788341092410667e-07, + "logits": -1.1896169185638428, + "logps": -89.82859802246094, + "loss": 0.1607, + "objective": 0.1615303009748459, + "ranking_idealized": 0.4312500059604645, + "ranking_idealized_expo": 0.4124999940395355, + "ranking_simple": 0.41874998807907104, + "regularize": 0.09300456941127777, + "step": 2810 + }, + { + "dpo_loss": 0.6889798045158386, + "epoch": 5.318847425602267, + "grad_norm": 68.20006338317981, + "learning_rate": 2.6706060334116775e-07, + "logits": -1.1706857681274414, + "logps": -88.37882995605469, + "loss": 0.1694, + "objective": 0.16437754034996033, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.518750011920929, + "regularize": 0.0954795628786087, + "step": 2815 + }, + { + "dpo_loss": 0.6930587291717529, + "epoch": 5.328294756731223, + "grad_norm": 65.44290392226539, + "learning_rate": 2.6623761004733384e-07, + "logits": -1.1820690631866455, + "logps": -89.46732330322266, + "loss": 0.17, + "objective": 0.17971068620681763, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.11040481179952621, + "step": 2820 + }, + { + "dpo_loss": 0.6904914975166321, + "epoch": 5.337742087860179, + "grad_norm": 66.90833905570693, + "learning_rate": 2.6541444000118744e-07, + "logits": -1.1795885562896729, + "logps": -87.92036437988281, + "loss": 0.1717, + "objective": 0.16590887308120728, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.09685972332954407, + "step": 2825 + }, + { + "dpo_loss": 0.6931673288345337, + "epoch": 5.347189418989135, + "grad_norm": 79.47113987338702, + "learning_rate": 2.6459110216323494e-07, + "logits": -1.1880987882614136, + "logps": -88.9898910522461, + "loss": 0.1688, + "objective": 0.17713353037834167, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5874999761581421, + "regularize": 0.10781680047512054, + "step": 2830 + }, + { + "dpo_loss": 0.6927000284194946, + "epoch": 5.356636750118092, + "grad_norm": 77.112287607801, + "learning_rate": 2.637676054958092e-07, + "logits": -1.1750524044036865, + "logps": -88.32490539550781, + "loss": 0.1701, + "objective": 0.1717822402715683, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.10251224040985107, + "step": 2835 + }, + { + "dpo_loss": 0.6875866651535034, + "epoch": 5.366084081247048, + "grad_norm": 66.31189469268375, + "learning_rate": 2.629439589629722e-07, + "logits": -1.1873130798339844, + "logps": -88.2190170288086, + "loss": 0.1628, + "objective": 0.16570058465003967, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.09694191813468933, + "step": 2840 + }, + { + "dpo_loss": 0.6951637864112854, + "epoch": 5.375531412376004, + "grad_norm": 75.85897729374831, + "learning_rate": 2.621201715304168e-07, + "logits": -1.2368756532669067, + "logps": -88.91191101074219, + "loss": 0.1712, + "objective": 0.1714092344045639, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.1018928661942482, + "step": 2845 + }, + { + "dpo_loss": 0.689228892326355, + "epoch": 5.38497874350496, + "grad_norm": 66.51978594280956, + "learning_rate": 2.612962521653702e-07, + "logits": -1.1629688739776611, + "logps": -88.8650131225586, + "loss": 0.1715, + "objective": 0.16678547859191895, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.09786257147789001, + "step": 2850 + }, + { + "epoch": 5.38497874350496, + "eval_dpo_loss": 0.7084411978721619, + "eval_logits": -1.1938939094543457, + "eval_logps": -93.68083953857422, + "eval_loss": 0.3407943844795227, + "eval_objective": 0.34045931696891785, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5212215185165405, + "eval_regularize": 0.26961514353752136, + "eval_runtime": 158.2513, + "eval_samples_per_second": 36.587, + "eval_steps_per_second": 3.052, + "step": 2850 + }, + { + "dpo_loss": 0.6896392107009888, + "epoch": 5.394426074633916, + "grad_norm": 71.43890694528095, + "learning_rate": 2.6047220983649535e-07, + "logits": -1.1977884769439697, + "logps": -89.16146087646484, + "loss": 0.1634, + "objective": 0.16076752543449402, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.53125, + "regularize": 0.09180359542369843, + "step": 2855 + }, + { + "dpo_loss": 0.6991931796073914, + "epoch": 5.403873405762872, + "grad_norm": 70.78967226486061, + "learning_rate": 2.596480535137938e-07, + "logits": -1.1725437641143799, + "logps": -87.13787841796875, + "loss": 0.1664, + "objective": 0.17251865565776825, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.10259934514760971, + "step": 2860 + }, + { + "dpo_loss": 0.6924434900283813, + "epoch": 5.413320736891828, + "grad_norm": 71.78447378915702, + "learning_rate": 2.588237921685079e-07, + "logits": -1.279209852218628, + "logps": -88.69322967529297, + "loss": 0.1646, + "objective": 0.15512174367904663, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.08587740361690521, + "step": 2865 + }, + { + "dpo_loss": 0.6857197880744934, + "epoch": 5.422768068020784, + "grad_norm": 71.6129346374041, + "learning_rate": 2.579994347730233e-07, + "logits": -1.1856725215911865, + "logps": -87.46627044677734, + "loss": 0.1714, + "objective": 0.1694221794605255, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.10085020214319229, + "step": 2870 + }, + { + "dpo_loss": 0.6933203935623169, + "epoch": 5.43221539914974, + "grad_norm": 71.14208242063751, + "learning_rate": 2.571749903007712e-07, + "logits": -1.2006828784942627, + "logps": -87.65222930908203, + "loss": 0.1645, + "objective": 0.17003345489501953, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.10070140659809113, + "step": 2875 + }, + { + "dpo_loss": 0.6922504305839539, + "epoch": 5.441662730278696, + "grad_norm": 64.76173144858346, + "learning_rate": 2.563504677261307e-07, + "logits": -1.0994261503219604, + "logps": -87.58671569824219, + "loss": 0.1664, + "objective": 0.16446347534656525, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.4312500059604645, + "regularize": 0.09523843228816986, + "step": 2880 + }, + { + "dpo_loss": 0.6941710710525513, + "epoch": 5.451110061407652, + "grad_norm": 71.57354676807199, + "learning_rate": 2.555258760243308e-07, + "logits": -1.1873737573623657, + "logps": -86.51579284667969, + "loss": 0.1661, + "objective": 0.1644514799118042, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.09503436088562012, + "step": 2885 + }, + { + "dpo_loss": 0.6851297616958618, + "epoch": 5.460557392536608, + "grad_norm": 68.40112111684203, + "learning_rate": 2.5470122417135325e-07, + "logits": -1.2447443008422852, + "logps": -87.82207489013672, + "loss": 0.1715, + "objective": 0.17926739156246185, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4749999940395355, + "regularize": 0.11075443029403687, + "step": 2890 + }, + { + "dpo_loss": 0.6831666827201843, + "epoch": 5.470004723665564, + "grad_norm": 70.97494885343163, + "learning_rate": 2.538765211438346e-07, + "logits": -1.1744649410247803, + "logps": -87.33937072753906, + "loss": 0.1686, + "objective": 0.17264780402183533, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.10433115065097809, + "step": 2895 + }, + { + "dpo_loss": 0.6850699186325073, + "epoch": 5.47945205479452, + "grad_norm": 67.03454129134816, + "learning_rate": 2.5305177591896827e-07, + "logits": -1.2211939096450806, + "logps": -87.41989135742188, + "loss": 0.1643, + "objective": 0.17151689529418945, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.42500001192092896, + "ranking_simple": 0.4375, + "regularize": 0.1030099019408226, + "step": 2900 + }, + { + "epoch": 5.47945205479452, + "eval_dpo_loss": 0.709491491317749, + "eval_logits": -1.1941238641738892, + "eval_logps": -93.03810119628906, + "eval_loss": 0.3434217870235443, + "eval_objective": 0.34335386753082275, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5191511511802673, + "eval_regularize": 0.2724047005176544, + "eval_runtime": 157.9263, + "eval_samples_per_second": 36.663, + "eval_steps_per_second": 3.058, + "step": 2900 + }, + { + "dpo_loss": 0.6883138418197632, + "epoch": 5.488899385923476, + "grad_norm": 68.25764695131512, + "learning_rate": 2.5222699747440705e-07, + "logits": -1.1506329774856567, + "logps": -87.58967590332031, + "loss": 0.1628, + "objective": 0.1579241305589676, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.08909274637699127, + "step": 2905 + }, + { + "dpo_loss": 0.6868668794631958, + "epoch": 5.498346717052432, + "grad_norm": 63.82252969817784, + "learning_rate": 2.514021947881654e-07, + "logits": -1.1989600658416748, + "logps": -85.99334716796875, + "loss": 0.1579, + "objective": 0.15128464996814728, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.08259795606136322, + "step": 2910 + }, + { + "dpo_loss": 0.6849532723426819, + "epoch": 5.507794048181388, + "grad_norm": 70.75728445226883, + "learning_rate": 2.5057737683852166e-07, + "logits": -1.18284273147583, + "logps": -86.6116943359375, + "loss": 0.1592, + "objective": 0.1478767693042755, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.07938142865896225, + "step": 2915 + }, + { + "dpo_loss": 0.6933680772781372, + "epoch": 5.517241379310345, + "grad_norm": 71.53835495050187, + "learning_rate": 2.497525526039202e-07, + "logits": -1.2268903255462646, + "logps": -87.72315979003906, + "loss": 0.1604, + "objective": 0.16375622153282166, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.09441942721605301, + "step": 2920 + }, + { + "dpo_loss": 0.6884834170341492, + "epoch": 5.526688710439301, + "grad_norm": 70.7363668045601, + "learning_rate": 2.4892773106287406e-07, + "logits": -1.2739144563674927, + "logps": -87.93558502197266, + "loss": 0.1538, + "objective": 0.15243694186210632, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.512499988079071, + "regularize": 0.0835886001586914, + "step": 2925 + }, + { + "dpo_loss": 0.6854166984558105, + "epoch": 5.536136041568257, + "grad_norm": 64.52862440957706, + "learning_rate": 2.4810292119386674e-07, + "logits": -1.2274558544158936, + "logps": -87.8212890625, + "loss": 0.1554, + "objective": 0.15207795798778534, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 0.08353628218173981, + "step": 2930 + }, + { + "dpo_loss": 0.6835038661956787, + "epoch": 5.545583372697213, + "grad_norm": 71.03427758426722, + "learning_rate": 2.472781319752546e-07, + "logits": -1.1815805435180664, + "logps": -88.67829895019531, + "loss": 0.161, + "objective": 0.15246529877185822, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.0841149091720581, + "step": 2935 + }, + { + "dpo_loss": 0.6861475110054016, + "epoch": 5.555030703826169, + "grad_norm": 69.93722884891055, + "learning_rate": 2.4645337238516953e-07, + "logits": -1.2078359127044678, + "logps": -89.95475006103516, + "loss": 0.1645, + "objective": 0.1642766147851944, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.09566183388233185, + "step": 2940 + }, + { + "dpo_loss": 0.6943419575691223, + "epoch": 5.564478034955125, + "grad_norm": 67.34370411603598, + "learning_rate": 2.4562865140142065e-07, + "logits": -1.2549326419830322, + "logps": -87.40699768066406, + "loss": 0.1626, + "objective": 0.16093124449253082, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4437499940395355, + "regularize": 0.0914970338344574, + "step": 2945 + }, + { + "dpo_loss": 0.6921701431274414, + "epoch": 5.573925366084081, + "grad_norm": 67.90112389845122, + "learning_rate": 2.4480397800139697e-07, + "logits": -1.2807310819625854, + "logps": -85.22374725341797, + "loss": 0.1569, + "objective": 0.1513904631137848, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.550000011920929, + "regularize": 0.08217345923185349, + "step": 2950 + }, + { + "epoch": 5.573925366084081, + "eval_dpo_loss": 0.7083237767219543, + "eval_logits": -1.1993257999420166, + "eval_logps": -94.44893646240234, + "eval_loss": 0.34027528762817383, + "eval_objective": 0.340644896030426, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5191511511802673, + "eval_regularize": 0.2698124647140503, + "eval_runtime": 157.6445, + "eval_samples_per_second": 36.728, + "eval_steps_per_second": 3.064, + "step": 2950 + }, + { + "dpo_loss": 0.699337363243103, + "epoch": 5.583372697213037, + "grad_norm": 70.66002533656673, + "learning_rate": 2.439793611619693e-07, + "logits": -1.195084810256958, + "logps": -89.92610168457031, + "loss": 0.1672, + "objective": 0.16342179477214813, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.09348806738853455, + "step": 2955 + }, + { + "dpo_loss": 0.6918556690216064, + "epoch": 5.592820028341993, + "grad_norm": 64.9097814778515, + "learning_rate": 2.431548098593932e-07, + "logits": -1.2656776905059814, + "logps": -88.05064392089844, + "loss": 0.1528, + "objective": 0.15726497769355774, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5562499761581421, + "regularize": 0.08807940781116486, + "step": 2960 + }, + { + "dpo_loss": 0.6896753311157227, + "epoch": 5.602267359470949, + "grad_norm": 66.60639839782714, + "learning_rate": 2.4233033306921044e-07, + "logits": -1.2398788928985596, + "logps": -87.45914459228516, + "loss": 0.1555, + "objective": 0.15855464339256287, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.08958712220191956, + "step": 2965 + }, + { + "dpo_loss": 0.6900865435600281, + "epoch": 5.611714690599905, + "grad_norm": 68.83649894986999, + "learning_rate": 2.415059397661519e-07, + "logits": -1.2571332454681396, + "logps": -89.16593170166016, + "loss": 0.1615, + "objective": 0.160673126578331, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.09166448563337326, + "step": 2970 + }, + { + "dpo_loss": 0.6876694560050964, + "epoch": 5.621162021728861, + "grad_norm": 71.4594888607687, + "learning_rate": 2.4068163892403954e-07, + "logits": -1.2201224565505981, + "logps": -91.45027160644531, + "loss": 0.1677, + "objective": 0.16672007739543915, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.09795314073562622, + "step": 2975 + }, + { + "dpo_loss": 0.6961122751235962, + "epoch": 5.630609352857817, + "grad_norm": 66.06190204842538, + "learning_rate": 2.3985743951568896e-07, + "logits": -1.1683927774429321, + "logps": -89.21009826660156, + "loss": 0.1615, + "objective": 0.16139890253543854, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.543749988079071, + "regularize": 0.0917876735329628, + "step": 2980 + }, + { + "dpo_loss": 0.6950571537017822, + "epoch": 5.640056683986773, + "grad_norm": 71.8921965768204, + "learning_rate": 2.3903335051281155e-07, + "logits": -1.184612512588501, + "logps": -88.78089904785156, + "loss": 0.162, + "objective": 0.15779440104961395, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 0.08828870207071304, + "step": 2985 + }, + { + "dpo_loss": 0.6960257291793823, + "epoch": 5.649504015115729, + "grad_norm": 73.58902577978184, + "learning_rate": 2.3820938088591694e-07, + "logits": -1.1792681217193604, + "logps": -88.75108337402344, + "loss": 0.163, + "objective": 0.1602451205253601, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.0906425416469574, + "step": 2990 + }, + { + "dpo_loss": 0.6898195147514343, + "epoch": 5.658951346244686, + "grad_norm": 67.15580408040853, + "learning_rate": 2.373855396042152e-07, + "logits": -1.2541872262954712, + "logps": -91.73573303222656, + "loss": 0.1571, + "objective": 0.16459956765174866, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.09561760723590851, + "step": 2995 + }, + { + "dpo_loss": 0.6943185329437256, + "epoch": 5.6683986773736414, + "grad_norm": 71.35092730092892, + "learning_rate": 2.3656183563551954e-07, + "logits": -1.2561254501342773, + "logps": -89.2706527709961, + "loss": 0.16, + "objective": 0.16651032865047455, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.09707846492528915, + "step": 3000 + }, + { + "epoch": 5.6683986773736414, + "eval_dpo_loss": 0.7067892551422119, + "eval_logits": -1.1952264308929443, + "eval_logps": -94.13389587402344, + "eval_loss": 0.3336896300315857, + "eval_objective": 0.33315029740333557, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5232919454574585, + "eval_regularize": 0.26247134804725647, + "eval_runtime": 157.4877, + "eval_samples_per_second": 36.765, + "eval_steps_per_second": 3.067, + "step": 3000 + }, + { + "dpo_loss": 0.6860445141792297, + "epoch": 5.677846008502598, + "grad_norm": 69.9695559274476, + "learning_rate": 2.3573827794614836e-07, + "logits": -1.235999345779419, + "logps": -88.295654296875, + "loss": 0.1581, + "objective": 0.15271826088428497, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.612500011920929, + "ranking_simple": 0.6312500238418579, + "regularize": 0.08411379158496857, + "step": 3005 + }, + { + "dpo_loss": 0.6918520927429199, + "epoch": 5.687293339631554, + "grad_norm": 68.61279004584298, + "learning_rate": 2.3491487550082759e-07, + "logits": -1.1995729207992554, + "logps": -88.19017028808594, + "loss": 0.1579, + "objective": 0.1600116491317749, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.09082644432783127, + "step": 3010 + }, + { + "dpo_loss": 0.690693736076355, + "epoch": 5.69674067076051, + "grad_norm": 71.44504599708077, + "learning_rate": 2.3409163726259354e-07, + "logits": -1.2030599117279053, + "logps": -89.19579315185547, + "loss": 0.1578, + "objective": 0.15412333607673645, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.08505398035049438, + "step": 3015 + }, + { + "dpo_loss": 0.6911080479621887, + "epoch": 5.706188001889466, + "grad_norm": 62.79376194718201, + "learning_rate": 2.332685721926948e-07, + "logits": -1.1986589431762695, + "logps": -88.55294036865234, + "loss": 0.1619, + "objective": 0.15914146602153778, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.09003065526485443, + "step": 3020 + }, + { + "dpo_loss": 0.6796199083328247, + "epoch": 5.715635333018422, + "grad_norm": 69.44076468937035, + "learning_rate": 2.3244568925049522e-07, + "logits": -1.2015893459320068, + "logps": -89.00029754638672, + "loss": 0.1662, + "objective": 0.16765353083610535, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.09969155490398407, + "step": 3025 + }, + { + "dpo_loss": 0.6879712343215942, + "epoch": 5.725082664147378, + "grad_norm": 69.49885636121284, + "learning_rate": 2.3162299739337586e-07, + "logits": -1.2396279573440552, + "logps": -88.59342956542969, + "loss": 0.1609, + "objective": 0.163058340549469, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.09426120668649673, + "step": 3030 + }, + { + "dpo_loss": 0.6858317255973816, + "epoch": 5.7345299952763344, + "grad_norm": 65.93617691088278, + "learning_rate": 2.3080050557663807e-07, + "logits": -1.1753551959991455, + "logps": -89.18077087402344, + "loss": 0.1518, + "objective": 0.1514325886964798, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.46875, + "regularize": 0.08284942060709, + "step": 3035 + }, + { + "dpo_loss": 0.6900348663330078, + "epoch": 5.7439773264052905, + "grad_norm": 66.57478285269204, + "learning_rate": 2.2997822275340545e-07, + "logits": -1.1697237491607666, + "logps": -87.06428527832031, + "loss": 0.1562, + "objective": 0.1557130068540573, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.08670951426029205, + "step": 3040 + }, + { + "dpo_loss": 0.6868686079978943, + "epoch": 5.7534246575342465, + "grad_norm": 73.35640138982335, + "learning_rate": 2.2915615787452664e-07, + "logits": -1.164442777633667, + "logps": -88.9676742553711, + "loss": 0.1539, + "objective": 0.1539611518383026, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.08527431637048721, + "step": 3045 + }, + { + "dpo_loss": 0.6946436762809753, + "epoch": 5.7628719886632025, + "grad_norm": 69.71603742581622, + "learning_rate": 2.283343198884779e-07, + "logits": -1.171112060546875, + "logps": -88.559326171875, + "loss": 0.1556, + "objective": 0.159796804189682, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.09033244848251343, + "step": 3050 + }, + { + "epoch": 5.7628719886632025, + "eval_dpo_loss": 0.7074865102767944, + "eval_logits": -1.1943256855010986, + "eval_logps": -93.7010726928711, + "eval_loss": 0.3378671705722809, + "eval_objective": 0.3365963399410248, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.26584771275520325, + "eval_runtime": 158.4185, + "eval_samples_per_second": 36.549, + "eval_steps_per_second": 3.049, + "step": 3050 + }, + { + "dpo_loss": 0.6892920136451721, + "epoch": 5.7723193197921585, + "grad_norm": 70.77530028281073, + "learning_rate": 2.2751271774126578e-07, + "logits": -1.29044508934021, + "logps": -89.32376861572266, + "loss": 0.1527, + "objective": 0.15522916615009308, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.08629997074604034, + "step": 3055 + }, + { + "dpo_loss": 0.691388726234436, + "epoch": 5.7817666509211145, + "grad_norm": 67.76333355139332, + "learning_rate": 2.266913603763295e-07, + "logits": -1.2957406044006348, + "logps": -90.00084686279297, + "loss": 0.1522, + "objective": 0.14783525466918945, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.07869637757539749, + "step": 3060 + }, + { + "dpo_loss": 0.6861189603805542, + "epoch": 5.7912139820500705, + "grad_norm": 67.53343152706475, + "learning_rate": 2.2587025673444384e-07, + "logits": -1.228858232498169, + "logps": -88.78462219238281, + "loss": 0.1601, + "objective": 0.15433308482170105, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48750001192092896, + "regularize": 0.08572117984294891, + "step": 3065 + }, + { + "dpo_loss": 0.6877793073654175, + "epoch": 5.800661313179027, + "grad_norm": 64.90782659817482, + "learning_rate": 2.2504941575362153e-07, + "logits": -1.1816461086273193, + "logps": -88.59249877929688, + "loss": 0.147, + "objective": 0.14250199496746063, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.07372405380010605, + "step": 3070 + }, + { + "dpo_loss": 0.6855846047401428, + "epoch": 5.810108644307983, + "grad_norm": 68.67570881638909, + "learning_rate": 2.2422884636901648e-07, + "logits": -1.1656111478805542, + "logps": -89.04707336425781, + "loss": 0.152, + "objective": 0.1549055278301239, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.0863470733165741, + "step": 3075 + }, + { + "dpo_loss": 0.6973013877868652, + "epoch": 5.8195559754369395, + "grad_norm": 75.69732008065817, + "learning_rate": 2.2340855751282593e-07, + "logits": -1.1823872327804565, + "logps": -85.95021057128906, + "loss": 0.1618, + "objective": 0.15959547460079193, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.08986534923315048, + "step": 3080 + }, + { + "dpo_loss": 0.6956144571304321, + "epoch": 5.8290033065658955, + "grad_norm": 72.67073003432586, + "learning_rate": 2.2258855811419338e-07, + "logits": -1.1929875612258911, + "logps": -88.76408386230469, + "loss": 0.1562, + "objective": 0.15549372136592865, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.08593227714300156, + "step": 3085 + }, + { + "dpo_loss": 0.6876565217971802, + "epoch": 5.8384506376948515, + "grad_norm": 72.90703070061888, + "learning_rate": 2.2176885709911174e-07, + "logits": -1.2129268646240234, + "logps": -88.34392547607422, + "loss": 0.1516, + "objective": 0.1628263145685196, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.09406064450740814, + "step": 3090 + }, + { + "dpo_loss": 0.6911213397979736, + "epoch": 5.8478979688238075, + "grad_norm": 70.10000752386972, + "learning_rate": 2.209494633903257e-07, + "logits": -1.2557677030563354, + "logps": -89.69297790527344, + "loss": 0.1528, + "objective": 0.15556392073631287, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.08645178377628326, + "step": 3095 + }, + { + "dpo_loss": 0.6917442083358765, + "epoch": 5.8573452999527635, + "grad_norm": 68.58660189854848, + "learning_rate": 2.201303859072349e-07, + "logits": -1.174304723739624, + "logps": -87.0228500366211, + "loss": 0.1544, + "objective": 0.15769553184509277, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.08852110803127289, + "step": 3100 + }, + { + "epoch": 5.8573452999527635, + "eval_dpo_loss": 0.7081683278083801, + "eval_logits": -1.1895536184310913, + "eval_logps": -93.8059310913086, + "eval_loss": 0.3406558930873871, + "eval_objective": 0.3385447859764099, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5212215185165405, + "eval_regularize": 0.26772794127464294, + "eval_runtime": 157.8418, + "eval_samples_per_second": 36.682, + "eval_steps_per_second": 3.06, + "step": 3100 + }, + { + "dpo_loss": 0.6924247145652771, + "epoch": 5.8667926310817196, + "grad_norm": 70.60312821876914, + "learning_rate": 2.1931163356579667e-07, + "logits": -1.2612760066986084, + "logps": -87.61418151855469, + "loss": 0.1603, + "objective": 0.15718653798103333, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 0.08794407546520233, + "step": 3105 + }, + { + "dpo_loss": 0.6939336657524109, + "epoch": 5.876239962210676, + "grad_norm": 66.79816676685348, + "learning_rate": 2.184932152784292e-07, + "logits": -1.2125117778778076, + "logps": -87.36097717285156, + "loss": 0.154, + "objective": 0.14958223700523376, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.08018886297941208, + "step": 3110 + }, + { + "dpo_loss": 0.6892750263214111, + "epoch": 5.885687293339632, + "grad_norm": 61.589115768630904, + "learning_rate": 2.176751399539143e-07, + "logits": -1.1249029636383057, + "logps": -85.64997100830078, + "loss": 0.1516, + "objective": 0.1503055989742279, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.08137810230255127, + "step": 3115 + }, + { + "dpo_loss": 0.6893929839134216, + "epoch": 5.895134624468588, + "grad_norm": 67.96856168245768, + "learning_rate": 2.168574164973005e-07, + "logits": -1.2797582149505615, + "logps": -86.67450714111328, + "loss": 0.1526, + "objective": 0.15556205809116364, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.08662275969982147, + "step": 3120 + }, + { + "dpo_loss": 0.6934850811958313, + "epoch": 5.904581955597544, + "grad_norm": 61.93077678604341, + "learning_rate": 2.16040053809806e-07, + "logits": -1.2393492460250854, + "logps": -89.31920623779297, + "loss": 0.1476, + "objective": 0.14929169416427612, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48750001192092896, + "regularize": 0.07994318008422852, + "step": 3125 + }, + { + "dpo_loss": 0.6860083937644958, + "epoch": 5.9140292867265, + "grad_norm": 65.98308041473744, + "learning_rate": 2.1522306078872217e-07, + "logits": -1.1525981426239014, + "logps": -87.80781555175781, + "loss": 0.1452, + "objective": 0.15540987253189087, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.08680902421474457, + "step": 3130 + }, + { + "dpo_loss": 0.6895954012870789, + "epoch": 5.923476617855456, + "grad_norm": 69.45472051004639, + "learning_rate": 2.1440644632731608e-07, + "logits": -1.2585550546646118, + "logps": -86.21622467041016, + "loss": 0.1526, + "objective": 0.1478964388370514, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.07893688976764679, + "step": 3135 + }, + { + "dpo_loss": 0.6982107758522034, + "epoch": 5.932923948984412, + "grad_norm": 69.75620955548241, + "learning_rate": 2.1359021931473444e-07, + "logits": -1.1937921047210693, + "logps": -87.94801330566406, + "loss": 0.1535, + "objective": 0.16531306505203247, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.09549199044704437, + "step": 3140 + }, + { + "dpo_loss": 0.6916354894638062, + "epoch": 5.942371280113368, + "grad_norm": 69.26169680454808, + "learning_rate": 2.1277438863590602e-07, + "logits": -1.2406847476959229, + "logps": -86.54442596435547, + "loss": 0.1513, + "objective": 0.14869888126850128, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.0795353353023529, + "step": 3145 + }, + { + "dpo_loss": 0.6927730441093445, + "epoch": 5.951818611242324, + "grad_norm": 69.08292513293352, + "learning_rate": 2.119589631714457e-07, + "logits": -1.1690309047698975, + "logps": -88.31249237060547, + "loss": 0.1539, + "objective": 0.15352843701839447, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.581250011920929, + "regularize": 0.08425113558769226, + "step": 3150 + }, + { + "epoch": 5.951818611242324, + "eval_dpo_loss": 0.7079322338104248, + "eval_logits": -1.2013435363769531, + "eval_logps": -93.3647232055664, + "eval_loss": 0.3377290964126587, + "eval_objective": 0.3357836604118347, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.520703911781311, + "eval_regularize": 0.2649904191493988, + "eval_runtime": 158.4666, + "eval_samples_per_second": 36.538, + "eval_steps_per_second": 3.048, + "step": 3150 + }, + { + "dpo_loss": 0.6902034878730774, + "epoch": 5.961265942371281, + "grad_norm": 63.635186729008495, + "learning_rate": 2.1114395179755736e-07, + "logits": -1.2969659566879272, + "logps": -90.55731201171875, + "loss": 0.1479, + "objective": 0.14589950442314148, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.07687915861606598, + "step": 3155 + }, + { + "dpo_loss": 0.677855372428894, + "epoch": 5.970713273500236, + "grad_norm": 72.69121739480386, + "learning_rate": 2.1032936338593717e-07, + "logits": -1.2355512380599976, + "logps": -88.95880126953125, + "loss": 0.1504, + "objective": 0.14666247367858887, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.07887694984674454, + "step": 3160 + }, + { + "dpo_loss": 0.6994338631629944, + "epoch": 5.980160604629193, + "grad_norm": 66.8268105376101, + "learning_rate": 2.0951520680367742e-07, + "logits": -1.246654748916626, + "logps": -89.81675720214844, + "loss": 0.1491, + "objective": 0.15047189593315125, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.08052850514650345, + "step": 3165 + }, + { + "dpo_loss": 0.6910912394523621, + "epoch": 5.989607935758149, + "grad_norm": 74.09902751093182, + "learning_rate": 2.0870149091316966e-07, + "logits": -1.2551653385162354, + "logps": -86.7063980102539, + "loss": 0.1467, + "objective": 0.14258627593517303, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 0.07347716391086578, + "step": 3170 + }, + { + "dpo_loss": 0.6935809850692749, + "epoch": 5.999055266887105, + "grad_norm": 66.65350456772539, + "learning_rate": 2.0788822457200842e-07, + "logits": -1.154550313949585, + "logps": -88.93148803710938, + "loss": 0.1516, + "objective": 0.15211038291454315, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.0827522873878479, + "step": 3175 + }, + { + "dpo_loss": 0.6875017881393433, + "epoch": 6.008502598016061, + "grad_norm": 67.3310083616562, + "learning_rate": 2.0707541663289462e-07, + "logits": -1.1374294757843018, + "logps": -87.5985107421875, + "loss": 0.1529, + "objective": 0.15485554933547974, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.0861053541302681, + "step": 3180 + }, + { + "dpo_loss": 0.6881211400032043, + "epoch": 6.017949929145017, + "grad_norm": 64.40493710346342, + "learning_rate": 2.0626307594353936e-07, + "logits": -1.2522262334823608, + "logps": -87.79068756103516, + "loss": 0.1497, + "objective": 0.14534898102283478, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5375000238418579, + "regularize": 0.0765368640422821, + "step": 3185 + }, + { + "dpo_loss": 0.6818069219589233, + "epoch": 6.027397260273973, + "grad_norm": 65.13273445841992, + "learning_rate": 2.0545121134656777e-07, + "logits": -1.1388541460037231, + "logps": -86.80610656738281, + "loss": 0.1491, + "objective": 0.15392068028450012, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.08573999255895615, + "step": 3190 + }, + { + "dpo_loss": 0.6863843202590942, + "epoch": 6.036844591402929, + "grad_norm": 65.24808997845832, + "learning_rate": 2.0463983167942218e-07, + "logits": -1.2672401666641235, + "logps": -86.259033203125, + "loss": 0.1502, + "objective": 0.1560548096895218, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.08741637319326401, + "step": 3195 + }, + { + "dpo_loss": 0.6929386258125305, + "epoch": 6.046291922531885, + "grad_norm": 62.711010349308516, + "learning_rate": 2.0382894577426642e-07, + "logits": -1.1413090229034424, + "logps": -86.51998138427734, + "loss": 0.1448, + "objective": 0.14656969904899597, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.07727585732936859, + "step": 3200 + }, + { + "epoch": 6.046291922531885, + "eval_dpo_loss": 0.7086135149002075, + "eval_logits": -1.1912150382995605, + "eval_logps": -93.06739044189453, + "eval_loss": 0.34175461530685425, + "eval_objective": 0.34017154574394226, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5181159377098083, + "eval_regularize": 0.2693101763725281, + "eval_runtime": 157.8644, + "eval_samples_per_second": 36.677, + "eval_steps_per_second": 3.06, + "step": 3200 + }, + { + "dpo_loss": 0.6970037221908569, + "epoch": 6.055739253660841, + "grad_norm": 65.08813909383588, + "learning_rate": 2.0301856245788965e-07, + "logits": -1.2222968339920044, + "logps": -86.71208190917969, + "loss": 0.1466, + "objective": 0.14986823499202728, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5375000238418579, + "regularize": 0.08016785979270935, + "step": 3205 + }, + { + "dpo_loss": 0.6925697922706604, + "epoch": 6.065186584789797, + "grad_norm": 67.16033624454055, + "learning_rate": 2.0220869055160998e-07, + "logits": -1.1997841596603394, + "logps": -87.62628173828125, + "loss": 0.1476, + "objective": 0.1423240303993225, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.46875, + "regularize": 0.07306705415248871, + "step": 3210 + }, + { + "dpo_loss": 0.6911340355873108, + "epoch": 6.074633915918753, + "grad_norm": 69.8806686059364, + "learning_rate": 2.0139933887117886e-07, + "logits": -1.2009985446929932, + "logps": -86.01728057861328, + "loss": 0.1462, + "objective": 0.148556187748909, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.07944278419017792, + "step": 3215 + }, + { + "dpo_loss": 0.6944746375083923, + "epoch": 6.084081247047709, + "grad_norm": 70.31322329461948, + "learning_rate": 2.0059051622668456e-07, + "logits": -1.1811740398406982, + "logps": -89.98876190185547, + "loss": 0.1503, + "objective": 0.15364637970924377, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.08419889211654663, + "step": 3220 + }, + { + "dpo_loss": 0.6908897161483765, + "epoch": 6.093528578176665, + "grad_norm": 65.60153658686136, + "learning_rate": 1.9978223142245707e-07, + "logits": -1.1639667749404907, + "logps": -87.06336975097656, + "loss": 0.1469, + "objective": 0.14311951398849487, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.48124998807907104, + "regularize": 0.07403053343296051, + "step": 3225 + }, + { + "dpo_loss": 0.6866973638534546, + "epoch": 6.102975909305621, + "grad_norm": 65.29973630577604, + "learning_rate": 1.989744932569714e-07, + "logits": -1.2155070304870605, + "logps": -85.58670043945312, + "loss": 0.1466, + "objective": 0.14489760994911194, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.07622788101434708, + "step": 3230 + }, + { + "dpo_loss": 0.6991506218910217, + "epoch": 6.112423240434577, + "grad_norm": 76.05159682239864, + "learning_rate": 1.9816731052275233e-07, + "logits": -1.231654167175293, + "logps": -88.26829528808594, + "loss": 0.1494, + "objective": 0.15017834305763245, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.08026325702667236, + "step": 3235 + }, + { + "dpo_loss": 0.6891576647758484, + "epoch": 6.121870571563533, + "grad_norm": 70.81562644042008, + "learning_rate": 1.973606920062786e-07, + "logits": -1.22501802444458, + "logps": -89.1203842163086, + "loss": 0.1463, + "objective": 0.1460135132074356, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.4749999940395355, + "regularize": 0.07709775865077972, + "step": 3240 + }, + { + "dpo_loss": 0.6927801370620728, + "epoch": 6.13131790269249, + "grad_norm": 65.23971186277288, + "learning_rate": 1.965546464878871e-07, + "logits": -1.1830025911331177, + "logps": -88.51461791992188, + "loss": 0.1431, + "objective": 0.14562612771987915, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.07634811103343964, + "step": 3245 + }, + { + "dpo_loss": 0.688566267490387, + "epoch": 6.140765233821446, + "grad_norm": 67.00376402860445, + "learning_rate": 1.957491827416777e-07, + "logits": -1.1757619380950928, + "logps": -88.70887756347656, + "loss": 0.1479, + "objective": 0.16261467337608337, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.09375804662704468, + "step": 3250 + }, + { + "epoch": 6.140765233821446, + "eval_dpo_loss": 0.7078864574432373, + "eval_logits": -1.1883435249328613, + "eval_logps": -93.16507720947266, + "eval_loss": 0.34374916553497314, + "eval_objective": 0.34232527017593384, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.52173912525177, + "eval_regularize": 0.27153661847114563, + "eval_runtime": 159.3838, + "eval_samples_per_second": 36.327, + "eval_steps_per_second": 3.03, + "step": 3250 + }, + { + "dpo_loss": 0.6872513294219971, + "epoch": 6.150212564950402, + "grad_norm": 70.56196265247418, + "learning_rate": 1.9494430953541719e-07, + "logits": -1.215637445449829, + "logps": -87.37667083740234, + "loss": 0.1434, + "objective": 0.13861486315727234, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.06988973170518875, + "step": 3255 + }, + { + "dpo_loss": 0.6876817941665649, + "epoch": 6.159659896079358, + "grad_norm": 67.32751350607244, + "learning_rate": 1.9414003563044401e-07, + "logits": -1.2061582803726196, + "logps": -87.83901977539062, + "loss": 0.1417, + "objective": 0.15059073269367218, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.08182253688573837, + "step": 3260 + }, + { + "dpo_loss": 0.6937980651855469, + "epoch": 6.169107227208314, + "grad_norm": 78.19000031864657, + "learning_rate": 1.9333636978157363e-07, + "logits": -1.1729189157485962, + "logps": -88.02546691894531, + "loss": 0.1467, + "objective": 0.1545541137456894, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.543749988079071, + "regularize": 0.08517428487539291, + "step": 3265 + }, + { + "dpo_loss": 0.6834089756011963, + "epoch": 6.17855455833727, + "grad_norm": 65.86655539594295, + "learning_rate": 1.9253332073700193e-07, + "logits": -1.2429250478744507, + "logps": -87.91304016113281, + "loss": 0.1404, + "objective": 0.14111605286598206, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.07277516275644302, + "step": 3270 + }, + { + "dpo_loss": 0.6903216242790222, + "epoch": 6.188001889466226, + "grad_norm": 73.07389560054459, + "learning_rate": 1.9173089723821087e-07, + "logits": -1.1890922784805298, + "logps": -89.6094970703125, + "loss": 0.1426, + "objective": 0.14552678167819977, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.07649461925029755, + "step": 3275 + }, + { + "dpo_loss": 0.6901880502700806, + "epoch": 6.197449220595182, + "grad_norm": 69.12858727264873, + "learning_rate": 1.9092910801987324e-07, + "logits": -1.1704219579696655, + "logps": -87.91764831542969, + "loss": 0.147, + "objective": 0.14026370644569397, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.07124490290880203, + "step": 3280 + }, + { + "dpo_loss": 0.6826928853988647, + "epoch": 6.206896551724138, + "grad_norm": 66.9720371973547, + "learning_rate": 1.9012796180975726e-07, + "logits": -1.1560781002044678, + "logps": -89.125732421875, + "loss": 0.144, + "objective": 0.13750192523002625, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.06923265755176544, + "step": 3285 + }, + { + "dpo_loss": 0.684235155582428, + "epoch": 6.216343882853094, + "grad_norm": 78.42657030667156, + "learning_rate": 1.8932746732863196e-07, + "logits": -1.2505360841751099, + "logps": -87.80974578857422, + "loss": 0.1487, + "objective": 0.15419574081897736, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.53125, + "regularize": 0.08577221632003784, + "step": 3290 + }, + { + "dpo_loss": 0.6891018152236938, + "epoch": 6.22579121398205, + "grad_norm": 69.4336784183768, + "learning_rate": 1.8852763329017186e-07, + "logits": -1.182964563369751, + "logps": -87.61400604248047, + "loss": 0.1426, + "objective": 0.14548328518867493, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.07657310366630554, + "step": 3295 + }, + { + "dpo_loss": 0.6955987811088562, + "epoch": 6.235238545111006, + "grad_norm": 65.24436277654908, + "learning_rate": 1.877284684008625e-07, + "logits": -1.2475354671478271, + "logps": -86.30474090576172, + "loss": 0.1408, + "objective": 0.13629359006881714, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.0667337030172348, + "step": 3300 + }, + { + "epoch": 6.235238545111006, + "eval_dpo_loss": 0.7074025273323059, + "eval_logits": -1.182108998298645, + "eval_logps": -93.40288543701172, + "eval_loss": 0.34269240498542786, + "eval_objective": 0.3404931426048279, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.26975294947624207, + "eval_runtime": 157.5057, + "eval_samples_per_second": 36.761, + "eval_steps_per_second": 3.067, + "step": 3300 + }, + { + "dpo_loss": 0.6913751363754272, + "epoch": 6.244685876239962, + "grad_norm": 70.40922944532815, + "learning_rate": 1.8692998135990552e-07, + "logits": -1.1527745723724365, + "logps": -88.61917114257812, + "loss": 0.1391, + "objective": 0.14121171832084656, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.4937500059604645, + "regularize": 0.07207418978214264, + "step": 3305 + }, + { + "dpo_loss": 0.6933282613754272, + "epoch": 6.254133207368918, + "grad_norm": 64.18766718774896, + "learning_rate": 1.8613218085912363e-07, + "logits": -1.2052415609359741, + "logps": -87.96755981445312, + "loss": 0.1439, + "objective": 0.14554929733276367, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.07621648907661438, + "step": 3310 + }, + { + "dpo_loss": 0.6829043030738831, + "epoch": 6.263580538497874, + "grad_norm": 66.70699808027437, + "learning_rate": 1.8533507558286666e-07, + "logits": -1.1629126071929932, + "logps": -85.55716705322266, + "loss": 0.1414, + "objective": 0.13286006450653076, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.0645696222782135, + "step": 3315 + }, + { + "dpo_loss": 0.6907526254653931, + "epoch": 6.27302786962683, + "grad_norm": 64.95029785368621, + "learning_rate": 1.8453867420791635e-07, + "logits": -1.217167854309082, + "logps": -87.27310943603516, + "loss": 0.1431, + "objective": 0.13572126626968384, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.06664600968360901, + "step": 3320 + }, + { + "dpo_loss": 0.6905287504196167, + "epoch": 6.282475200755787, + "grad_norm": 63.119014952755904, + "learning_rate": 1.8374298540339256e-07, + "logits": -1.142101526260376, + "logps": -86.95668029785156, + "loss": 0.1372, + "objective": 0.13219018280506134, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.06313730776309967, + "step": 3325 + }, + { + "dpo_loss": 0.6907231211662292, + "epoch": 6.291922531884743, + "grad_norm": 70.32560354245551, + "learning_rate": 1.8294801783065828e-07, + "logits": -1.243328332901001, + "logps": -89.5649642944336, + "loss": 0.1386, + "objective": 0.1380024254322052, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.06893011927604675, + "step": 3330 + }, + { + "dpo_loss": 0.6874068975448608, + "epoch": 6.301369863013699, + "grad_norm": 71.44456692823061, + "learning_rate": 1.8215378014322557e-07, + "logits": -1.1567237377166748, + "logps": -88.77133178710938, + "loss": 0.1371, + "objective": 0.1324915885925293, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.06375088542699814, + "step": 3335 + }, + { + "dpo_loss": 0.6807462573051453, + "epoch": 6.310817194142655, + "grad_norm": 68.66265924559801, + "learning_rate": 1.8136028098666187e-07, + "logits": -1.1961827278137207, + "logps": -88.57237243652344, + "loss": 0.1372, + "objective": 0.1379082351922989, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.543749988079071, + "regularize": 0.06983359158039093, + "step": 3340 + }, + { + "dpo_loss": 0.6893335580825806, + "epoch": 6.320264525271611, + "grad_norm": 70.97693865652336, + "learning_rate": 1.8056752899849503e-07, + "logits": -1.1134297847747803, + "logps": -86.894287109375, + "loss": 0.1414, + "objective": 0.1386771947145462, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.45625001192092896, + "regularize": 0.06974383443593979, + "step": 3345 + }, + { + "dpo_loss": 0.6851335167884827, + "epoch": 6.329711856400567, + "grad_norm": 67.57923187365041, + "learning_rate": 1.7977553280811975e-07, + "logits": -1.1563271284103394, + "logps": -88.98851013183594, + "loss": 0.1475, + "objective": 0.1491604745388031, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.08064713329076767, + "step": 3350 + }, + { + "epoch": 6.329711856400567, + "eval_dpo_loss": 0.7078341841697693, + "eval_logits": -1.1855790615081787, + "eval_logps": -93.60319519042969, + "eval_loss": 0.34008708596229553, + "eval_objective": 0.3383205533027649, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5191511511802673, + "eval_regularize": 0.2675371468067169, + "eval_runtime": 157.0694, + "eval_samples_per_second": 36.863, + "eval_steps_per_second": 3.075, + "step": 3350 + }, + { + "dpo_loss": 0.686167299747467, + "epoch": 6.339159187529523, + "grad_norm": 69.56892331213925, + "learning_rate": 1.7898430103670373e-07, + "logits": -1.1535394191741943, + "logps": -87.93611907958984, + "loss": 0.1388, + "objective": 0.13828575611114502, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.06966903805732727, + "step": 3355 + }, + { + "dpo_loss": 0.6901485323905945, + "epoch": 6.348606518658479, + "grad_norm": 70.11794207823513, + "learning_rate": 1.7819384229709355e-07, + "logits": -1.162483811378479, + "logps": -88.49964904785156, + "loss": 0.1419, + "objective": 0.14245642721652985, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.46875, + "regularize": 0.07344158738851547, + "step": 3360 + }, + { + "dpo_loss": 0.6842073798179626, + "epoch": 6.358053849787435, + "grad_norm": 72.89111653782912, + "learning_rate": 1.7740416519372126e-07, + "logits": -1.1979695558547974, + "logps": -87.9147720336914, + "loss": 0.1353, + "objective": 0.14146491885185242, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.07304418087005615, + "step": 3365 + }, + { + "dpo_loss": 0.6911894083023071, + "epoch": 6.367501180916391, + "grad_norm": 64.23327954770339, + "learning_rate": 1.7661527832251023e-07, + "logits": -1.2230565547943115, + "logps": -88.17019653320312, + "loss": 0.142, + "objective": 0.14444653689861298, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4312500059604645, + "regularize": 0.07532760500907898, + "step": 3370 + }, + { + "dpo_loss": 0.6906020641326904, + "epoch": 6.376948512045347, + "grad_norm": 68.69602710927053, + "learning_rate": 1.7582719027078206e-07, + "logits": -1.212783932685852, + "logps": -87.7943344116211, + "loss": 0.137, + "objective": 0.13813506066799164, + "ranking_idealized": 0.6312500238418579, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.06907486170530319, + "step": 3375 + }, + { + "dpo_loss": 0.6869951486587524, + "epoch": 6.386395843174303, + "grad_norm": 68.40435028508888, + "learning_rate": 1.7503990961716303e-07, + "logits": -1.1598793268203735, + "logps": -86.40039825439453, + "loss": 0.1404, + "objective": 0.1395477056503296, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.07084819674491882, + "step": 3380 + }, + { + "dpo_loss": 0.6884499788284302, + "epoch": 6.395843174303259, + "grad_norm": 64.66069456653081, + "learning_rate": 1.7425344493149025e-07, + "logits": -1.162398338317871, + "logps": -88.71234130859375, + "loss": 0.1368, + "objective": 0.13811743259429932, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.06927243620157242, + "step": 3385 + }, + { + "dpo_loss": 0.6859569549560547, + "epoch": 6.405290505432215, + "grad_norm": 66.24542313878766, + "learning_rate": 1.7346780477471897e-07, + "logits": -1.166072130203247, + "logps": -87.08891296386719, + "loss": 0.1329, + "objective": 0.13712583482265472, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.06853015720844269, + "step": 3390 + }, + { + "dpo_loss": 0.6902660131454468, + "epoch": 6.414737836561171, + "grad_norm": 65.71480604448934, + "learning_rate": 1.7268299769882905e-07, + "logits": -1.1450494527816772, + "logps": -87.38763427734375, + "loss": 0.135, + "objective": 0.136188805103302, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.06716220825910568, + "step": 3395 + }, + { + "dpo_loss": 0.6831803321838379, + "epoch": 6.424185167690127, + "grad_norm": 74.21086801419243, + "learning_rate": 1.7189903224673205e-07, + "logits": -1.257408857345581, + "logps": -90.4032974243164, + "loss": 0.1339, + "objective": 0.13736803829669952, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.06904999911785126, + "step": 3400 + }, + { + "epoch": 6.424185167690127, + "eval_dpo_loss": 0.7081868648529053, + "eval_logits": -1.1891323328018188, + "eval_logps": -93.52287292480469, + "eval_loss": 0.34146103262901306, + "eval_objective": 0.34015777707099915, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5212215185165405, + "eval_regularize": 0.26933905482292175, + "eval_runtime": 162.0893, + "eval_samples_per_second": 35.721, + "eval_steps_per_second": 2.98, + "step": 3400 + }, + { + "dpo_loss": 0.6947253942489624, + "epoch": 6.433632498819083, + "grad_norm": 63.6629594252536, + "learning_rate": 1.7111591695217803e-07, + "logits": -1.2772033214569092, + "logps": -88.42134857177734, + "loss": 0.1367, + "objective": 0.13918960094451904, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.06971704959869385, + "step": 3405 + }, + { + "dpo_loss": 0.6906223297119141, + "epoch": 6.44307982994804, + "grad_norm": 69.0498878453693, + "learning_rate": 1.7033366033966273e-07, + "logits": -1.1488436460494995, + "logps": -87.87260437011719, + "loss": 0.137, + "objective": 0.1409592628479004, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.07189702987670898, + "step": 3410 + }, + { + "dpo_loss": 0.6856287121772766, + "epoch": 6.452527161076996, + "grad_norm": 72.65038733274254, + "learning_rate": 1.6955227092433511e-07, + "logits": -1.1925437450408936, + "logps": -88.14703369140625, + "loss": 0.1418, + "objective": 0.13947254419326782, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5, + "regularize": 0.07090966403484344, + "step": 3415 + }, + { + "dpo_loss": 0.6915432214736938, + "epoch": 6.461974492205952, + "grad_norm": 68.73931631536732, + "learning_rate": 1.6877175721190413e-07, + "logits": -1.2278176546096802, + "logps": -90.1395263671875, + "loss": 0.1422, + "objective": 0.14660508930683136, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4375, + "regularize": 0.07745076715946198, + "step": 3420 + }, + { + "dpo_loss": 0.6973224878311157, + "epoch": 6.471421823334908, + "grad_norm": 64.82032182916474, + "learning_rate": 1.679921276985464e-07, + "logits": -1.1448619365692139, + "logps": -86.83024597167969, + "loss": 0.1369, + "objective": 0.1359107941389084, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.06617854535579681, + "step": 3425 + }, + { + "dpo_loss": 0.6940586566925049, + "epoch": 6.480869154463864, + "grad_norm": 66.38959885778102, + "learning_rate": 1.672133908708138e-07, + "logits": -1.1017616987228394, + "logps": -85.83534240722656, + "loss": 0.1311, + "objective": 0.13164441287517548, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.062238551676273346, + "step": 3430 + }, + { + "dpo_loss": 0.6849575042724609, + "epoch": 6.49031648559282, + "grad_norm": 62.103826503006154, + "learning_rate": 1.6643555520554098e-07, + "logits": -1.2360210418701172, + "logps": -88.12874603271484, + "loss": 0.1367, + "objective": 0.13433274626731873, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.6000000238418579, + "ranking_simple": 0.606249988079071, + "regularize": 0.06583701074123383, + "step": 3435 + }, + { + "dpo_loss": 0.6873617172241211, + "epoch": 6.499763816721776, + "grad_norm": 68.82750339484012, + "learning_rate": 1.6565862916975307e-07, + "logits": -1.160133719444275, + "logps": -86.71800231933594, + "loss": 0.1419, + "objective": 0.14208652079105377, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.07335034757852554, + "step": 3440 + }, + { + "dpo_loss": 0.6891940236091614, + "epoch": 6.509211147850732, + "grad_norm": 74.40728288667519, + "learning_rate": 1.648826212205735e-07, + "logits": -1.2320117950439453, + "logps": -88.15089416503906, + "loss": 0.14, + "objective": 0.1433391571044922, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.07441975176334381, + "step": 3445 + }, + { + "dpo_loss": 0.6927613615989685, + "epoch": 6.518658478979688, + "grad_norm": 66.771754638497, + "learning_rate": 1.6410753980513208e-07, + "logits": -1.1595888137817383, + "logps": -89.47431945800781, + "loss": 0.1394, + "objective": 0.14031726121902466, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.07104112207889557, + "step": 3450 + }, + { + "epoch": 6.518658478979688, + "eval_dpo_loss": 0.7083378434181213, + "eval_logits": -1.1958953142166138, + "eval_logps": -94.05181884765625, + "eval_loss": 0.3397524952888489, + "eval_objective": 0.33791953325271606, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5186335444450378, + "eval_regularize": 0.2670857906341553, + "eval_runtime": 155.9657, + "eval_samples_per_second": 37.124, + "eval_steps_per_second": 3.097, + "step": 3450 + }, + { + "dpo_loss": 0.6850086450576782, + "epoch": 6.528105810108644, + "grad_norm": 70.24192103411116, + "learning_rate": 1.633333933604731e-07, + "logits": -1.2264412641525269, + "logps": -90.35914611816406, + "loss": 0.1381, + "objective": 0.13573698699474335, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.06723614037036896, + "step": 3455 + }, + { + "dpo_loss": 0.6905264854431152, + "epoch": 6.5375531412376, + "grad_norm": 66.32091148253436, + "learning_rate": 1.6256019031346301e-07, + "logits": -1.188407301902771, + "logps": -86.26610565185547, + "loss": 0.1352, + "objective": 0.1351533830165863, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.06610073894262314, + "step": 3460 + }, + { + "dpo_loss": 0.6922488212585449, + "epoch": 6.547000472366556, + "grad_norm": 69.0551628405319, + "learning_rate": 1.6178793908069938e-07, + "logits": -1.2067480087280273, + "logps": -89.35780334472656, + "loss": 0.1331, + "objective": 0.1331738531589508, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.06394897401332855, + "step": 3465 + }, + { + "dpo_loss": 0.6851745843887329, + "epoch": 6.556447803495512, + "grad_norm": 68.33813577635021, + "learning_rate": 1.6101664806841857e-07, + "logits": -1.151658296585083, + "logps": -88.51567077636719, + "loss": 0.135, + "objective": 0.13486088812351227, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.06634342670440674, + "step": 3470 + }, + { + "dpo_loss": 0.6871123909950256, + "epoch": 6.565895134624468, + "grad_norm": 63.68925248136598, + "learning_rate": 1.60246325672405e-07, + "logits": -1.2574050426483154, + "logps": -88.74208068847656, + "loss": 0.131, + "objective": 0.13215181231498718, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.06344055384397507, + "step": 3475 + }, + { + "dpo_loss": 0.6891454458236694, + "epoch": 6.575342465753424, + "grad_norm": 65.67103831685853, + "learning_rate": 1.59476980277899e-07, + "logits": -1.193644404411316, + "logps": -87.88640594482422, + "loss": 0.1312, + "objective": 0.1272941380739212, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.05837959796190262, + "step": 3480 + }, + { + "dpo_loss": 0.68227618932724, + "epoch": 6.584789796882381, + "grad_norm": 69.3694590300883, + "learning_rate": 1.5870862025950595e-07, + "logits": -1.2936718463897705, + "logps": -87.17106628417969, + "loss": 0.1321, + "objective": 0.12874533236026764, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5625, + "regularize": 0.06051770970225334, + "step": 3485 + }, + { + "dpo_loss": 0.6884613037109375, + "epoch": 6.594237128011336, + "grad_norm": 66.43795002581034, + "learning_rate": 1.579412539811053e-07, + "logits": -1.232988715171814, + "logps": -89.5428695678711, + "loss": 0.1353, + "objective": 0.13002143800258636, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.061175305396318436, + "step": 3490 + }, + { + "dpo_loss": 0.6869865655899048, + "epoch": 6.603684459140293, + "grad_norm": 67.64239996395119, + "learning_rate": 1.571748897957591e-07, + "logits": -1.1515171527862549, + "logps": -87.28607940673828, + "loss": 0.1296, + "objective": 0.12666872143745422, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.05797005817294121, + "step": 3495 + }, + { + "dpo_loss": 0.6819984316825867, + "epoch": 6.613131790269249, + "grad_norm": 68.22980092850779, + "learning_rate": 1.5640953604562113e-07, + "logits": -1.1667726039886475, + "logps": -87.58833312988281, + "loss": 0.1324, + "objective": 0.13020966947078705, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.46875, + "regularize": 0.06200982257723808, + "step": 3500 + }, + { + "epoch": 6.613131790269249, + "eval_dpo_loss": 0.707497775554657, + "eval_logits": -1.1836313009262085, + "eval_logps": -93.94660949707031, + "eval_loss": 0.34014692902565, + "eval_objective": 0.33894920349121094, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5191511511802673, + "eval_regularize": 0.26819944381713867, + "eval_runtime": 155.8735, + "eval_samples_per_second": 37.145, + "eval_steps_per_second": 3.099, + "step": 3500 + }, + { + "dpo_loss": 0.6908973455429077, + "epoch": 6.622579121398205, + "grad_norm": 72.27666665513064, + "learning_rate": 1.5564520106184643e-07, + "logits": -1.1951795816421509, + "logps": -89.80879211425781, + "loss": 0.1314, + "objective": 0.12301365286111832, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48750001192092896, + "regularize": 0.05392392352223396, + "step": 3505 + }, + { + "dpo_loss": 0.68874591588974, + "epoch": 6.632026452527161, + "grad_norm": 70.12917103624659, + "learning_rate": 1.5488189316450018e-07, + "logits": -1.3211259841918945, + "logps": -87.243896484375, + "loss": 0.1322, + "objective": 0.13392779231071472, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.06505317986011505, + "step": 3510 + }, + { + "dpo_loss": 0.6867557168006897, + "epoch": 6.641473783656117, + "grad_norm": 67.28560339736524, + "learning_rate": 1.5411962066246765e-07, + "logits": -1.1426551342010498, + "logps": -87.66487121582031, + "loss": 0.1324, + "objective": 0.12283537536859512, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.46875, + "regularize": 0.05415979027748108, + "step": 3515 + }, + { + "dpo_loss": 0.6930004954338074, + "epoch": 6.650921114785073, + "grad_norm": 71.83681748999514, + "learning_rate": 1.533583918533631e-07, + "logits": -1.1357409954071045, + "logps": -89.56889343261719, + "loss": 0.1353, + "objective": 0.13924241065979004, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.6499999761581421, + "ranking_simple": 0.6499999761581421, + "regularize": 0.06994234025478363, + "step": 3520 + }, + { + "dpo_loss": 0.6901943683624268, + "epoch": 6.660368445914029, + "grad_norm": 63.771756125302865, + "learning_rate": 1.5259821502344004e-07, + "logits": -1.1956990957260132, + "logps": -86.81988525390625, + "loss": 0.1325, + "objective": 0.13418355584144592, + "ranking_idealized": 0.4437499940395355, + "ranking_idealized_expo": 0.4124999940395355, + "ranking_simple": 0.41874998807907104, + "regularize": 0.06516411155462265, + "step": 3525 + }, + { + "dpo_loss": 0.68889981508255, + "epoch": 6.669815777042985, + "grad_norm": 65.86146652055191, + "learning_rate": 1.5183909844750085e-07, + "logits": -1.2661304473876953, + "logps": -88.42591857910156, + "loss": 0.1301, + "objective": 0.13600081205368042, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.543749988079071, + "regularize": 0.0671108216047287, + "step": 3530 + }, + { + "dpo_loss": 0.6970779895782471, + "epoch": 6.679263108171941, + "grad_norm": 73.55635902035519, + "learning_rate": 1.510810503888064e-07, + "logits": -1.1741832494735718, + "logps": -89.119873046875, + "loss": 0.1362, + "objective": 0.13686904311180115, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48124998807907104, + "regularize": 0.06716124713420868, + "step": 3535 + }, + { + "dpo_loss": 0.690667986869812, + "epoch": 6.688710439300897, + "grad_norm": 68.09799154479465, + "learning_rate": 1.503240790989867e-07, + "logits": -1.2058817148208618, + "logps": -89.58091735839844, + "loss": 0.1327, + "objective": 0.13466203212738037, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.06559522449970245, + "step": 3540 + }, + { + "dpo_loss": 0.6922726035118103, + "epoch": 6.698157770429853, + "grad_norm": 71.11909965177945, + "learning_rate": 1.4956819281795038e-07, + "logits": -1.1305687427520752, + "logps": -87.77249145507812, + "loss": 0.1278, + "objective": 0.1287274956703186, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5249999761581421, + "regularize": 0.059500228613615036, + "step": 3545 + }, + { + "dpo_loss": 0.6947690844535828, + "epoch": 6.707605101558809, + "grad_norm": 69.61974547667347, + "learning_rate": 1.4881339977379564e-07, + "logits": -1.199467420578003, + "logps": -89.92427825927734, + "loss": 0.1385, + "objective": 0.14292028546333313, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.07344337552785873, + "step": 3550 + }, + { + "epoch": 6.707605101558809, + "eval_dpo_loss": 0.707977294921875, + "eval_logits": -1.1866289377212524, + "eval_logps": -93.62445831298828, + "eval_loss": 0.34494805335998535, + "eval_objective": 0.3436737358570099, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.27287599444389343, + "eval_runtime": 156.8242, + "eval_samples_per_second": 36.92, + "eval_steps_per_second": 3.08, + "step": 3550 + }, + { + "dpo_loss": 0.6931701898574829, + "epoch": 6.717052432687765, + "grad_norm": 70.05227121284707, + "learning_rate": 1.480597081827203e-07, + "logits": -1.2326997518539429, + "logps": -88.47614288330078, + "loss": 0.1266, + "objective": 0.12746387720108032, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.05814685299992561, + "step": 3555 + }, + { + "dpo_loss": 0.6866840124130249, + "epoch": 6.726499763816721, + "grad_norm": 65.862462651802, + "learning_rate": 1.473071262489322e-07, + "logits": -1.1386873722076416, + "logps": -88.5678482055664, + "loss": 0.1302, + "objective": 0.13396327197551727, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48124998807907104, + "regularize": 0.06529487669467926, + "step": 3560 + }, + { + "dpo_loss": 0.6916936635971069, + "epoch": 6.7359470949456774, + "grad_norm": 70.79260627546033, + "learning_rate": 1.465556621645607e-07, + "logits": -1.1299726963043213, + "logps": -86.30937194824219, + "loss": 0.1267, + "objective": 0.1276959329843521, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.05852656438946724, + "step": 3565 + }, + { + "dpo_loss": 0.6887668371200562, + "epoch": 6.745394426074634, + "grad_norm": 69.13953631290167, + "learning_rate": 1.4580532410956658e-07, + "logits": -1.2006984949111938, + "logps": -88.484130859375, + "loss": 0.132, + "objective": 0.13525152206420898, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.06637485325336456, + "step": 3570 + }, + { + "dpo_loss": 0.6862061619758606, + "epoch": 6.75484175720359, + "grad_norm": 69.38531825951776, + "learning_rate": 1.4505612025165347e-07, + "logits": -1.2285568714141846, + "logps": -89.47730255126953, + "loss": 0.1287, + "objective": 0.1284267008304596, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.05980608984827995, + "step": 3575 + }, + { + "dpo_loss": 0.6938058137893677, + "epoch": 6.764289088332546, + "grad_norm": 73.48578648094887, + "learning_rate": 1.4430805874617882e-07, + "logits": -1.1846771240234375, + "logps": -87.36907196044922, + "loss": 0.1292, + "objective": 0.13662242889404297, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.06724182516336441, + "step": 3580 + }, + { + "dpo_loss": 0.6926447153091431, + "epoch": 6.773736419461502, + "grad_norm": 70.9810049729385, + "learning_rate": 1.4356114773606515e-07, + "logits": -1.260349988937378, + "logps": -89.3043212890625, + "loss": 0.133, + "objective": 0.12815634906291962, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.05889187008142471, + "step": 3585 + }, + { + "dpo_loss": 0.6904715299606323, + "epoch": 6.783183750590458, + "grad_norm": 68.29513245816993, + "learning_rate": 1.4281539535171138e-07, + "logits": -1.1519930362701416, + "logps": -87.79924011230469, + "loss": 0.129, + "objective": 0.12700645625591278, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.574999988079071, + "regularize": 0.05795930698513985, + "step": 3590 + }, + { + "dpo_loss": 0.6834983825683594, + "epoch": 6.792631081719414, + "grad_norm": 67.53864839379757, + "learning_rate": 1.420708097109047e-07, + "logits": -1.2150757312774658, + "logps": -87.30172729492188, + "loss": 0.1318, + "objective": 0.1265515387058258, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.46875, + "regularize": 0.058201707899570465, + "step": 3595 + }, + { + "dpo_loss": 0.6890872716903687, + "epoch": 6.8020784128483704, + "grad_norm": 63.489388278693234, + "learning_rate": 1.4132739891873124e-07, + "logits": -1.1841628551483154, + "logps": -87.87871551513672, + "loss": 0.1289, + "objective": 0.12770512700080872, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5375000238418579, + "regularize": 0.05879638344049454, + "step": 3600 + }, + { + "epoch": 6.8020784128483704, + "eval_dpo_loss": 0.708841860294342, + "eval_logits": -1.1858102083206177, + "eval_logps": -93.84819793701172, + "eval_loss": 0.3432690501213074, + "eval_objective": 0.341215580701828, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5191511511802673, + "eval_regularize": 0.27033141255378723, + "eval_runtime": 156.5964, + "eval_samples_per_second": 36.974, + "eval_steps_per_second": 3.084, + "step": 3600 + }, + { + "dpo_loss": 0.6896633505821228, + "epoch": 6.8115257439773265, + "grad_norm": 66.40829611486825, + "learning_rate": 1.4058517106748915e-07, + "logits": -1.1672431230545044, + "logps": -87.52486419677734, + "loss": 0.1277, + "objective": 0.1254403442144394, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.05647401884198189, + "step": 3605 + }, + { + "dpo_loss": 0.6861889958381653, + "epoch": 6.8209730751062825, + "grad_norm": 72.87149287628974, + "learning_rate": 1.398441342365994e-07, + "logits": -1.257767915725708, + "logps": -88.41429138183594, + "loss": 0.1232, + "objective": 0.12373647838830948, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.055117569863796234, + "step": 3610 + }, + { + "dpo_loss": 0.6913567781448364, + "epoch": 6.8304204062352385, + "grad_norm": 73.82859446285208, + "learning_rate": 1.391042964925183e-07, + "logits": -1.2193915843963623, + "logps": -89.33489227294922, + "loss": 0.127, + "objective": 0.12400223314762115, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5625, + "regularize": 0.05486655235290527, + "step": 3615 + }, + { + "dpo_loss": 0.6929974555969238, + "epoch": 6.8398677373641945, + "grad_norm": 66.78663032939671, + "learning_rate": 1.3836566588865e-07, + "logits": -1.1925121545791626, + "logps": -88.87205505371094, + "loss": 0.1268, + "objective": 0.12733788788318634, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.581250011920929, + "regularize": 0.058038126677274704, + "step": 3620 + }, + { + "dpo_loss": 0.6925061345100403, + "epoch": 6.8493150684931505, + "grad_norm": 68.91716941519066, + "learning_rate": 1.3762825046525802e-07, + "logits": -1.1834360361099243, + "logps": -88.37583923339844, + "loss": 0.1246, + "objective": 0.1241520419716835, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.05490143224596977, + "step": 3625 + }, + { + "dpo_loss": 0.6956363916397095, + "epoch": 6.8587623996221065, + "grad_norm": 64.49188844038778, + "learning_rate": 1.3689205824937855e-07, + "logits": -1.1987342834472656, + "logps": -86.22975158691406, + "loss": 0.1271, + "objective": 0.12109167873859406, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4312500059604645, + "regularize": 0.05152803659439087, + "step": 3630 + }, + { + "dpo_loss": 0.6963600516319275, + "epoch": 6.868209730751063, + "grad_norm": 70.9120124443984, + "learning_rate": 1.361570972547324e-07, + "logits": -1.1320016384124756, + "logps": -89.20863342285156, + "loss": 0.1295, + "objective": 0.13553881645202637, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5, + "regularize": 0.06590281426906586, + "step": 3635 + }, + { + "dpo_loss": 0.6899456977844238, + "epoch": 6.877657061880019, + "grad_norm": 67.98797208533495, + "learning_rate": 1.3542337548163854e-07, + "logits": -1.1944100856781006, + "logps": -90.85575866699219, + "loss": 0.1297, + "objective": 0.1339595913887024, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.06496501713991165, + "step": 3640 + }, + { + "dpo_loss": 0.6944170594215393, + "epoch": 6.887104393008975, + "grad_norm": 68.58400023807916, + "learning_rate": 1.3469090091692606e-07, + "logits": -1.2223891019821167, + "logps": -90.43609619140625, + "loss": 0.1277, + "objective": 0.12617693841457367, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.05673524737358093, + "step": 3645 + }, + { + "dpo_loss": 0.6873482465744019, + "epoch": 6.896551724137931, + "grad_norm": 69.87269982347813, + "learning_rate": 1.3395968153384818e-07, + "logits": -1.1659064292907715, + "logps": -87.1473159790039, + "loss": 0.1272, + "objective": 0.12711526453495026, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.05838043615221977, + "step": 3650 + }, + { + "epoch": 6.896551724137931, + "eval_dpo_loss": 0.7080458998680115, + "eval_logits": -1.1978520154953003, + "eval_logps": -93.93708038330078, + "eval_loss": 0.34308624267578125, + "eval_objective": 0.3417006731033325, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2708960771560669, + "eval_runtime": 155.0927, + "eval_samples_per_second": 37.333, + "eval_steps_per_second": 3.114, + "step": 3650 + }, + { + "dpo_loss": 0.6883994340896606, + "epoch": 6.9059990552668875, + "grad_norm": 66.06237659948253, + "learning_rate": 1.3322972529199472e-07, + "logits": -1.1440255641937256, + "logps": -89.079345703125, + "loss": 0.132, + "objective": 0.1393580138683319, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.07051806151866913, + "step": 3655 + }, + { + "dpo_loss": 0.6803470849990845, + "epoch": 6.9154463863958435, + "grad_norm": 68.89901994571194, + "learning_rate": 1.3250104013720577e-07, + "logits": -1.0780181884765625, + "logps": -89.1418685913086, + "loss": 0.1257, + "objective": 0.13033224642276764, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.06229754164814949, + "step": 3660 + }, + { + "dpo_loss": 0.6918571591377258, + "epoch": 6.9248937175247995, + "grad_norm": 70.23465718773171, + "learning_rate": 1.3177363400148521e-07, + "logits": -1.2739157676696777, + "logps": -88.82063293457031, + "loss": 0.1235, + "objective": 0.12518411874771118, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.05599840357899666, + "step": 3665 + }, + { + "dpo_loss": 0.6917076110839844, + "epoch": 6.934341048653756, + "grad_norm": 71.12782015732581, + "learning_rate": 1.3104751480291448e-07, + "logits": -1.1866590976715088, + "logps": -89.13645935058594, + "loss": 0.1219, + "objective": 0.12391819804906845, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.054747432470321655, + "step": 3670 + }, + { + "dpo_loss": 0.6851991415023804, + "epoch": 6.943788379782712, + "grad_norm": 63.64952176646913, + "learning_rate": 1.30322690445566e-07, + "logits": -1.2462929487228394, + "logps": -88.42264556884766, + "loss": 0.1252, + "objective": 0.125773623585701, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.05725371837615967, + "step": 3675 + }, + { + "dpo_loss": 0.6953834295272827, + "epoch": 6.953235710911668, + "grad_norm": 66.21002613526767, + "learning_rate": 1.2959916881941755e-07, + "logits": -1.2305362224578857, + "logps": -88.08800506591797, + "loss": 0.1264, + "objective": 0.12478785216808319, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.46875, + "regularize": 0.05524950474500656, + "step": 3680 + }, + { + "dpo_loss": 0.6868912577629089, + "epoch": 6.962683042040624, + "grad_norm": 69.20229018358609, + "learning_rate": 1.2887695780026614e-07, + "logits": -1.263931393623352, + "logps": -86.4261703491211, + "loss": 0.1234, + "objective": 0.11907543987035751, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.05038632079958916, + "step": 3685 + }, + { + "dpo_loss": 0.6869106888771057, + "epoch": 6.97213037316958, + "grad_norm": 65.75821684775042, + "learning_rate": 1.2815606524964218e-07, + "logits": -1.2265689373016357, + "logps": -87.24744415283203, + "loss": 0.1259, + "objective": 0.12614509463310242, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.057454027235507965, + "step": 3690 + }, + { + "dpo_loss": 0.6816592216491699, + "epoch": 6.981577704298536, + "grad_norm": 68.4662770532532, + "learning_rate": 1.2743649901472446e-07, + "logits": -1.1392152309417725, + "logps": -87.72526550292969, + "loss": 0.1278, + "objective": 0.13349346816539764, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.06532754749059677, + "step": 3695 + }, + { + "dpo_loss": 0.6968778371810913, + "epoch": 6.991025035427492, + "grad_norm": 68.16552756051469, + "learning_rate": 1.2671826692825403e-07, + "logits": -1.206505537033081, + "logps": -90.31403350830078, + "loss": 0.125, + "objective": 0.1260785311460495, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.056390754878520966, + "step": 3700 + }, + { + "epoch": 6.991025035427492, + "eval_dpo_loss": 0.707888126373291, + "eval_logits": -1.1951963901519775, + "eval_logps": -93.96659088134766, + "eval_loss": 0.34364402294158936, + "eval_objective": 0.34248366951942444, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2716948688030243, + "eval_runtime": 155.8519, + "eval_samples_per_second": 37.151, + "eval_steps_per_second": 3.099, + "step": 3700 + }, + { + "dpo_loss": 0.692703366279602, + "epoch": 7.000472366556448, + "grad_norm": 65.88227214495035, + "learning_rate": 1.2600137680844928e-07, + "logits": -1.3077120780944824, + "logps": -89.81864166259766, + "loss": 0.1237, + "objective": 0.12311004102230072, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.05383969470858574, + "step": 3705 + }, + { + "dpo_loss": 0.691459059715271, + "epoch": 7.009919697685404, + "grad_norm": 69.68883276511424, + "learning_rate": 1.2528583645892088e-07, + "logits": -1.3056930303573608, + "logps": -87.62162780761719, + "loss": 0.1187, + "objective": 0.11696214973926544, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5562499761581421, + "regularize": 0.04781625419855118, + "step": 3710 + }, + { + "dpo_loss": 0.688200831413269, + "epoch": 7.01936702881436, + "grad_norm": 65.12006779525382, + "learning_rate": 1.245716536685866e-07, + "logits": -1.1382930278778076, + "logps": -88.21986389160156, + "loss": 0.1191, + "objective": 0.12016657739877701, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.606249988079071, + "ranking_simple": 0.612500011920929, + "regularize": 0.051346492022275925, + "step": 3715 + }, + { + "dpo_loss": 0.6875385046005249, + "epoch": 7.028814359943316, + "grad_norm": 66.59560630354164, + "learning_rate": 1.2385883621158694e-07, + "logits": -1.1439882516860962, + "logps": -88.45091247558594, + "loss": 0.1226, + "objective": 0.12256159633398056, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.05380775406956673, + "step": 3720 + }, + { + "dpo_loss": 0.6896174550056458, + "epoch": 7.038261691072272, + "grad_norm": 63.44555622200776, + "learning_rate": 1.2314739184720018e-07, + "logits": -1.2115542888641357, + "logps": -88.55656433105469, + "loss": 0.1227, + "objective": 0.1167091354727745, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5625, + "regularize": 0.04774738475680351, + "step": 3725 + }, + { + "dpo_loss": 0.6908005475997925, + "epoch": 7.047709022201228, + "grad_norm": 69.3967153206809, + "learning_rate": 1.2243732831975785e-07, + "logits": -1.17701256275177, + "logps": -89.33656311035156, + "loss": 0.1219, + "objective": 0.12276466190814972, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48750001192092896, + "regularize": 0.05368459224700928, + "step": 3730 + }, + { + "dpo_loss": 0.6931012868881226, + "epoch": 7.057156353330185, + "grad_norm": 68.56316233493952, + "learning_rate": 1.2172865335856064e-07, + "logits": -1.2081263065338135, + "logps": -88.94207000732422, + "loss": 0.123, + "objective": 0.12437693774700165, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.055066801607608795, + "step": 3735 + }, + { + "dpo_loss": 0.6907673478126526, + "epoch": 7.066603684459141, + "grad_norm": 74.9215388811123, + "learning_rate": 1.2102137467779409e-07, + "logits": -1.2592813968658447, + "logps": -88.12895202636719, + "loss": 0.1187, + "objective": 0.11529743671417236, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4937500059604645, + "regularize": 0.046220697462558746, + "step": 3740 + }, + { + "dpo_loss": 0.6894332766532898, + "epoch": 7.076051015588097, + "grad_norm": 68.51635982925964, + "learning_rate": 1.2031549997644498e-07, + "logits": -1.2517545223236084, + "logps": -89.99351501464844, + "loss": 0.12, + "objective": 0.12121138721704483, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.05226803943514824, + "step": 3745 + }, + { + "dpo_loss": 0.6855832934379578, + "epoch": 7.085498346717053, + "grad_norm": 65.23697097053963, + "learning_rate": 1.1961103693821694e-07, + "logits": -1.1581439971923828, + "logps": -89.9802474975586, + "loss": 0.1227, + "objective": 0.12154228985309601, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.05298396199941635, + "step": 3750 + }, + { + "epoch": 7.085498346717053, + "eval_dpo_loss": 0.7086306214332581, + "eval_logits": -1.2021671533584595, + "eval_logps": -93.87805938720703, + "eval_loss": 0.34044116735458374, + "eval_objective": 0.3382265567779541, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.2673634886741638, + "eval_runtime": 154.6879, + "eval_samples_per_second": 37.43, + "eval_steps_per_second": 3.122, + "step": 3750 + }, + { + "dpo_loss": 0.6866408586502075, + "epoch": 7.094945677846009, + "grad_norm": 68.10588099468598, + "learning_rate": 1.1890799323144749e-07, + "logits": -1.2115185260772705, + "logps": -88.48025512695312, + "loss": 0.1226, + "objective": 0.11928554624319077, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.050621457397937775, + "step": 3755 + }, + { + "dpo_loss": 0.6906393766403198, + "epoch": 7.104393008974965, + "grad_norm": 68.72119036287717, + "learning_rate": 1.1820637650902387e-07, + "logits": -1.2775551080703735, + "logps": -88.83606719970703, + "loss": 0.1236, + "objective": 0.1245572417974472, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.0554933063685894, + "step": 3760 + }, + { + "dpo_loss": 0.688023030757904, + "epoch": 7.113840340103921, + "grad_norm": 68.91483050812526, + "learning_rate": 1.1750619440830014e-07, + "logits": -1.1680511236190796, + "logps": -87.44700622558594, + "loss": 0.1197, + "objective": 0.11044274270534515, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.04164043813943863, + "step": 3765 + }, + { + "dpo_loss": 0.6897870302200317, + "epoch": 7.123287671232877, + "grad_norm": 67.85665764483112, + "learning_rate": 1.1680745455101426e-07, + "logits": -1.205358624458313, + "logps": -87.68621826171875, + "loss": 0.1174, + "objective": 0.11713232100009918, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4312500059604645, + "regularize": 0.048153601586818695, + "step": 3770 + }, + { + "dpo_loss": 0.6883377432823181, + "epoch": 7.132735002361833, + "grad_norm": 68.01990647906555, + "learning_rate": 1.1611016454320452e-07, + "logits": -1.2369829416275024, + "logps": -87.67942810058594, + "loss": 0.1203, + "objective": 0.11703640222549438, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.581250011920929, + "regularize": 0.04820261523127556, + "step": 3775 + }, + { + "dpo_loss": 0.691364586353302, + "epoch": 7.142182333490789, + "grad_norm": 67.10762501083026, + "learning_rate": 1.1541433197512717e-07, + "logits": -1.229856014251709, + "logps": -88.16793060302734, + "loss": 0.1202, + "objective": 0.1137702614068985, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4749999940395355, + "regularize": 0.04463379830121994, + "step": 3780 + }, + { + "dpo_loss": 0.6905776262283325, + "epoch": 7.151629664619745, + "grad_norm": 60.558726867867506, + "learning_rate": 1.1471996442117374e-07, + "logits": -1.2590320110321045, + "logps": -89.91032409667969, + "loss": 0.1163, + "objective": 0.115619957447052, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.046562183648347855, + "step": 3785 + }, + { + "dpo_loss": 0.688174843788147, + "epoch": 7.161076995748701, + "grad_norm": 65.46106583390794, + "learning_rate": 1.1402706943978843e-07, + "logits": -1.2758677005767822, + "logps": -88.68501281738281, + "loss": 0.1142, + "objective": 0.11551575362682343, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.512499988079071, + "regularize": 0.04669825732707977, + "step": 3790 + }, + { + "dpo_loss": 0.6905801296234131, + "epoch": 7.170524326877657, + "grad_norm": 69.42634954648608, + "learning_rate": 1.133356545733861e-07, + "logits": -1.1488438844680786, + "logps": -88.26316833496094, + "loss": 0.1148, + "objective": 0.11274246126413345, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.043684449046850204, + "step": 3795 + }, + { + "dpo_loss": 0.6877044439315796, + "epoch": 7.179971658006613, + "grad_norm": 79.2004419422603, + "learning_rate": 1.1264572734827008e-07, + "logits": -1.2306697368621826, + "logps": -88.87214660644531, + "loss": 0.1142, + "objective": 0.11047129333019257, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4375, + "regularize": 0.041700832545757294, + "step": 3800 + }, + { + "epoch": 7.179971658006613, + "eval_dpo_loss": 0.7083066701889038, + "eval_logits": -1.1874396800994873, + "eval_logps": -93.82341766357422, + "eval_loss": 0.3426133394241333, + "eval_objective": 0.3419845700263977, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.520703911781311, + "eval_regularize": 0.27115386724472046, + "eval_runtime": 154.3413, + "eval_samples_per_second": 37.514, + "eval_steps_per_second": 3.129, + "step": 3800 + }, + { + "dpo_loss": 0.6943923234939575, + "epoch": 7.189418989135569, + "grad_norm": 67.50840725502906, + "learning_rate": 1.1195729527454994e-07, + "logits": -1.1638985872268677, + "logps": -86.8692626953125, + "loss": 0.1167, + "objective": 0.1133107915520668, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.04387155547738075, + "step": 3805 + }, + { + "dpo_loss": 0.6871123313903809, + "epoch": 7.198866320264525, + "grad_norm": 71.05284807752426, + "learning_rate": 1.1127036584606012e-07, + "logits": -1.169498085975647, + "logps": -87.57221984863281, + "loss": 0.1189, + "objective": 0.11854559183120728, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.04983435943722725, + "step": 3810 + }, + { + "dpo_loss": 0.689956784248352, + "epoch": 7.208313651393482, + "grad_norm": 72.85395631216751, + "learning_rate": 1.1058494654027806e-07, + "logits": -1.177872896194458, + "logps": -89.75749206542969, + "loss": 0.118, + "objective": 0.12170775234699249, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.05271206423640251, + "step": 3815 + }, + { + "dpo_loss": 0.6926616430282593, + "epoch": 7.217760982522438, + "grad_norm": 68.4949492318128, + "learning_rate": 1.0990104481824336e-07, + "logits": -1.2210584878921509, + "logps": -88.03923034667969, + "loss": 0.1189, + "objective": 0.12480070441961288, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.05553454905748367, + "step": 3820 + }, + { + "dpo_loss": 0.6882877349853516, + "epoch": 7.227208313651394, + "grad_norm": 72.17531652399471, + "learning_rate": 1.0921866812447567e-07, + "logits": -1.2541043758392334, + "logps": -88.48805236816406, + "loss": 0.1186, + "objective": 0.11761631071567535, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.048787519335746765, + "step": 3825 + }, + { + "dpo_loss": 0.6862500905990601, + "epoch": 7.23665564478035, + "grad_norm": 63.810028727336274, + "learning_rate": 1.0853782388689456e-07, + "logits": -1.1824160814285278, + "logps": -86.62850189208984, + "loss": 0.1173, + "objective": 0.12098387628793716, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.05235886573791504, + "step": 3830 + }, + { + "dpo_loss": 0.6879441738128662, + "epoch": 7.246102975909306, + "grad_norm": 64.9312849341968, + "learning_rate": 1.0785851951673805e-07, + "logits": -1.1976611614227295, + "logps": -87.53474426269531, + "loss": 0.1151, + "objective": 0.11285368353128433, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5625, + "regularize": 0.044059254229068756, + "step": 3835 + }, + { + "dpo_loss": 0.6905585527420044, + "epoch": 7.255550307038262, + "grad_norm": 70.35920534332588, + "learning_rate": 1.0718076240848211e-07, + "logits": -1.20999014377594, + "logps": -86.69548797607422, + "loss": 0.1152, + "objective": 0.10998617112636566, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.04093032330274582, + "step": 3840 + }, + { + "dpo_loss": 0.6859630942344666, + "epoch": 7.264997638167218, + "grad_norm": 68.34159831161008, + "learning_rate": 1.0650455993976021e-07, + "logits": -1.181806206703186, + "logps": -88.92317199707031, + "loss": 0.1169, + "objective": 0.11873508989810944, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.05013876408338547, + "step": 3845 + }, + { + "dpo_loss": 0.6961053013801575, + "epoch": 7.274444969296174, + "grad_norm": 73.44616251707711, + "learning_rate": 1.0582991947128323e-07, + "logits": -1.1553852558135986, + "logps": -88.92807006835938, + "loss": 0.1142, + "objective": 0.11381669342517853, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.0442061573266983, + "step": 3850 + }, + { + "epoch": 7.274444969296174, + "eval_dpo_loss": 0.7090203762054443, + "eval_logits": -1.1775156259536743, + "eval_logps": -93.68949890136719, + "eval_loss": 0.34542912244796753, + "eval_objective": 0.34420666098594666, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.27330461144447327, + "eval_runtime": 155.6654, + "eval_samples_per_second": 37.195, + "eval_steps_per_second": 3.103, + "step": 3850 + }, + { + "dpo_loss": 0.6929150223731995, + "epoch": 7.28389230042513, + "grad_norm": 63.048191058037425, + "learning_rate": 1.0515684834675884e-07, + "logits": -1.17747962474823, + "logps": -88.01960754394531, + "loss": 0.1143, + "objective": 0.11103460937738419, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.04174310341477394, + "step": 3855 + }, + { + "dpo_loss": 0.6904551982879639, + "epoch": 7.293339631554086, + "grad_norm": 66.45739287207125, + "learning_rate": 1.0448535389281191e-07, + "logits": -1.1651670932769775, + "logps": -88.12651062011719, + "loss": 0.1175, + "objective": 0.11071532964706421, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.041669812053442, + "step": 3860 + }, + { + "dpo_loss": 0.6892693042755127, + "epoch": 7.302786962683042, + "grad_norm": 65.61604925434061, + "learning_rate": 1.038154434189046e-07, + "logits": -1.2562576532363892, + "logps": -87.65116882324219, + "loss": 0.1188, + "objective": 0.11806901544332504, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.04914209246635437, + "step": 3865 + }, + { + "dpo_loss": 0.6871533393859863, + "epoch": 7.312234293811998, + "grad_norm": 65.23941905124475, + "learning_rate": 1.0314712421725707e-07, + "logits": -1.237535834312439, + "logps": -89.5743637084961, + "loss": 0.1154, + "objective": 0.1144314780831337, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.045716144144535065, + "step": 3870 + }, + { + "dpo_loss": 0.6926724910736084, + "epoch": 7.321681624940954, + "grad_norm": 66.65320725431135, + "learning_rate": 1.0248040356276785e-07, + "logits": -1.2694696187973022, + "logps": -91.21292877197266, + "loss": 0.1145, + "objective": 0.11743433773517609, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.04816708713769913, + "step": 3875 + }, + { + "dpo_loss": 0.688007652759552, + "epoch": 7.33112895606991, + "grad_norm": 67.38572150902526, + "learning_rate": 1.0181528871293452e-07, + "logits": -1.252357840538025, + "logps": -87.56616973876953, + "loss": 0.1152, + "objective": 0.11621659994125366, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.047415841370821, + "step": 3880 + }, + { + "dpo_loss": 0.6883160471916199, + "epoch": 7.340576287198866, + "grad_norm": 69.25950898070047, + "learning_rate": 1.0115178690777507e-07, + "logits": -1.1671538352966309, + "logps": -88.53242492675781, + "loss": 0.1149, + "objective": 0.11127232015132904, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.042440708726644516, + "step": 3885 + }, + { + "dpo_loss": 0.6874035000801086, + "epoch": 7.350023618327822, + "grad_norm": 65.20783339989941, + "learning_rate": 1.004899053697487e-07, + "logits": -1.2028082609176636, + "logps": -89.45445251464844, + "loss": 0.1126, + "objective": 0.11188634485006332, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5, + "regularize": 0.04314599931240082, + "step": 3890 + }, + { + "dpo_loss": 0.689424991607666, + "epoch": 7.359470949456778, + "grad_norm": 69.45795757708476, + "learning_rate": 9.982965130367774e-08, + "logits": -1.1444966793060303, + "logps": -89.14888000488281, + "loss": 0.1119, + "objective": 0.11185325682163239, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.4625000059604645, + "regularize": 0.042910750955343246, + "step": 3895 + }, + { + "dpo_loss": 0.685333251953125, + "epoch": 7.368918280585735, + "grad_norm": 70.7089984246962, + "learning_rate": 9.917103189666864e-08, + "logits": -1.2610085010528564, + "logps": -89.09552764892578, + "loss": 0.1128, + "objective": 0.1101282611489296, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.574999988079071, + "regularize": 0.04159492999315262, + "step": 3900 + }, + { + "epoch": 7.368918280585735, + "eval_dpo_loss": 0.7082622647285461, + "eval_logits": -1.183833360671997, + "eval_logps": -94.05205535888672, + "eval_loss": 0.34172287583351135, + "eval_objective": 0.34058767557144165, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.26976147294044495, + "eval_runtime": 154.5614, + "eval_samples_per_second": 37.461, + "eval_steps_per_second": 3.125, + "step": 3900 + }, + { + "dpo_loss": 0.6910353302955627, + "epoch": 7.378365611714691, + "grad_norm": 66.13991711330048, + "learning_rate": 9.851405431803397e-08, + "logits": -1.2042526006698608, + "logps": -89.818603515625, + "loss": 0.1143, + "objective": 0.1198992133140564, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.05079569295048714, + "step": 3905 + }, + { + "dpo_loss": 0.6896607875823975, + "epoch": 7.387812942843647, + "grad_norm": 66.42398501362136, + "learning_rate": 9.785872571921467e-08, + "logits": -1.146799921989441, + "logps": -87.97035217285156, + "loss": 0.1125, + "objective": 0.11261601746082306, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.043649934232234955, + "step": 3910 + }, + { + "dpo_loss": 0.6883934736251831, + "epoch": 7.397260273972603, + "grad_norm": 70.24223672772072, + "learning_rate": 9.720505323370165e-08, + "logits": -1.1669622659683228, + "logps": -89.51654052734375, + "loss": 0.114, + "objective": 0.11565478891134262, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.04681544750928879, + "step": 3915 + }, + { + "dpo_loss": 0.6910319328308105, + "epoch": 7.406707605101559, + "grad_norm": 69.94033055223287, + "learning_rate": 9.655304397695843e-08, + "logits": -1.22225022315979, + "logps": -86.38886260986328, + "loss": 0.1109, + "objective": 0.10618066787719727, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.03707747161388397, + "step": 3920 + }, + { + "dpo_loss": 0.6937940716743469, + "epoch": 7.416154936230515, + "grad_norm": 66.97802233109535, + "learning_rate": 9.590270504634396e-08, + "logits": -1.1933887004852295, + "logps": -89.13442993164062, + "loss": 0.1159, + "objective": 0.11764197051525116, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.048262566328048706, + "step": 3925 + }, + { + "dpo_loss": 0.6914020776748657, + "epoch": 7.425602267359471, + "grad_norm": 70.59997983605382, + "learning_rate": 9.52540435210348e-08, + "logits": -1.1378452777862549, + "logps": -89.34126281738281, + "loss": 0.1146, + "objective": 0.11447039991617203, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.04533017799258232, + "step": 3930 + }, + { + "dpo_loss": 0.6921123266220093, + "epoch": 7.435049598488427, + "grad_norm": 62.12464069488986, + "learning_rate": 9.460706646194843e-08, + "logits": -1.2301725149154663, + "logps": -86.09767150878906, + "loss": 0.1145, + "objective": 0.1187603622674942, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.04954911768436432, + "step": 3935 + }, + { + "dpo_loss": 0.6864615678787231, + "epoch": 7.444496929617383, + "grad_norm": 65.02567329964197, + "learning_rate": 9.396178091166623e-08, + "logits": -1.2188907861709595, + "logps": -89.6012954711914, + "loss": 0.1129, + "objective": 0.11353881657123566, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.04489266127347946, + "step": 3940 + }, + { + "dpo_loss": 0.692807137966156, + "epoch": 7.453944260746339, + "grad_norm": 69.78518471690447, + "learning_rate": 9.331819389435702e-08, + "logits": -1.2514480352401733, + "logps": -86.1063232421875, + "loss": 0.1121, + "objective": 0.11364670097827911, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.543749988079071, + "regularize": 0.04436598718166351, + "step": 3945 + }, + { + "dpo_loss": 0.6867848038673401, + "epoch": 7.463391591875295, + "grad_norm": 68.38044982421731, + "learning_rate": 9.267631241570051e-08, + "logits": -1.1391940116882324, + "logps": -89.96165466308594, + "loss": 0.1158, + "objective": 0.11638858169317245, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.04771009460091591, + "step": 3950 + }, + { + "epoch": 7.463391591875295, + "eval_dpo_loss": 0.7085540294647217, + "eval_logits": -1.1875077486038208, + "eval_logps": -93.92083740234375, + "eval_loss": 0.3433838188648224, + "eval_objective": 0.34226563572883606, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.2714102268218994, + "eval_runtime": 157.9826, + "eval_samples_per_second": 36.65, + "eval_steps_per_second": 3.057, + "step": 3950 + }, + { + "dpo_loss": 0.6887429356575012, + "epoch": 7.472838923004251, + "grad_norm": 69.82696415854126, + "learning_rate": 9.203614346281083e-08, + "logits": -1.178577184677124, + "logps": -88.75222778320312, + "loss": 0.1129, + "objective": 0.11189063638448715, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.04301634058356285, + "step": 3955 + }, + { + "dpo_loss": 0.6867369413375854, + "epoch": 7.482286254133207, + "grad_norm": 68.98701188066258, + "learning_rate": 9.139769400416066e-08, + "logits": -1.2077807188034058, + "logps": -87.89368438720703, + "loss": 0.1137, + "objective": 0.11300931125879288, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.044335611164569855, + "step": 3960 + }, + { + "dpo_loss": 0.6933126449584961, + "epoch": 7.491733585262163, + "grad_norm": 71.68343168413607, + "learning_rate": 9.076097098950541e-08, + "logits": -1.2484073638916016, + "logps": -87.87511444091797, + "loss": 0.112, + "objective": 0.11054714769124985, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.041215892881155014, + "step": 3965 + }, + { + "dpo_loss": 0.6919286847114563, + "epoch": 7.501180916391119, + "grad_norm": 65.63600778904674, + "learning_rate": 9.012598134980762e-08, + "logits": -1.2233575582504272, + "logps": -88.73258972167969, + "loss": 0.1109, + "objective": 0.10980510711669922, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.04061223939061165, + "step": 3970 + }, + { + "dpo_loss": 0.6842435002326965, + "epoch": 7.510628247520076, + "grad_norm": 67.39010843092791, + "learning_rate": 8.949273199716124e-08, + "logits": -1.1742600202560425, + "logps": -88.2282485961914, + "loss": 0.1161, + "objective": 0.12271026521921158, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4437499940395355, + "regularize": 0.0542859211564064, + "step": 3975 + }, + { + "dpo_loss": 0.6895918846130371, + "epoch": 7.520075578649031, + "grad_norm": 66.23441397917325, + "learning_rate": 8.886122982471653e-08, + "logits": -1.165290355682373, + "logps": -87.8960952758789, + "loss": 0.1126, + "objective": 0.1167561262845993, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.04779692739248276, + "step": 3980 + }, + { + "dpo_loss": 0.688515305519104, + "epoch": 7.529522909777988, + "grad_norm": 69.12404399558686, + "learning_rate": 8.823148170660533e-08, + "logits": -1.2020834684371948, + "logps": -87.83871459960938, + "loss": 0.1123, + "objective": 0.11350240558385849, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.04465087503194809, + "step": 3985 + }, + { + "dpo_loss": 0.6893686056137085, + "epoch": 7.538970240906944, + "grad_norm": 77.00746497085886, + "learning_rate": 8.760349449786569e-08, + "logits": -1.2218725681304932, + "logps": -87.24681091308594, + "loss": 0.1135, + "objective": 0.11317841708660126, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.04424155130982399, + "step": 3990 + }, + { + "dpo_loss": 0.6874690651893616, + "epoch": 7.5484175720359, + "grad_norm": 64.95968671329116, + "learning_rate": 8.697727503436756e-08, + "logits": -1.1924564838409424, + "logps": -88.96747589111328, + "loss": 0.1096, + "objective": 0.10814033448696136, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.039393432438373566, + "step": 3995 + }, + { + "dpo_loss": 0.6912730932235718, + "epoch": 7.557864903164856, + "grad_norm": 73.49530141426365, + "learning_rate": 8.635283013273853e-08, + "logits": -1.2151412963867188, + "logps": -88.40718841552734, + "loss": 0.113, + "objective": 0.1168517917394638, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.512499988079071, + "regularize": 0.047724489122629166, + "step": 4000 + }, + { + "epoch": 7.557864903164856, + "eval_dpo_loss": 0.708738386631012, + "eval_logits": -1.184956669807434, + "eval_logps": -93.68663787841797, + "eval_loss": 0.34279587864875793, + "eval_objective": 0.34106388688087463, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.2701900601387024, + "eval_runtime": 154.4859, + "eval_samples_per_second": 37.479, + "eval_steps_per_second": 3.126, + "step": 4000 + }, + { + "dpo_loss": 0.6873505711555481, + "epoch": 7.567312234293812, + "grad_norm": 68.58470344270393, + "learning_rate": 8.57301665902892e-08, + "logits": -1.1909992694854736, + "logps": -88.84749603271484, + "loss": 0.1089, + "objective": 0.109657421708107, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.040922366082668304, + "step": 4005 + }, + { + "dpo_loss": 0.6915440559387207, + "epoch": 7.576759565422768, + "grad_norm": 66.8202796391533, + "learning_rate": 8.510929118493951e-08, + "logits": -1.1787294149398804, + "logps": -88.94754791259766, + "loss": 0.1111, + "objective": 0.11381425708532333, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.04465986043214798, + "step": 4010 + }, + { + "dpo_loss": 0.6843048334121704, + "epoch": 7.586206896551724, + "grad_norm": 70.49292621484116, + "learning_rate": 8.449021067514483e-08, + "logits": -1.1641387939453125, + "logps": -88.68355560302734, + "loss": 0.1125, + "objective": 0.11452841758728027, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.04609794169664383, + "step": 4015 + }, + { + "dpo_loss": 0.6912661790847778, + "epoch": 7.59565422768068, + "grad_norm": 67.40017733094507, + "learning_rate": 8.387293179982257e-08, + "logits": -1.1959151029586792, + "logps": -87.34587860107422, + "loss": 0.1115, + "objective": 0.11274880170822144, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.04362217336893082, + "step": 4020 + }, + { + "dpo_loss": 0.6897019147872925, + "epoch": 7.605101558809636, + "grad_norm": 68.09459225606703, + "learning_rate": 8.32574612782787e-08, + "logits": -1.1839287281036377, + "logps": -91.09791564941406, + "loss": 0.1061, + "objective": 0.10851629823446274, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5562499761581421, + "regularize": 0.03954610973596573, + "step": 4025 + }, + { + "dpo_loss": 0.6882971525192261, + "epoch": 7.614548889938592, + "grad_norm": 62.462183761915234, + "learning_rate": 8.264380581013442e-08, + "logits": -1.1843148469924927, + "logps": -88.91985321044922, + "loss": 0.1086, + "objective": 0.10946284234523773, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 0.040633127093315125, + "step": 4030 + }, + { + "dpo_loss": 0.6897971630096436, + "epoch": 7.623996221067548, + "grad_norm": 70.50481329581429, + "learning_rate": 8.203197207525347e-08, + "logits": -1.108965277671814, + "logps": -88.89385986328125, + "loss": 0.1082, + "objective": 0.10758545249700546, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.03860573098063469, + "step": 4035 + }, + { + "dpo_loss": 0.6894980072975159, + "epoch": 7.633443552196504, + "grad_norm": 82.68503480035113, + "learning_rate": 8.142196673366936e-08, + "logits": -1.1271682977676392, + "logps": -87.27262115478516, + "loss": 0.1094, + "objective": 0.110640749335289, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4749999940395355, + "regularize": 0.04169095307588577, + "step": 4040 + }, + { + "dpo_loss": 0.6852022409439087, + "epoch": 7.64289088332546, + "grad_norm": 69.13124584150115, + "learning_rate": 8.081379642551301e-08, + "logits": -1.071737289428711, + "logps": -87.50572967529297, + "loss": 0.1063, + "objective": 0.10488457977771759, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.036364343017339706, + "step": 4045 + }, + { + "dpo_loss": 0.6881375312805176, + "epoch": 7.652338214454416, + "grad_norm": 70.93720732954677, + "learning_rate": 8.02074677709402e-08, + "logits": -1.1176092624664307, + "logps": -87.46456146240234, + "loss": 0.1113, + "objective": 0.10660157352685928, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.03778781741857529, + "step": 4050 + }, + { + "epoch": 7.652338214454416, + "eval_dpo_loss": 0.7087328433990479, + "eval_logits": -1.1836637258529663, + "eval_logps": -93.61712646484375, + "eval_loss": 0.34341859817504883, + "eval_objective": 0.34246423840522766, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2715909481048584, + "eval_runtime": 154.5352, + "eval_samples_per_second": 37.467, + "eval_steps_per_second": 3.126, + "step": 4050 + }, + { + "dpo_loss": 0.6898146271705627, + "epoch": 7.661785545583372, + "grad_norm": 68.15988794344355, + "learning_rate": 7.960298737005952e-08, + "logits": -1.14591383934021, + "logps": -87.33655548095703, + "loss": 0.1077, + "objective": 0.10561883449554443, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.03663736954331398, + "step": 4055 + }, + { + "dpo_loss": 0.6883107423782349, + "epoch": 7.671232876712329, + "grad_norm": 67.6835835394026, + "learning_rate": 7.900036180286102e-08, + "logits": -1.1574805974960327, + "logps": -87.51531982421875, + "loss": 0.1049, + "objective": 0.10436640679836273, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.03553532809019089, + "step": 4060 + }, + { + "dpo_loss": 0.6928078532218933, + "epoch": 7.680680207841284, + "grad_norm": 65.52857280252256, + "learning_rate": 7.839959762914383e-08, + "logits": -1.188384771347046, + "logps": -88.98746490478516, + "loss": 0.1065, + "objective": 0.10643269121646881, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.037151891738176346, + "step": 4065 + }, + { + "dpo_loss": 0.6930032968521118, + "epoch": 7.690127538970241, + "grad_norm": 64.74313053889571, + "learning_rate": 7.780070138844522e-08, + "logits": -1.1631094217300415, + "logps": -87.88558197021484, + "loss": 0.1081, + "objective": 0.11115256696939468, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.04185224324464798, + "step": 4070 + }, + { + "dpo_loss": 0.6907125115394592, + "epoch": 7.699574870099197, + "grad_norm": 67.13432378815105, + "learning_rate": 7.720367959996954e-08, + "logits": -1.2005220651626587, + "logps": -89.42745208740234, + "loss": 0.1088, + "objective": 0.10870270431041718, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.48750001192092896, + "regularize": 0.03963145241141319, + "step": 4075 + }, + { + "dpo_loss": 0.6925803422927856, + "epoch": 7.709022201228153, + "grad_norm": 71.33412473182247, + "learning_rate": 7.660853876251683e-08, + "logits": -1.189697265625, + "logps": -86.2691421508789, + "loss": 0.1076, + "objective": 0.1080603152513504, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.03880227357149124, + "step": 4080 + }, + { + "dpo_loss": 0.6878358721733093, + "epoch": 7.718469532357109, + "grad_norm": 65.78903913253299, + "learning_rate": 7.601528535441232e-08, + "logits": -1.1525851488113403, + "logps": -88.75498962402344, + "loss": 0.1087, + "objective": 0.11228512227535248, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.04350154846906662, + "step": 4085 + }, + { + "dpo_loss": 0.6919507384300232, + "epoch": 7.727916863486065, + "grad_norm": 66.3099078877917, + "learning_rate": 7.542392583343583e-08, + "logits": -1.0845839977264404, + "logps": -87.10064697265625, + "loss": 0.1089, + "objective": 0.1093490943312645, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.04015401005744934, + "step": 4090 + }, + { + "dpo_loss": 0.6882933378219604, + "epoch": 7.737364194615021, + "grad_norm": 71.99346678649765, + "learning_rate": 7.483446663675169e-08, + "logits": -1.1613986492156982, + "logps": -87.78504180908203, + "loss": 0.1061, + "objective": 0.10825704038143158, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.03942771628499031, + "step": 4095 + }, + { + "dpo_loss": 0.6845086812973022, + "epoch": 7.746811525743977, + "grad_norm": 62.65200304841601, + "learning_rate": 7.424691418083854e-08, + "logits": -1.2002389430999756, + "logps": -88.44282531738281, + "loss": 0.1082, + "objective": 0.11072710901498795, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.44999998807907104, + "regularize": 0.042276252061128616, + "step": 4100 + }, + { + "epoch": 7.746811525743977, + "eval_dpo_loss": 0.7081242203712463, + "eval_logits": -1.1851998567581177, + "eval_logps": -94.00126647949219, + "eval_loss": 0.34109148383140564, + "eval_objective": 0.34028691053390503, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5191511511802673, + "eval_regularize": 0.26947447657585144, + "eval_runtime": 154.5082, + "eval_samples_per_second": 37.474, + "eval_steps_per_second": 3.126, + "step": 4100 + }, + { + "dpo_loss": 0.6903212666511536, + "epoch": 7.756258856872933, + "grad_norm": 71.55327544805871, + "learning_rate": 7.366127486141918e-08, + "logits": -1.1610606908798218, + "logps": -87.8761978149414, + "loss": 0.1044, + "objective": 0.10260413587093353, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.543749988079071, + "regularize": 0.033572006970644, + "step": 4105 + }, + { + "dpo_loss": 0.6889077425003052, + "epoch": 7.765706188001889, + "grad_norm": 65.38368309326316, + "learning_rate": 7.319414514879208e-08, + "logits": -1.106872797012329, + "logps": -88.81092834472656, + "loss": 0.1095, + "objective": 0.10929863154888153, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.04040784761309624, + "step": 4110 + }, + { + "dpo_loss": 0.6919306516647339, + "epoch": 7.775153519130845, + "grad_norm": 66.77104121920884, + "learning_rate": 7.261196552576512e-08, + "logits": -1.1112695932388306, + "logps": -87.815185546875, + "loss": 0.1061, + "objective": 0.10447889566421509, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.035285837948322296, + "step": 4115 + }, + { + "dpo_loss": 0.6867073178291321, + "epoch": 7.784600850259801, + "grad_norm": 62.880099903131345, + "learning_rate": 7.203171683624498e-08, + "logits": -1.193832278251648, + "logps": -87.20587158203125, + "loss": 0.1047, + "objective": 0.10617707669734955, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.0375063456594944, + "step": 4120 + }, + { + "dpo_loss": 0.6886472105979919, + "epoch": 7.794048181388757, + "grad_norm": 65.15194680583734, + "learning_rate": 7.14534053964504e-08, + "logits": -1.178002119064331, + "logps": -87.29219055175781, + "loss": 0.107, + "objective": 0.10785814374685287, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.03899341821670532, + "step": 4125 + }, + { + "dpo_loss": 0.6928061842918396, + "epoch": 7.8034955125177135, + "grad_norm": 66.0849283350675, + "learning_rate": 7.087703750151244e-08, + "logits": -1.1966705322265625, + "logps": -88.70189666748047, + "loss": 0.1055, + "objective": 0.10542736202478409, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.03614673763513565, + "step": 4130 + }, + { + "dpo_loss": 0.6882622838020325, + "epoch": 7.8129428436466695, + "grad_norm": 67.7948862553432, + "learning_rate": 7.030261942540602e-08, + "logits": -1.1530323028564453, + "logps": -88.18225860595703, + "loss": 0.1057, + "objective": 0.10691492259502411, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.46875, + "regularize": 0.03808870166540146, + "step": 4135 + }, + { + "dpo_loss": 0.690109133720398, + "epoch": 7.8223901747756255, + "grad_norm": 67.20968655610226, + "learning_rate": 6.973015742088173e-08, + "logits": -1.2623380422592163, + "logps": -87.86051177978516, + "loss": 0.1065, + "objective": 0.10408475250005722, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.035073842853307724, + "step": 4140 + }, + { + "dpo_loss": 0.6884062886238098, + "epoch": 7.831837505904582, + "grad_norm": 68.68688981851791, + "learning_rate": 6.915965771939725e-08, + "logits": -1.1563483476638794, + "logps": -88.83927917480469, + "loss": 0.1037, + "objective": 0.10269608348608017, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.03385545685887337, + "step": 4145 + }, + { + "dpo_loss": 0.6895419359207153, + "epoch": 7.841284837033538, + "grad_norm": 72.99502346008097, + "learning_rate": 6.859112653105023e-08, + "logits": -1.107596516609192, + "logps": -89.7536849975586, + "loss": 0.1051, + "objective": 0.10431935638189316, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.03536514565348625, + "step": 4150 + }, + { + "epoch": 7.841284837033538, + "eval_dpo_loss": 0.7083097100257874, + "eval_logits": -1.1848164796829224, + "eval_logps": -93.8552474975586, + "eval_loss": 0.3425026834011078, + "eval_objective": 0.3417186439037323, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.27088767290115356, + "eval_runtime": 154.5073, + "eval_samples_per_second": 37.474, + "eval_steps_per_second": 3.126, + "step": 4150 + }, + { + "dpo_loss": 0.689522922039032, + "epoch": 7.850732168162494, + "grad_norm": 61.625780749855046, + "learning_rate": 6.802457004451018e-08, + "logits": -1.2146650552749634, + "logps": -87.07714080810547, + "loss": 0.1048, + "objective": 0.10108639299869537, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.03213409334421158, + "step": 4155 + }, + { + "dpo_loss": 0.6918686032295227, + "epoch": 7.86017949929145, + "grad_norm": 66.69255583419884, + "learning_rate": 6.74599944269512e-08, + "logits": -1.219438910484314, + "logps": -86.26667785644531, + "loss": 0.1049, + "objective": 0.10439826548099518, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.03521140664815903, + "step": 4160 + }, + { + "dpo_loss": 0.6884658932685852, + "epoch": 7.8696268304204064, + "grad_norm": 69.93747327295206, + "learning_rate": 6.68974058239849e-08, + "logits": -1.160825490951538, + "logps": -87.0850601196289, + "loss": 0.1037, + "objective": 0.10178661346435547, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48124998807907104, + "regularize": 0.03294002264738083, + "step": 4165 + }, + { + "dpo_loss": 0.688989520072937, + "epoch": 7.8790741615493625, + "grad_norm": 66.57073575298065, + "learning_rate": 6.633681035959374e-08, + "logits": -1.2526596784591675, + "logps": -88.16157531738281, + "loss": 0.1061, + "objective": 0.1027727723121643, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5, + "regularize": 0.033873818814754486, + "step": 4170 + }, + { + "dpo_loss": 0.6920872926712036, + "epoch": 7.8885214926783185, + "grad_norm": 65.368918169017, + "learning_rate": 6.577821413606394e-08, + "logits": -1.1576217412948608, + "logps": -89.33631896972656, + "loss": 0.1042, + "objective": 0.10328070819377899, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.03407198190689087, + "step": 4175 + }, + { + "dpo_loss": 0.6866429448127747, + "epoch": 7.8979688238072745, + "grad_norm": 64.0686419132941, + "learning_rate": 6.522162323391925e-08, + "logits": -1.1338218450546265, + "logps": -88.63480377197266, + "loss": 0.1045, + "objective": 0.1009424477815628, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.032278142869472504, + "step": 4180 + }, + { + "dpo_loss": 0.6868075132369995, + "epoch": 7.9074161549362305, + "grad_norm": 71.10493618106902, + "learning_rate": 6.466704371185478e-08, + "logits": -1.1870781183242798, + "logps": -86.28584289550781, + "loss": 0.1041, + "objective": 0.10661537945270538, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.037934623658657074, + "step": 4185 + }, + { + "dpo_loss": 0.6851555705070496, + "epoch": 7.9168634860651865, + "grad_norm": 64.35034498511142, + "learning_rate": 6.411448160667113e-08, + "logits": -1.137531042098999, + "logps": -87.21583557128906, + "loss": 0.1031, + "objective": 0.10514400899410248, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.036628447473049164, + "step": 4190 + }, + { + "dpo_loss": 0.6898540258407593, + "epoch": 7.9263108171941425, + "grad_norm": 64.62703600883805, + "learning_rate": 6.356394293320854e-08, + "logits": -1.154130458831787, + "logps": -89.00459289550781, + "loss": 0.1035, + "objective": 0.10545846074819565, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.03647305816411972, + "step": 4195 + }, + { + "dpo_loss": 0.6923469305038452, + "epoch": 7.935758148323099, + "grad_norm": 66.02875824508733, + "learning_rate": 6.301543368428141e-08, + "logits": -1.0741298198699951, + "logps": -87.47310638427734, + "loss": 0.1047, + "objective": 0.10732004791498184, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.03808536380529404, + "step": 4200 + }, + { + "epoch": 7.935758148323099, + "eval_dpo_loss": 0.7085427045822144, + "eval_logits": -1.187182903289795, + "eval_logps": -93.66959381103516, + "eval_loss": 0.34216782450675964, + "eval_objective": 0.34106820821762085, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.27021393179893494, + "eval_runtime": 154.4178, + "eval_samples_per_second": 37.496, + "eval_steps_per_second": 3.128, + "step": 4200 + }, + { + "dpo_loss": 0.6902600526809692, + "epoch": 7.945205479452055, + "grad_norm": 70.3187555611494, + "learning_rate": 6.246895983061315e-08, + "logits": -1.2895134687423706, + "logps": -87.88436889648438, + "loss": 0.1009, + "objective": 0.10067249834537506, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.0316464826464653, + "step": 4205 + }, + { + "dpo_loss": 0.6903197169303894, + "epoch": 7.954652810581011, + "grad_norm": 64.72695206813354, + "learning_rate": 6.192452732077099e-08, + "logits": -1.2405836582183838, + "logps": -88.14324951171875, + "loss": 0.1053, + "objective": 0.10551361739635468, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.036481648683547974, + "step": 4210 + }, + { + "dpo_loss": 0.6930989027023315, + "epoch": 7.964100141709967, + "grad_norm": 68.35426549454564, + "learning_rate": 6.138214208110176e-08, + "logits": -1.2283471822738647, + "logps": -87.89575958251953, + "loss": 0.1038, + "objective": 0.10590711981058121, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5375000238418579, + "regularize": 0.03659723699092865, + "step": 4215 + }, + { + "dpo_loss": 0.6872066259384155, + "epoch": 7.973547472838923, + "grad_norm": 74.31478828809273, + "learning_rate": 6.084181001566657e-08, + "logits": -1.1427218914031982, + "logps": -87.60230255126953, + "loss": 0.1038, + "objective": 0.10263446718454361, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5249999761581421, + "regularize": 0.03391381725668907, + "step": 4220 + }, + { + "dpo_loss": 0.6878780126571655, + "epoch": 7.982994803967879, + "grad_norm": 66.25372718162421, + "learning_rate": 6.030353700617738e-08, + "logits": -1.19222891330719, + "logps": -88.9227523803711, + "loss": 0.1033, + "objective": 0.10655965656042099, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.03777185454964638, + "step": 4225 + }, + { + "dpo_loss": 0.690626859664917, + "epoch": 7.9924421350968355, + "grad_norm": 73.101616411094, + "learning_rate": 5.976732891193226e-08, + "logits": -1.1580171585083008, + "logps": -89.56224060058594, + "loss": 0.1021, + "objective": 0.10268416255712509, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.03362148255109787, + "step": 4230 + }, + { + "dpo_loss": 0.6919626593589783, + "epoch": 8.00188946622579, + "grad_norm": 65.50765796086647, + "learning_rate": 5.9233191569752096e-08, + "logits": -1.2294315099716187, + "logps": -87.63737487792969, + "loss": 0.1029, + "objective": 0.10424093902111053, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.035044677555561066, + "step": 4235 + }, + { + "dpo_loss": 0.6897158622741699, + "epoch": 8.011336797354748, + "grad_norm": 67.31454023920725, + "learning_rate": 5.870113079391673e-08, + "logits": -1.1569894552230835, + "logps": -88.41534423828125, + "loss": 0.0972, + "objective": 0.0982559472322464, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.46875, + "regularize": 0.029284348711371422, + "step": 4240 + }, + { + "dpo_loss": 0.6918286681175232, + "epoch": 8.020784128483703, + "grad_norm": 65.95526469820857, + "learning_rate": 5.8171152376102065e-08, + "logits": -1.2049682140350342, + "logps": -88.76419830322266, + "loss": 0.0978, + "objective": 0.09947766363620758, + "ranking_idealized": 0.44999998807907104, + "ranking_idealized_expo": 0.40625, + "ranking_simple": 0.40625, + "regularize": 0.03029480017721653, + "step": 4245 + }, + { + "dpo_loss": 0.6871441602706909, + "epoch": 8.03023145961266, + "grad_norm": 66.216010377231, + "learning_rate": 5.764326208531661e-08, + "logits": -1.1581623554229736, + "logps": -89.52348327636719, + "loss": 0.0985, + "objective": 0.09997192770242691, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.41874998807907104, + "ranking_simple": 0.41874998807907104, + "regularize": 0.0312575027346611, + "step": 4250 + }, + { + "epoch": 8.03023145961266, + "eval_dpo_loss": 0.7083138823509216, + "eval_logits": -1.1844391822814941, + "eval_logps": -93.69242095947266, + "eval_loss": 0.3415719270706177, + "eval_objective": 0.34033986926078796, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.2695084512233734, + "eval_runtime": 155.0337, + "eval_samples_per_second": 37.347, + "eval_steps_per_second": 3.115, + "step": 4250 + }, + { + "dpo_loss": 0.6859526634216309, + "epoch": 8.039678790741615, + "grad_norm": 66.75299577172807, + "learning_rate": 5.711746566783881e-08, + "logits": -1.1340796947479248, + "logps": -86.99250793457031, + "loss": 0.1001, + "objective": 0.10409112274646759, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.59375, + "regularize": 0.03549586609005928, + "step": 4255 + }, + { + "dpo_loss": 0.691058874130249, + "epoch": 8.049126121870572, + "grad_norm": 63.70150795972831, + "learning_rate": 5.6593768847154585e-08, + "logits": -1.2099452018737793, + "logps": -88.73480224609375, + "loss": 0.0999, + "objective": 0.09854385256767273, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.029437970370054245, + "step": 4260 + }, + { + "dpo_loss": 0.6891748309135437, + "epoch": 8.058573452999527, + "grad_norm": 65.84663674520789, + "learning_rate": 5.607217732389502e-08, + "logits": -1.2241264581680298, + "logps": -87.21629333496094, + "loss": 0.0965, + "objective": 0.09340299665927887, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.024485522881150246, + "step": 4265 + }, + { + "dpo_loss": 0.6913408041000366, + "epoch": 8.068020784128484, + "grad_norm": 65.15468330480556, + "learning_rate": 5.555269677577432e-08, + "logits": -1.1851134300231934, + "logps": -87.51316833496094, + "loss": 0.0989, + "objective": 0.1005776897072792, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.03144359961152077, + "step": 4270 + }, + { + "dpo_loss": 0.6871658563613892, + "epoch": 8.07746811525744, + "grad_norm": 64.42370950124189, + "learning_rate": 5.503533285752785e-08, + "logits": -1.1673667430877686, + "logps": -87.55001068115234, + "loss": 0.0997, + "objective": 0.09894660115242004, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.030230006203055382, + "step": 4275 + }, + { + "dpo_loss": 0.6906015872955322, + "epoch": 8.086915446386396, + "grad_norm": 65.25737645815519, + "learning_rate": 5.452009120085063e-08, + "logits": -1.10977303981781, + "logps": -88.37261962890625, + "loss": 0.1, + "objective": 0.10002779960632324, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.030967634171247482, + "step": 4280 + }, + { + "dpo_loss": 0.6894617080688477, + "epoch": 8.096362777515353, + "grad_norm": 66.36346720749803, + "learning_rate": 5.400697741433624e-08, + "logits": -1.2385480403900146, + "logps": -89.33724212646484, + "loss": 0.0992, + "objective": 0.09789734333753586, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.02895117737352848, + "step": 4285 + }, + { + "dpo_loss": 0.6887727379798889, + "epoch": 8.105810108644308, + "grad_norm": 64.7308617932561, + "learning_rate": 5.3495997083415454e-08, + "logits": -1.2975224256515503, + "logps": -88.44245147705078, + "loss": 0.0993, + "objective": 0.09873533993959427, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5375000238418579, + "regularize": 0.02985805831849575, + "step": 4290 + }, + { + "dpo_loss": 0.6871117353439331, + "epoch": 8.115257439773265, + "grad_norm": 71.20794837846714, + "learning_rate": 5.2987155770295835e-08, + "logits": -1.1390098333358765, + "logps": -88.33921813964844, + "loss": 0.0977, + "objective": 0.09580464661121368, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.027093475684523582, + "step": 4295 + }, + { + "dpo_loss": 0.6903670430183411, + "epoch": 8.12470477090222, + "grad_norm": 63.395320073665424, + "learning_rate": 5.2480459013900666e-08, + "logits": -1.2452596426010132, + "logps": -90.17387390136719, + "loss": 0.0964, + "objective": 0.0957658439874649, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.46875, + "regularize": 0.02672913670539856, + "step": 4300 + }, + { + "epoch": 8.12470477090222, + "eval_dpo_loss": 0.708151638507843, + "eval_logits": -1.1870557069778442, + "eval_logps": -93.50247955322266, + "eval_loss": 0.3422209322452545, + "eval_objective": 0.3408995568752289, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.270084410905838, + "eval_runtime": 154.5816, + "eval_samples_per_second": 37.456, + "eval_steps_per_second": 3.125, + "step": 4300 + }, + { + "dpo_loss": 0.6924780607223511, + "epoch": 8.134152102031177, + "grad_norm": 65.77929031759814, + "learning_rate": 5.1975912329809245e-08, + "logits": -1.1215593814849854, + "logps": -89.29200744628906, + "loss": 0.0983, + "objective": 0.09922514110803604, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.029977332800626755, + "step": 4305 + }, + { + "dpo_loss": 0.6879432797431946, + "epoch": 8.143599433160132, + "grad_norm": 60.81145106842867, + "learning_rate": 5.147352121019635e-08, + "logits": -1.247128963470459, + "logps": -86.65443420410156, + "loss": 0.0958, + "objective": 0.09523816406726837, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.026443829759955406, + "step": 4310 + }, + { + "dpo_loss": 0.6936001777648926, + "epoch": 8.153046764289089, + "grad_norm": 71.82393154987687, + "learning_rate": 5.0973291123772636e-08, + "logits": -1.203482985496521, + "logps": -89.65019226074219, + "loss": 0.0987, + "objective": 0.10044272989034653, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.031082715839147568, + "step": 4315 + }, + { + "dpo_loss": 0.6879534721374512, + "epoch": 8.162494095418044, + "grad_norm": 68.61775723034592, + "learning_rate": 5.047522751572528e-08, + "logits": -1.2061399221420288, + "logps": -89.13920593261719, + "loss": 0.0975, + "objective": 0.09912938624620438, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.03033403679728508, + "step": 4320 + }, + { + "dpo_loss": 0.6893355250358582, + "epoch": 8.171941426547, + "grad_norm": 64.96895658828034, + "learning_rate": 5.007834013783027e-08, + "logits": -1.204025387763977, + "logps": -88.53910827636719, + "loss": 0.097, + "objective": 0.09730519354343414, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.028371628373861313, + "step": 4325 + }, + { + "dpo_loss": 0.6900074481964111, + "epoch": 8.181388757675956, + "grad_norm": 64.5727180759984, + "learning_rate": 4.958418983741744e-08, + "logits": -1.227060317993164, + "logps": -86.502685546875, + "loss": 0.0985, + "objective": 0.09863508492708206, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.029634330421686172, + "step": 4330 + }, + { + "dpo_loss": 0.6896570324897766, + "epoch": 8.190836088804913, + "grad_norm": 67.13135125464676, + "learning_rate": 4.909222113625544e-08, + "logits": -1.17739999294281, + "logps": -89.40184020996094, + "loss": 0.0975, + "objective": 0.09737617522478104, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.028410470113158226, + "step": 4335 + }, + { + "dpo_loss": 0.6879313588142395, + "epoch": 8.200283419933868, + "grad_norm": 70.81753843915308, + "learning_rate": 4.860243938960329e-08, + "logits": -1.1501330137252808, + "logps": -90.64093017578125, + "loss": 0.0997, + "objective": 0.09839653968811035, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.5874999761581421, + "regularize": 0.029603416100144386, + "step": 4340 + }, + { + "dpo_loss": 0.6932226419448853, + "epoch": 8.209730751062825, + "grad_norm": 72.04751784384656, + "learning_rate": 4.8114849928914013e-08, + "logits": -1.2128031253814697, + "logps": -89.16693115234375, + "loss": 0.0979, + "objective": 0.10338658094406128, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.034064311534166336, + "step": 4345 + }, + { + "dpo_loss": 0.6911360025405884, + "epoch": 8.219178082191782, + "grad_norm": 65.5013939361684, + "learning_rate": 4.7629458061776816e-08, + "logits": -1.2401503324508667, + "logps": -88.55441284179688, + "loss": 0.0997, + "objective": 0.09972050040960312, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.03060689941048622, + "step": 4350 + }, + { + "epoch": 8.219178082191782, + "eval_dpo_loss": 0.7081336975097656, + "eval_logits": -1.1865993738174438, + "eval_logps": -93.80738067626953, + "eval_loss": 0.34230393171310425, + "eval_objective": 0.3408054709434509, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5186335444450378, + "eval_regularize": 0.26999205350875854, + "eval_runtime": 154.5279, + "eval_samples_per_second": 37.469, + "eval_steps_per_second": 3.126, + "step": 4350 + }, + { + "dpo_loss": 0.6863928437232971, + "epoch": 8.228625413320737, + "grad_norm": 69.86443537052166, + "learning_rate": 4.7146269071859477e-08, + "logits": -1.2031657695770264, + "logps": -89.26103210449219, + "loss": 0.0971, + "objective": 0.09626658260822296, + "ranking_idealized": 0.45625001192092896, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4375, + "regularize": 0.027627307921648026, + "step": 4355 + }, + { + "dpo_loss": 0.6876174807548523, + "epoch": 8.238072744449694, + "grad_norm": 71.68346903393217, + "learning_rate": 4.6665288218850404e-08, + "logits": -1.1734378337860107, + "logps": -87.59542846679688, + "loss": 0.0969, + "objective": 0.0959327444434166, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.02717098966240883, + "step": 4360 + }, + { + "dpo_loss": 0.6887752413749695, + "epoch": 8.247520075578649, + "grad_norm": 68.38626337967031, + "learning_rate": 4.618652073840187e-08, + "logits": -1.124677062034607, + "logps": -88.45066833496094, + "loss": 0.0978, + "objective": 0.09622694551944733, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.027349423617124557, + "step": 4365 + }, + { + "dpo_loss": 0.6909067034721375, + "epoch": 8.256967406707606, + "grad_norm": 62.8555313961238, + "learning_rate": 4.570997184207262e-08, + "logits": -1.1489849090576172, + "logps": -88.59163665771484, + "loss": 0.0969, + "objective": 0.09771253913640976, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.028621861711144447, + "step": 4370 + }, + { + "dpo_loss": 0.691406786441803, + "epoch": 8.266414737836561, + "grad_norm": 67.83043830233812, + "learning_rate": 4.523564671727134e-08, + "logits": -1.27828848361969, + "logps": -87.43110656738281, + "loss": 0.0959, + "objective": 0.09452847391366959, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.02538778819143772, + "step": 4375 + }, + { + "dpo_loss": 0.6888766288757324, + "epoch": 8.275862068965518, + "grad_norm": 69.32541466192775, + "learning_rate": 4.476355052720013e-08, + "logits": -1.1250083446502686, + "logps": -91.4510269165039, + "loss": 0.0973, + "objective": 0.0973498672246933, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.028462210670113564, + "step": 4380 + }, + { + "dpo_loss": 0.6925772428512573, + "epoch": 8.285309400094473, + "grad_norm": 72.54182169425356, + "learning_rate": 4.4293688410798306e-08, + "logits": -1.2165477275848389, + "logps": -88.94318389892578, + "loss": 0.0972, + "objective": 0.09378346055746078, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.024525735527276993, + "step": 4385 + }, + { + "dpo_loss": 0.6887747049331665, + "epoch": 8.29475673122343, + "grad_norm": 69.46068601996431, + "learning_rate": 4.382606548268658e-08, + "logits": -1.2112553119659424, + "logps": -88.78561401367188, + "loss": 0.0972, + "objective": 0.09837064892053604, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.029493171721696854, + "step": 4390 + }, + { + "dpo_loss": 0.6883238554000854, + "epoch": 8.304204062352385, + "grad_norm": 63.45350469479899, + "learning_rate": 4.336068683311125e-08, + "logits": -1.1967496871948242, + "logps": -89.25916290283203, + "loss": 0.0968, + "objective": 0.0968455821275711, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.028013193979859352, + "step": 4395 + }, + { + "dpo_loss": 0.6897088289260864, + "epoch": 8.313651393481342, + "grad_norm": 64.0026031711914, + "learning_rate": 4.289755752788879e-08, + "logits": -1.2383521795272827, + "logps": -88.59597778320312, + "loss": 0.0963, + "objective": 0.0949353277683258, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.025964435189962387, + "step": 4400 + }, + { + "epoch": 8.313651393481342, + "eval_dpo_loss": 0.7083983421325684, + "eval_logits": -1.1860986948013306, + "eval_logps": -93.68854522705078, + "eval_loss": 0.34339407086372375, + "eval_objective": 0.3419049382209778, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2710651159286499, + "eval_runtime": 154.6301, + "eval_samples_per_second": 37.444, + "eval_steps_per_second": 3.124, + "step": 4400 + }, + { + "dpo_loss": 0.6894680261611938, + "epoch": 8.323098724610297, + "grad_norm": 64.99137183554055, + "learning_rate": 4.2436682608350705e-08, + "logits": -1.1680843830108643, + "logps": -89.59283447265625, + "loss": 0.0934, + "objective": 0.09198366850614548, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5562499761581421, + "regularize": 0.023036859929561615, + "step": 4405 + }, + { + "dpo_loss": 0.6878527402877808, + "epoch": 8.332546055739254, + "grad_norm": 66.27524605172559, + "learning_rate": 4.197806709128865e-08, + "logits": -1.2237894535064697, + "logps": -87.18922424316406, + "loss": 0.0972, + "objective": 0.0979805439710617, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.029195260256528854, + "step": 4410 + }, + { + "dpo_loss": 0.6892653107643127, + "epoch": 8.341993386868209, + "grad_norm": 67.46944259801737, + "learning_rate": 4.152171596890008e-08, + "logits": -1.2152959108352661, + "logps": -88.88487243652344, + "loss": 0.0972, + "objective": 0.09294287860393524, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.48750001192092896, + "regularize": 0.02401634305715561, + "step": 4415 + }, + { + "dpo_loss": 0.6894920468330383, + "epoch": 8.351440717997166, + "grad_norm": 71.97312442790279, + "learning_rate": 4.106763420873349e-08, + "logits": -1.1700403690338135, + "logps": -87.84864807128906, + "loss": 0.0979, + "objective": 0.09823272377252579, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.029283514246344566, + "step": 4420 + }, + { + "dpo_loss": 0.6906135678291321, + "epoch": 8.360888049126121, + "grad_norm": 64.88947011442576, + "learning_rate": 4.061582675363459e-08, + "logits": -1.1943508386611938, + "logps": -86.23715209960938, + "loss": 0.0952, + "objective": 0.09793750196695328, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.028876136988401413, + "step": 4425 + }, + { + "dpo_loss": 0.6880233287811279, + "epoch": 8.370335380255078, + "grad_norm": 66.23358310841932, + "learning_rate": 4.016629852169237e-08, + "logits": -1.197622537612915, + "logps": -87.19839477539062, + "loss": 0.0965, + "objective": 0.09400331228971481, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.025200972333550453, + "step": 4430 + }, + { + "dpo_loss": 0.6888989806175232, + "epoch": 8.379782711384035, + "grad_norm": 69.18798130701948, + "learning_rate": 3.9719054406185806e-08, + "logits": -1.2378427982330322, + "logps": -87.28419494628906, + "loss": 0.0973, + "objective": 0.1004018634557724, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.03151196241378784, + "step": 4435 + }, + { + "dpo_loss": 0.6903999447822571, + "epoch": 8.38923004251299, + "grad_norm": 63.93310100734026, + "learning_rate": 3.927409927553038e-08, + "logits": -1.1226755380630493, + "logps": -88.65518951416016, + "loss": 0.096, + "objective": 0.0965915396809578, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.02755153737962246, + "step": 4440 + }, + { + "dpo_loss": 0.6903192400932312, + "epoch": 8.398677373641947, + "grad_norm": 70.19789859402223, + "learning_rate": 3.8831437973225076e-08, + "logits": -1.1719176769256592, + "logps": -89.19178771972656, + "loss": 0.0954, + "objective": 0.09195469319820404, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.02292276918888092, + "step": 4445 + }, + { + "dpo_loss": 0.692059338092804, + "epoch": 8.408124704770902, + "grad_norm": 67.97213768976081, + "learning_rate": 3.8391075317799784e-08, + "logits": -1.2177083492279053, + "logps": -88.48794555664062, + "loss": 0.0966, + "objective": 0.10071361064910889, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.03150767460465431, + "step": 4450 + }, + { + "epoch": 8.408124704770902, + "eval_dpo_loss": 0.7083788514137268, + "eval_logits": -1.1875168085098267, + "eval_logps": -93.731201171875, + "eval_loss": 0.3433586359024048, + "eval_objective": 0.3419070541858673, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5186335444450378, + "eval_regularize": 0.271069198846817, + "eval_runtime": 154.7595, + "eval_samples_per_second": 37.413, + "eval_steps_per_second": 3.121, + "step": 4450 + }, + { + "dpo_loss": 0.687419056892395, + "epoch": 8.417572035899859, + "grad_norm": 64.10285998797039, + "learning_rate": 3.795301610276269e-08, + "logits": -1.2001512050628662, + "logps": -85.2901611328125, + "loss": 0.0944, + "objective": 0.09056023508310318, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.02181832864880562, + "step": 4455 + }, + { + "dpo_loss": 0.689990758895874, + "epoch": 8.427019367028814, + "grad_norm": 65.93551136370232, + "learning_rate": 3.7517265096548236e-08, + "logits": -1.1889123916625977, + "logps": -91.23262786865234, + "loss": 0.0946, + "objective": 0.09441934525966644, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.02542026713490486, + "step": 4460 + }, + { + "dpo_loss": 0.6894329786300659, + "epoch": 8.436466698157771, + "grad_norm": 68.36833954510912, + "learning_rate": 3.708382704246521e-08, + "logits": -1.2022359371185303, + "logps": -87.67900848388672, + "loss": 0.0967, + "objective": 0.0936957448720932, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.02475244179368019, + "step": 4465 + }, + { + "dpo_loss": 0.6907721757888794, + "epoch": 8.445914029286726, + "grad_norm": 63.76925048241595, + "learning_rate": 3.6652706658645146e-08, + "logits": -1.2040332555770874, + "logps": -87.47300720214844, + "loss": 0.0958, + "objective": 0.0988548994064331, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48124998807907104, + "regularize": 0.029777679592370987, + "step": 4470 + }, + { + "dpo_loss": 0.6902170777320862, + "epoch": 8.455361360415683, + "grad_norm": 69.38300500893175, + "learning_rate": 3.6223908637990686e-08, + "logits": -1.2222057580947876, + "logps": -88.08490753173828, + "loss": 0.0941, + "objective": 0.09459289163351059, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.025571173056960106, + "step": 4475 + }, + { + "dpo_loss": 0.6911331415176392, + "epoch": 8.464808691544638, + "grad_norm": 67.18588794078038, + "learning_rate": 3.579743764812487e-08, + "logits": -1.2379624843597412, + "logps": -88.86549377441406, + "loss": 0.0934, + "objective": 0.0922469049692154, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.02313357964158058, + "step": 4480 + }, + { + "dpo_loss": 0.6889945864677429, + "epoch": 8.474256022673595, + "grad_norm": 64.37437826023299, + "learning_rate": 3.537329833134001e-08, + "logits": -1.2001641988754272, + "logps": -89.17662048339844, + "loss": 0.098, + "objective": 0.09767808020114899, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.028778618201613426, + "step": 4485 + }, + { + "dpo_loss": 0.6907839179039001, + "epoch": 8.48370335380255, + "grad_norm": 72.47472879601983, + "learning_rate": 3.495149530454747e-08, + "logits": -1.2376738786697388, + "logps": -89.4019775390625, + "loss": 0.093, + "objective": 0.09337928891181946, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.024300888180732727, + "step": 4490 + }, + { + "dpo_loss": 0.6883755922317505, + "epoch": 8.493150684931507, + "grad_norm": 69.09675000652916, + "learning_rate": 3.4532033159227174e-08, + "logits": -1.2523037195205688, + "logps": -87.72993469238281, + "loss": 0.0933, + "objective": 0.09463658928871155, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.025799039751291275, + "step": 4495 + }, + { + "dpo_loss": 0.6915408968925476, + "epoch": 8.502598016060462, + "grad_norm": 74.02222044202652, + "learning_rate": 3.4114916461377627e-08, + "logits": -1.148749589920044, + "logps": -87.29940032958984, + "loss": 0.0956, + "objective": 0.09479153901338577, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.025637447834014893, + "step": 4500 + }, + { + "epoch": 8.502598016060462, + "eval_dpo_loss": 0.7081142663955688, + "eval_logits": -1.1866111755371094, + "eval_logps": -93.84306335449219, + "eval_loss": 0.34312811493873596, + "eval_objective": 0.34156593680381775, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5186335444450378, + "eval_regularize": 0.27075451612472534, + "eval_runtime": 157.504, + "eval_samples_per_second": 36.761, + "eval_steps_per_second": 3.067, + "step": 4500 + }, + { + "dpo_loss": 0.6882936954498291, + "epoch": 8.512045347189419, + "grad_norm": 68.0477059878961, + "learning_rate": 3.3700149751466344e-08, + "logits": -1.096351981163025, + "logps": -89.81024169921875, + "loss": 0.096, + "objective": 0.09751152992248535, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.028682153671979904, + "step": 4505 + }, + { + "dpo_loss": 0.6905571222305298, + "epoch": 8.521492678318374, + "grad_norm": 63.99219462642485, + "learning_rate": 3.3287737544380385e-08, + "logits": -1.2304773330688477, + "logps": -87.42345428466797, + "loss": 0.0949, + "objective": 0.09371695667505264, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.024661244824528694, + "step": 4510 + }, + { + "dpo_loss": 0.6905447244644165, + "epoch": 8.530940009447331, + "grad_norm": 66.08184879335815, + "learning_rate": 3.287768432937721e-08, + "logits": -1.169162392616272, + "logps": -88.21883392333984, + "loss": 0.094, + "objective": 0.09295524656772614, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.023900777101516724, + "step": 4515 + }, + { + "dpo_loss": 0.6899552345275879, + "epoch": 8.540387340576288, + "grad_norm": 66.47372887760963, + "learning_rate": 3.246999457003574e-08, + "logits": -1.1336697340011597, + "logps": -87.81645965576172, + "loss": 0.0954, + "objective": 0.09886420518159866, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.029868673533201218, + "step": 4520 + }, + { + "dpo_loss": 0.6897488832473755, + "epoch": 8.549834671705243, + "grad_norm": 62.47832099540411, + "learning_rate": 3.2064672704207765e-08, + "logits": -1.2520087957382202, + "logps": -89.779052734375, + "loss": 0.0954, + "objective": 0.09957768768072128, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.030602801591157913, + "step": 4525 + }, + { + "dpo_loss": 0.6906110048294067, + "epoch": 8.5592820028342, + "grad_norm": 63.79845554953128, + "learning_rate": 3.1661723143969783e-08, + "logits": -1.1387863159179688, + "logps": -87.83059692382812, + "loss": 0.0939, + "objective": 0.09394125640392303, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.02488015405833721, + "step": 4530 + }, + { + "dpo_loss": 0.6908597350120544, + "epoch": 8.568729333963155, + "grad_norm": 64.58466357139119, + "learning_rate": 3.12611502755748e-08, + "logits": -1.224230408668518, + "logps": -90.1174087524414, + "loss": 0.0925, + "objective": 0.09125302731990814, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.02216704748570919, + "step": 4535 + }, + { + "dpo_loss": 0.6905266642570496, + "epoch": 8.578176665092112, + "grad_norm": 72.54050125765384, + "learning_rate": 3.086295845940473e-08, + "logits": -1.2545582056045532, + "logps": -90.31018829345703, + "loss": 0.0953, + "objective": 0.09556666761636734, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.026513999328017235, + "step": 4540 + }, + { + "dpo_loss": 0.689853310585022, + "epoch": 8.587623996221067, + "grad_norm": 63.01988615424287, + "learning_rate": 3.046715202992292e-08, + "logits": -1.239242672920227, + "logps": -87.2964096069336, + "loss": 0.0962, + "objective": 0.09675108641386032, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.027765760198235512, + "step": 4545 + }, + { + "dpo_loss": 0.691806435585022, + "epoch": 8.597071327350024, + "grad_norm": 69.93692600298262, + "learning_rate": 3.007373529562676e-08, + "logits": -1.1182079315185547, + "logps": -88.34652709960938, + "loss": 0.0928, + "objective": 0.09237860143184662, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.02319796197116375, + "step": 4550 + }, + { + "epoch": 8.597071327350024, + "eval_dpo_loss": 0.7083669900894165, + "eval_logits": -1.1858949661254883, + "eval_logps": -93.82425689697266, + "eval_loss": 0.34278836846351624, + "eval_objective": 0.3414183557033539, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5186335444450378, + "eval_regularize": 0.2705816626548767, + "eval_runtime": 154.5947, + "eval_samples_per_second": 37.453, + "eval_steps_per_second": 3.124, + "step": 4550 + }, + { + "dpo_loss": 0.690921425819397, + "epoch": 8.60651865847898, + "grad_norm": 65.50525148475181, + "learning_rate": 2.9682712539001038e-08, + "logits": -1.2246023416519165, + "logps": -89.01949310302734, + "loss": 0.0932, + "objective": 0.09233587980270386, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.023243745788931847, + "step": 4555 + }, + { + "dpo_loss": 0.6897628307342529, + "epoch": 8.615965989607936, + "grad_norm": 70.33173705088662, + "learning_rate": 2.9294088016471124e-08, + "logits": -1.1716398000717163, + "logps": -87.7445297241211, + "loss": 0.0915, + "objective": 0.09164030849933624, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.02266402170062065, + "step": 4560 + }, + { + "dpo_loss": 0.6929219365119934, + "epoch": 8.625413320736891, + "grad_norm": 66.1921684721932, + "learning_rate": 2.8907865958356926e-08, + "logits": -1.2246098518371582, + "logps": -88.78094482421875, + "loss": 0.0953, + "objective": 0.09624224156141281, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.0269500520080328, + "step": 4565 + }, + { + "dpo_loss": 0.6871283650398254, + "epoch": 8.634860651865848, + "grad_norm": 68.83329016660062, + "learning_rate": 2.8524050568826452e-08, + "logits": -1.2258967161178589, + "logps": -88.34210205078125, + "loss": 0.0952, + "objective": 0.09454770386219025, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.02583487331867218, + "step": 4570 + }, + { + "dpo_loss": 0.6881753206253052, + "epoch": 8.644307982994803, + "grad_norm": 64.84871007090842, + "learning_rate": 2.8142646025850313e-08, + "logits": -1.1877790689468384, + "logps": -86.32475280761719, + "loss": 0.0922, + "objective": 0.09216133505105972, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.59375, + "regularize": 0.023343803361058235, + "step": 4575 + }, + { + "dpo_loss": 0.6900415420532227, + "epoch": 8.65375531412376, + "grad_norm": 65.79624102950804, + "learning_rate": 2.7763656481156195e-08, + "logits": -1.1728365421295166, + "logps": -88.63168334960938, + "loss": 0.0932, + "objective": 0.09233821928501129, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.023334065452218056, + "step": 4580 + }, + { + "dpo_loss": 0.6894615292549133, + "epoch": 8.663202645252715, + "grad_norm": 63.83956436960819, + "learning_rate": 2.7387086060183613e-08, + "logits": -1.1962952613830566, + "logps": -88.25006866455078, + "loss": 0.095, + "objective": 0.09368318319320679, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.02473703771829605, + "step": 4585 + }, + { + "dpo_loss": 0.687892735004425, + "epoch": 8.672649976381672, + "grad_norm": 67.74351010340281, + "learning_rate": 2.701293886203912e-08, + "logits": -1.1812070608139038, + "logps": -89.998291015625, + "loss": 0.0951, + "objective": 0.0939764752984047, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5687500238418579, + "regularize": 0.02518719993531704, + "step": 4590 + }, + { + "dpo_loss": 0.6898320317268372, + "epoch": 8.682097307510627, + "grad_norm": 68.21667103987487, + "learning_rate": 2.664121895945151e-08, + "logits": -1.098860502243042, + "logps": -89.40926361083984, + "loss": 0.0924, + "objective": 0.09196336567401886, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.02298017032444477, + "step": 4595 + }, + { + "dpo_loss": 0.6876360774040222, + "epoch": 8.691544638639584, + "grad_norm": 67.16139911459766, + "learning_rate": 2.627193039872763e-08, + "logits": -1.2400391101837158, + "logps": -88.00350189208984, + "loss": 0.0924, + "objective": 0.09190518409013748, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.023141566663980484, + "step": 4600 + }, + { + "epoch": 8.691544638639584, + "eval_dpo_loss": 0.7081586122512817, + "eval_logits": -1.1870571374893188, + "eval_logps": -93.7706069946289, + "eval_loss": 0.34183570742607117, + "eval_objective": 0.3406393826007843, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5186335444450378, + "eval_regularize": 0.26982349157333374, + "eval_runtime": 154.9258, + "eval_samples_per_second": 37.373, + "eval_steps_per_second": 3.118, + "step": 4600 + }, + { + "dpo_loss": 0.689667820930481, + "epoch": 8.700991969768541, + "grad_norm": 66.78585103551504, + "learning_rate": 2.590507719970822e-08, + "logits": -1.238919734954834, + "logps": -88.60655212402344, + "loss": 0.0921, + "objective": 0.0904478058218956, + "ranking_idealized": 0.6312500238418579, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.021481022238731384, + "step": 4605 + }, + { + "dpo_loss": 0.6921404004096985, + "epoch": 8.710439300897496, + "grad_norm": 64.9738940503894, + "learning_rate": 2.5540663355724247e-08, + "logits": -1.2821037769317627, + "logps": -87.18733215332031, + "loss": 0.092, + "objective": 0.09340129792690277, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48750001192092896, + "regularize": 0.02418726123869419, + "step": 4610 + }, + { + "dpo_loss": 0.6893884539604187, + "epoch": 8.719886632026453, + "grad_norm": 65.1514547817796, + "learning_rate": 2.517869283355345e-08, + "logits": -1.2021890878677368, + "logps": -88.43559265136719, + "loss": 0.0924, + "objective": 0.09232644736766815, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.023387601599097252, + "step": 4615 + }, + { + "dpo_loss": 0.690017819404602, + "epoch": 8.729333963155408, + "grad_norm": 67.83285187738235, + "learning_rate": 2.4819169573377152e-08, + "logits": -1.2051613330841064, + "logps": -91.49044036865234, + "loss": 0.0914, + "objective": 0.09051535278558731, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.021513575688004494, + "step": 4620 + }, + { + "dpo_loss": 0.6906439065933228, + "epoch": 8.738781294284365, + "grad_norm": 67.69617898227318, + "learning_rate": 2.4462097488737232e-08, + "logits": -1.1643754243850708, + "logps": -88.56101989746094, + "loss": 0.0925, + "objective": 0.09072182327508926, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.021657422184944153, + "step": 4625 + }, + { + "dpo_loss": 0.6884030103683472, + "epoch": 8.74822862541332, + "grad_norm": 72.77658648446189, + "learning_rate": 2.410748046649366e-08, + "logits": -1.2056032419204712, + "logps": -88.59508514404297, + "loss": 0.0937, + "objective": 0.09496771544218063, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.026127416640520096, + "step": 4630 + }, + { + "dpo_loss": 0.6875420212745667, + "epoch": 8.757675956542277, + "grad_norm": 66.43966114497935, + "learning_rate": 2.3755322366782154e-08, + "logits": -1.2251002788543701, + "logps": -92.06501007080078, + "loss": 0.0947, + "objective": 0.09579209983348846, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.02703789807856083, + "step": 4635 + }, + { + "dpo_loss": 0.6887904405593872, + "epoch": 8.767123287671232, + "grad_norm": 75.24456536798026, + "learning_rate": 2.340562702297222e-08, + "logits": -1.1932998895645142, + "logps": -88.05471801757812, + "loss": 0.0891, + "objective": 0.08944825083017349, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.0205692071467638, + "step": 4640 + }, + { + "dpo_loss": 0.6870555877685547, + "epoch": 8.77657061880019, + "grad_norm": 72.27399298291411, + "learning_rate": 2.3058398241625282e-08, + "logits": -1.1654350757598877, + "logps": -88.93949890136719, + "loss": 0.0922, + "objective": 0.09154076874256134, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4749999940395355, + "regularize": 0.022835206240415573, + "step": 4645 + }, + { + "dpo_loss": 0.6922885775566101, + "epoch": 8.786017949929144, + "grad_norm": 67.30299296912347, + "learning_rate": 2.2713639802453416e-08, + "logits": -1.1404081583023071, + "logps": -88.39984130859375, + "loss": 0.0908, + "objective": 0.09168007224798203, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4749999940395355, + "regularize": 0.022451210767030716, + "step": 4650 + }, + { + "epoch": 8.786017949929144, + "eval_dpo_loss": 0.7079064249992371, + "eval_logits": -1.1871592998504639, + "eval_logps": -93.74046325683594, + "eval_loss": 0.34150078892707825, + "eval_objective": 0.3402964472770691, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2695057988166809, + "eval_runtime": 155.9316, + "eval_samples_per_second": 37.132, + "eval_steps_per_second": 3.098, + "step": 4650 + }, + { + "dpo_loss": 0.6876232624053955, + "epoch": 8.795465281058101, + "grad_norm": 65.15183634112627, + "learning_rate": 2.2371355458278007e-08, + "logits": -1.2236615419387817, + "logps": -88.56582641601562, + "loss": 0.0891, + "objective": 0.0867016464471817, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.01793932355940342, + "step": 4655 + }, + { + "dpo_loss": 0.6892760396003723, + "epoch": 8.804912612187056, + "grad_norm": 68.8055375349853, + "learning_rate": 2.2031548934989126e-08, + "logits": -1.1710513830184937, + "logps": -89.77734375, + "loss": 0.0933, + "objective": 0.09406758844852448, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.025139978155493736, + "step": 4660 + }, + { + "dpo_loss": 0.6892111897468567, + "epoch": 8.814359943316013, + "grad_norm": 63.889831491213435, + "learning_rate": 2.1694223931504883e-08, + "logits": -1.2380783557891846, + "logps": -87.5960922241211, + "loss": 0.0899, + "objective": 0.08907774835824966, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.4312500059604645, + "ranking_simple": 0.42500001192092896, + "regularize": 0.02015662007033825, + "step": 4665 + }, + { + "dpo_loss": 0.6887243986129761, + "epoch": 8.823807274444968, + "grad_norm": 67.27570613787965, + "learning_rate": 2.1359384119731012e-08, + "logits": -1.2172998189926147, + "logps": -87.30493927001953, + "loss": 0.0912, + "objective": 0.08973757922649384, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.02086513489484787, + "step": 4670 + }, + { + "dpo_loss": 0.6899820566177368, + "epoch": 8.833254605573925, + "grad_norm": 63.128167239434276, + "learning_rate": 2.102703314452106e-08, + "logits": -1.1707627773284912, + "logps": -88.51395416259766, + "loss": 0.0909, + "objective": 0.0940367728471756, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5687500238418579, + "regularize": 0.025038564577698708, + "step": 4675 + }, + { + "dpo_loss": 0.688649594783783, + "epoch": 8.84270193670288, + "grad_norm": 63.2769665589679, + "learning_rate": 2.069717462363679e-08, + "logits": -1.16934335231781, + "logps": -87.5626449584961, + "loss": 0.0893, + "objective": 0.08972381055355072, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.45625001192092896, + "ranking_simple": 0.45625001192092896, + "regularize": 0.020858842879533768, + "step": 4680 + }, + { + "dpo_loss": 0.6877025365829468, + "epoch": 8.852149267831837, + "grad_norm": 64.74207762173776, + "learning_rate": 2.03698121477085e-08, + "logits": -1.2021405696868896, + "logps": -86.36474609375, + "loss": 0.0891, + "objective": 0.08947821706533432, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5062500238418579, + "regularize": 0.020707957446575165, + "step": 4685 + }, + { + "dpo_loss": 0.6896892786026001, + "epoch": 8.861596598960794, + "grad_norm": 73.692173393817, + "learning_rate": 2.0044949280196317e-08, + "logits": -1.133115530014038, + "logps": -86.93243408203125, + "loss": 0.0912, + "objective": 0.09109000116586685, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.022121066227555275, + "step": 4690 + }, + { + "dpo_loss": 0.6885090470314026, + "epoch": 8.87104393008975, + "grad_norm": 61.78689892348044, + "learning_rate": 1.9722589557351093e-08, + "logits": -1.2453858852386475, + "logps": -86.93772888183594, + "loss": 0.0915, + "objective": 0.08757088333368301, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.018719974905252457, + "step": 4695 + }, + { + "dpo_loss": 0.6881387233734131, + "epoch": 8.880491261218706, + "grad_norm": 67.67673378303367, + "learning_rate": 1.940273648817617e-08, + "logits": -1.2218042612075806, + "logps": -90.48210144042969, + "loss": 0.0922, + "objective": 0.09271275252103806, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.023898882791399956, + "step": 4700 + }, + { + "epoch": 8.880491261218706, + "eval_dpo_loss": 0.7078229784965515, + "eval_logits": -1.1888352632522583, + "eval_logps": -93.71256256103516, + "eval_loss": 0.3418760597705841, + "eval_objective": 0.3405354917049408, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.26975318789482117, + "eval_runtime": 154.247, + "eval_samples_per_second": 37.537, + "eval_steps_per_second": 3.131, + "step": 4700 + }, + { + "dpo_loss": 0.6872705817222595, + "epoch": 8.889938592347661, + "grad_norm": 66.00163288743991, + "learning_rate": 1.9085393554388872e-08, + "logits": -1.1716687679290771, + "logps": -85.67667388916016, + "loss": 0.0886, + "objective": 0.09024933725595474, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.021522274240851402, + "step": 4705 + }, + { + "dpo_loss": 0.6918772459030151, + "epoch": 8.899385923476618, + "grad_norm": 68.31292223070808, + "learning_rate": 1.8770564210382867e-08, + "logits": -1.1566064357757568, + "logps": -89.57164001464844, + "loss": 0.0907, + "objective": 0.0918797180056572, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4437499940395355, + "regularize": 0.022691994905471802, + "step": 4710 + }, + { + "dpo_loss": 0.6887393593788147, + "epoch": 8.908833254605574, + "grad_norm": 70.38766796803583, + "learning_rate": 1.8458251883190557e-08, + "logits": -1.0890405178070068, + "logps": -86.96800231933594, + "loss": 0.0921, + "objective": 0.09513839334249496, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.41874998807907104, + "ranking_simple": 0.42500001192092896, + "regularize": 0.026264458894729614, + "step": 4715 + }, + { + "dpo_loss": 0.6884477138519287, + "epoch": 8.91828058573453, + "grad_norm": 69.40217443634405, + "learning_rate": 1.8148459972445574e-08, + "logits": -1.1955578327178955, + "logps": -88.05402374267578, + "loss": 0.0907, + "objective": 0.08991407603025436, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.59375, + "regularize": 0.021069305017590523, + "step": 4720 + }, + { + "dpo_loss": 0.6898118257522583, + "epoch": 8.927727916863486, + "grad_norm": 60.67398784751232, + "learning_rate": 1.7841191850345966e-08, + "logits": -1.216772198677063, + "logps": -86.17524719238281, + "loss": 0.0912, + "objective": 0.09173218905925751, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.022751014679670334, + "step": 4725 + }, + { + "dpo_loss": 0.6895357370376587, + "epoch": 8.937175247992442, + "grad_norm": 63.314698923503244, + "learning_rate": 1.753645086161737e-08, + "logits": -1.1994549036026, + "logps": -89.67224884033203, + "loss": 0.0928, + "objective": 0.09444586932659149, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.025492290034890175, + "step": 4730 + }, + { + "dpo_loss": 0.6899622678756714, + "epoch": 8.946622579121398, + "grad_norm": 69.25838644392168, + "learning_rate": 1.7234240323476702e-08, + "logits": -1.2246150970458984, + "logps": -86.9800796508789, + "loss": 0.0901, + "objective": 0.0887894406914711, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.48750001192092896, + "regularize": 0.019793204963207245, + "step": 4735 + }, + { + "dpo_loss": 0.6905208826065063, + "epoch": 8.956069910250354, + "grad_norm": 68.50166748835065, + "learning_rate": 1.6934563525596052e-08, + "logits": -1.158728837966919, + "logps": -88.60356140136719, + "loss": 0.0911, + "objective": 0.08902458846569061, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.019972508773207664, + "step": 4740 + }, + { + "dpo_loss": 0.6890329122543335, + "epoch": 8.96551724137931, + "grad_norm": 67.0223995041343, + "learning_rate": 1.6637423730066685e-08, + "logits": -1.2964181900024414, + "logps": -88.4970474243164, + "loss": 0.0893, + "objective": 0.0878550335764885, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.01895173452794552, + "step": 4745 + }, + { + "dpo_loss": 0.6893103718757629, + "epoch": 8.974964572508267, + "grad_norm": 67.04461498936297, + "learning_rate": 1.634282417136376e-08, + "logits": -1.1936671733856201, + "logps": -88.7967758178711, + "loss": 0.0895, + "objective": 0.08902595937252045, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.550000011920929, + "regularize": 0.020094921812415123, + "step": 4750 + }, + { + "epoch": 8.974964572508267, + "eval_dpo_loss": 0.7079932689666748, + "eval_logits": -1.1886335611343384, + "eval_logps": -93.7926025390625, + "eval_loss": 0.34172844886779785, + "eval_objective": 0.3402238190174103, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.2694244980812073, + "eval_runtime": 154.4216, + "eval_samples_per_second": 37.495, + "eval_steps_per_second": 3.128, + "step": 4750 + }, + { + "dpo_loss": 0.6871371269226074, + "epoch": 8.984411903637223, + "grad_norm": 62.54869581796573, + "learning_rate": 1.6050768056311033e-08, + "logits": -1.1841161251068115, + "logps": -87.77922058105469, + "loss": 0.0875, + "objective": 0.08825816214084625, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.0195444468408823, + "step": 4755 + }, + { + "dpo_loss": 0.6882451176643372, + "epoch": 8.993859234766179, + "grad_norm": 69.0916030923187, + "learning_rate": 1.5761258564045838e-08, + "logits": -1.2951503992080688, + "logps": -88.92320251464844, + "loss": 0.0891, + "objective": 0.08905212581157684, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.020227614790201187, + "step": 4760 + }, + { + "dpo_loss": 0.6898214817047119, + "epoch": 9.003306565895135, + "grad_norm": 62.98625686677879, + "learning_rate": 1.547429884598478e-08, + "logits": -1.1931416988372803, + "logps": -87.09309387207031, + "loss": 0.0897, + "objective": 0.09113426506519318, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.02215210348367691, + "step": 4765 + }, + { + "dpo_loss": 0.6886254549026489, + "epoch": 9.01275389702409, + "grad_norm": 62.10509231599403, + "learning_rate": 1.5189892025789047e-08, + "logits": -1.2121164798736572, + "logps": -88.56450653076172, + "loss": 0.0865, + "objective": 0.08668197691440582, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.01781943440437317, + "step": 4770 + }, + { + "dpo_loss": 0.6889467239379883, + "epoch": 9.022201228153047, + "grad_norm": 76.6790638644144, + "learning_rate": 1.4908041199330785e-08, + "logits": -1.2352068424224854, + "logps": -86.6780776977539, + "loss": 0.0862, + "objective": 0.08484284579753876, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.015948180109262466, + "step": 4775 + }, + { + "dpo_loss": 0.6895532608032227, + "epoch": 9.031648559282003, + "grad_norm": 68.78517673168774, + "learning_rate": 1.4628749434659082e-08, + "logits": -1.2529716491699219, + "logps": -88.34037780761719, + "loss": 0.0879, + "objective": 0.08553078025579453, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.01657545007765293, + "step": 4780 + }, + { + "dpo_loss": 0.6879485845565796, + "epoch": 9.04109589041096, + "grad_norm": 67.0168472829834, + "learning_rate": 1.4352019771966707e-08, + "logits": -1.2469146251678467, + "logps": -87.65064239501953, + "loss": 0.0876, + "objective": 0.0845494493842125, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.48750001192092896, + "regularize": 0.015754589810967445, + "step": 4785 + }, + { + "dpo_loss": 0.6894162893295288, + "epoch": 9.050543221539915, + "grad_norm": 66.53150175740558, + "learning_rate": 1.4077855223557117e-08, + "logits": -1.2380765676498413, + "logps": -89.157958984375, + "loss": 0.0883, + "objective": 0.08610600978136063, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.017164375633001328, + "step": 4790 + }, + { + "dpo_loss": 0.6880242824554443, + "epoch": 9.059990552668872, + "grad_norm": 63.93016707524342, + "learning_rate": 1.3806258773811475e-08, + "logits": -1.180908441543579, + "logps": -87.93473052978516, + "loss": 0.0898, + "objective": 0.09052032232284546, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.021717887371778488, + "step": 4795 + }, + { + "dpo_loss": 0.6889215707778931, + "epoch": 9.069437883797827, + "grad_norm": 62.80651843785913, + "learning_rate": 1.3537233379156298e-08, + "logits": -1.2439289093017578, + "logps": -86.09716796875, + "loss": 0.0877, + "objective": 0.085892453789711, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.017000291496515274, + "step": 4800 + }, + { + "epoch": 9.069437883797827, + "eval_dpo_loss": 0.7083116173744202, + "eval_logits": -1.1891411542892456, + "eval_logps": -93.75226593017578, + "eval_loss": 0.34253185987472534, + "eval_objective": 0.34146299958229065, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.2706318497657776, + "eval_runtime": 155.8804, + "eval_samples_per_second": 37.144, + "eval_steps_per_second": 3.099, + "step": 4800 + }, + { + "dpo_loss": 0.6885039210319519, + "epoch": 9.078885214926784, + "grad_norm": 66.81682953585893, + "learning_rate": 1.3270781968031109e-08, + "logits": -1.2414919137954712, + "logps": -88.50529479980469, + "loss": 0.0875, + "objective": 0.09152210503816605, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5062500238418579, + "regularize": 0.022671718150377274, + "step": 4805 + }, + { + "dpo_loss": 0.6892110109329224, + "epoch": 9.088332546055739, + "grad_norm": 65.02550523359547, + "learning_rate": 1.300690744085689e-08, + "logits": -1.1340454816818237, + "logps": -86.27638244628906, + "loss": 0.0862, + "objective": 0.08667820692062378, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.017757095396518707, + "step": 4810 + }, + { + "dpo_loss": 0.6907178163528442, + "epoch": 9.097779877184696, + "grad_norm": 68.9857610536704, + "learning_rate": 1.2745612670004152e-08, + "logits": -1.1740095615386963, + "logps": -87.9771957397461, + "loss": 0.086, + "objective": 0.08404810726642609, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.014976325444877148, + "step": 4815 + }, + { + "dpo_loss": 0.6885272264480591, + "epoch": 9.10722720831365, + "grad_norm": 63.104116776332255, + "learning_rate": 1.2486900499761894e-08, + "logits": -1.2118504047393799, + "logps": -88.93104553222656, + "loss": 0.0882, + "objective": 0.08766726404428482, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.01881454512476921, + "step": 4820 + }, + { + "dpo_loss": 0.6883677840232849, + "epoch": 9.116674539442608, + "grad_norm": 64.89314557005927, + "learning_rate": 1.223077374630646e-08, + "logits": -1.1904551982879639, + "logps": -89.31713104248047, + "loss": 0.0865, + "objective": 0.08571527153253555, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.016878489404916763, + "step": 4825 + }, + { + "dpo_loss": 0.6899958848953247, + "epoch": 9.126121870571563, + "grad_norm": 63.79886310014757, + "learning_rate": 1.1977235197671104e-08, + "logits": -1.2142480611801147, + "logps": -88.31678771972656, + "loss": 0.0862, + "objective": 0.08748110383749008, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.018481507897377014, + "step": 4830 + }, + { + "dpo_loss": 0.688530445098877, + "epoch": 9.13556920170052, + "grad_norm": 64.41359209913031, + "learning_rate": 1.1726287613715441e-08, + "logits": -1.1784677505493164, + "logps": -87.01527404785156, + "loss": 0.088, + "objective": 0.08827238529920578, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.019419338554143906, + "step": 4835 + }, + { + "dpo_loss": 0.6870613098144531, + "epoch": 9.145016532829477, + "grad_norm": 67.04834827209164, + "learning_rate": 1.147793372609554e-08, + "logits": -1.1923383474349976, + "logps": -90.55400848388672, + "loss": 0.0868, + "objective": 0.0859370082616806, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.017230866476893425, + "step": 4840 + }, + { + "dpo_loss": 0.6901251673698425, + "epoch": 9.154463863958432, + "grad_norm": 65.08175102578413, + "learning_rate": 1.1232176238234109e-08, + "logits": -1.1326186656951904, + "logps": -87.73515319824219, + "loss": 0.0845, + "objective": 0.08397001028060913, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.014957490377128124, + "step": 4845 + }, + { + "dpo_loss": 0.6900922060012817, + "epoch": 9.163911195087389, + "grad_norm": 69.47769734946932, + "learning_rate": 1.0989017825291159e-08, + "logits": -1.2422475814819336, + "logps": -90.37779235839844, + "loss": 0.0862, + "objective": 0.08490969240665436, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.015900466591119766, + "step": 4850 + }, + { + "epoch": 9.163911195087389, + "eval_dpo_loss": 0.7081762552261353, + "eval_logits": -1.1894125938415527, + "eval_logps": -93.8492431640625, + "eval_loss": 0.34226810932159424, + "eval_objective": 0.340609073638916, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.520703911781311, + "eval_regularize": 0.26979145407676697, + "eval_runtime": 157.3259, + "eval_samples_per_second": 36.803, + "eval_steps_per_second": 3.07, + "step": 4850 + }, + { + "dpo_loss": 0.6883929967880249, + "epoch": 9.173358526216344, + "grad_norm": 67.70285222701611, + "learning_rate": 1.0748461134134746e-08, + "logits": -1.1807101964950562, + "logps": -88.82564544677734, + "loss": 0.0851, + "objective": 0.08623015880584717, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.017390865832567215, + "step": 4855 + }, + { + "dpo_loss": 0.6887978911399841, + "epoch": 9.1828058573453, + "grad_norm": 70.304720336417, + "learning_rate": 1.0510508783312221e-08, + "logits": -1.1540125608444214, + "logps": -88.77113342285156, + "loss": 0.0862, + "objective": 0.0866982564330101, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.5, + "regularize": 0.017818449065089226, + "step": 4860 + }, + { + "dpo_loss": 0.689242959022522, + "epoch": 9.192253188474256, + "grad_norm": 71.42256439142606, + "learning_rate": 1.0275163363021865e-08, + "logits": -1.275807499885559, + "logps": -87.94988250732422, + "loss": 0.0878, + "objective": 0.09097422659397125, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.0220499224960804, + "step": 4865 + }, + { + "dpo_loss": 0.6900553107261658, + "epoch": 9.201700519603213, + "grad_norm": 63.33565072444968, + "learning_rate": 1.0042427435084433e-08, + "logits": -1.1935269832611084, + "logps": -89.87028503417969, + "loss": 0.0855, + "objective": 0.08483897894620895, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.5062500238418579, + "regularize": 0.01583344303071499, + "step": 4870 + }, + { + "dpo_loss": 0.6895790100097656, + "epoch": 9.211147850732168, + "grad_norm": 66.58911224536496, + "learning_rate": 9.812303532915483e-09, + "logits": -1.0958967208862305, + "logps": -88.036376953125, + "loss": 0.086, + "objective": 0.08777010440826416, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.6312500238418579, + "ranking_simple": 0.6312500238418579, + "regularize": 0.018812203779816628, + "step": 4875 + }, + { + "dpo_loss": 0.6897131204605103, + "epoch": 9.220595181861125, + "grad_norm": 67.84574166096418, + "learning_rate": 9.584794161497656e-09, + "logits": -1.234770655632019, + "logps": -88.74861145019531, + "loss": 0.0879, + "objective": 0.08676841855049133, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.017797108739614487, + "step": 4880 + }, + { + "dpo_loss": 0.6893714666366577, + "epoch": 9.23004251299008, + "grad_norm": 65.59648173263587, + "learning_rate": 9.359901797353465e-09, + "logits": -1.24154794216156, + "logps": -84.87870025634766, + "loss": 0.0879, + "objective": 0.08753690123558044, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.01859975792467594, + "step": 4885 + }, + { + "dpo_loss": 0.6893272399902344, + "epoch": 9.239489844119037, + "grad_norm": 70.44821865321937, + "learning_rate": 9.13762888851849e-09, + "logits": -1.277855634689331, + "logps": -87.74569702148438, + "loss": 0.0865, + "objective": 0.0885370597243309, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.019604332745075226, + "step": 4890 + }, + { + "dpo_loss": 0.6910862326622009, + "epoch": 9.248937175247992, + "grad_norm": 67.9554426716988, + "learning_rate": 8.917977854514452e-09, + "logits": -1.1983087062835693, + "logps": -88.98271179199219, + "loss": 0.0881, + "objective": 0.08856123685836792, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4375, + "regularize": 0.01945260725915432, + "step": 4895 + }, + { + "dpo_loss": 0.6897703409194946, + "epoch": 9.258384506376949, + "grad_norm": 66.2538061963501, + "learning_rate": 8.700951086323016e-09, + "logits": -1.2099605798721313, + "logps": -88.5633773803711, + "loss": 0.0856, + "objective": 0.08235849440097809, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.4625000059604645, + "regularize": 0.013381464406847954, + "step": 4900 + }, + { + "epoch": 9.258384506376949, + "eval_dpo_loss": 0.7080788016319275, + "eval_logits": -1.1882998943328857, + "eval_logps": -93.84528350830078, + "eval_loss": 0.3417190909385681, + "eval_objective": 0.3403823971748352, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.26957446336746216, + "eval_runtime": 155.2113, + "eval_samples_per_second": 37.304, + "eval_steps_per_second": 3.112, + "step": 4900 + }, + { + "dpo_loss": 0.6926195025444031, + "epoch": 9.267831837505904, + "grad_norm": 62.961062924907445, + "learning_rate": 8.486550946359777e-09, + "logits": -1.233040452003479, + "logps": -85.98373413085938, + "loss": 0.0863, + "objective": 0.08583666384220123, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4437499940395355, + "ranking_simple": 0.4437499940395355, + "regularize": 0.01657470501959324, + "step": 4905 + }, + { + "dpo_loss": 0.6889017820358276, + "epoch": 9.27727916863486, + "grad_norm": 68.99308296487503, + "learning_rate": 8.274779768448482e-09, + "logits": -1.1674201488494873, + "logps": -86.79928588867188, + "loss": 0.0858, + "objective": 0.08492042124271393, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.016030244529247284, + "step": 4910 + }, + { + "dpo_loss": 0.6884921789169312, + "epoch": 9.286726499763816, + "grad_norm": 68.69978118047693, + "learning_rate": 8.065639857795791e-09, + "logits": -1.1256113052368164, + "logps": -87.06742095947266, + "loss": 0.0876, + "objective": 0.08750703185796738, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.01865781843662262, + "step": 4915 + }, + { + "dpo_loss": 0.6900587677955627, + "epoch": 9.296173830892773, + "grad_norm": 64.29991429398892, + "learning_rate": 7.85913349096587e-09, + "logits": -1.2107371091842651, + "logps": -89.1092529296875, + "loss": 0.0864, + "objective": 0.08681637793779373, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5562499761581421, + "regularize": 0.017810503020882607, + "step": 4920 + }, + { + "dpo_loss": 0.6903694868087769, + "epoch": 9.30562116202173, + "grad_norm": 68.53547054793829, + "learning_rate": 7.655262915855998e-09, + "logits": -1.1062363386154175, + "logps": -87.64759063720703, + "loss": 0.0856, + "objective": 0.08366571366786957, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.014628760516643524, + "step": 4925 + }, + { + "dpo_loss": 0.6893365979194641, + "epoch": 9.315068493150685, + "grad_norm": 66.0146634728705, + "learning_rate": 7.454030351671748e-09, + "logits": -1.1078659296035767, + "logps": -90.8432388305664, + "loss": 0.087, + "objective": 0.08709698170423508, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5687500238418579, + "regularize": 0.018163317814469337, + "step": 4930 + }, + { + "dpo_loss": 0.6889593005180359, + "epoch": 9.324515824279642, + "grad_norm": 69.06842401030131, + "learning_rate": 7.255437988903046e-09, + "logits": -1.1686851978302002, + "logps": -90.20281219482422, + "loss": 0.0859, + "objective": 0.08375723659992218, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.014861305244266987, + "step": 4935 + }, + { + "dpo_loss": 0.6888924837112427, + "epoch": 9.333963155408597, + "grad_norm": 65.29947499443276, + "learning_rate": 7.059487989300256e-09, + "logits": -1.1724879741668701, + "logps": -86.79241943359375, + "loss": 0.0857, + "objective": 0.08658678084611893, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.017697524279356003, + "step": 4940 + }, + { + "dpo_loss": 0.6898183822631836, + "epoch": 9.343410486537554, + "grad_norm": 64.63096002919885, + "learning_rate": 6.866182485850663e-09, + "logits": -1.15725576877594, + "logps": -88.48844909667969, + "loss": 0.0863, + "objective": 0.08518817275762558, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.016206329688429832, + "step": 4945 + }, + { + "dpo_loss": 0.6934045553207397, + "epoch": 9.352857817666509, + "grad_norm": 69.34382914541001, + "learning_rate": 6.6755235827552215e-09, + "logits": -1.1436889171600342, + "logps": -89.59980773925781, + "loss": 0.0883, + "objective": 0.08872490376234055, + "ranking_idealized": 0.4937500059604645, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4625000059604645, + "regularize": 0.019384462386369705, + "step": 4950 + }, + { + "epoch": 9.352857817666509, + "eval_dpo_loss": 0.7080335021018982, + "eval_logits": -1.1885579824447632, + "eval_logps": -93.87728118896484, + "eval_loss": 0.3414333164691925, + "eval_objective": 0.3400879502296448, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5201863646507263, + "eval_regularize": 0.26928460597991943, + "eval_runtime": 154.478, + "eval_samples_per_second": 37.481, + "eval_steps_per_second": 3.127, + "step": 4950 + }, + { + "dpo_loss": 0.6904705762863159, + "epoch": 9.362305148795466, + "grad_norm": 66.85204937019242, + "learning_rate": 6.487513355405705e-09, + "logits": -1.117248773574829, + "logps": -88.00492858886719, + "loss": 0.0857, + "objective": 0.08591008931398392, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.016863025724887848, + "step": 4955 + }, + { + "dpo_loss": 0.6898153424263, + "epoch": 9.371752479924421, + "grad_norm": 62.29407776791308, + "learning_rate": 6.3021538503620075e-09, + "logits": -1.203830599784851, + "logps": -90.52922821044922, + "loss": 0.0855, + "objective": 0.08534902334213257, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.016367487609386444, + "step": 4960 + }, + { + "dpo_loss": 0.6877592206001282, + "epoch": 9.381199811053378, + "grad_norm": 62.92032475377164, + "learning_rate": 6.119447085330048e-09, + "logits": -1.2359322309494019, + "logps": -89.5292739868164, + "loss": 0.0884, + "objective": 0.0865122601389885, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.017736343666911125, + "step": 4965 + }, + { + "dpo_loss": 0.6872529983520508, + "epoch": 9.390647142182333, + "grad_norm": 69.50126731961326, + "learning_rate": 5.939395049139678e-09, + "logits": -1.2220096588134766, + "logps": -89.81663513183594, + "loss": 0.0871, + "objective": 0.0878627598285675, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.019137457013130188, + "step": 4970 + }, + { + "dpo_loss": 0.689234733581543, + "epoch": 9.40009447331129, + "grad_norm": 67.6468971762299, + "learning_rate": 5.761999701723003e-09, + "logits": -1.2347389459609985, + "logps": -88.4090347290039, + "loss": 0.0856, + "objective": 0.08394335210323334, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.015019873157143593, + "step": 4975 + }, + { + "dpo_loss": 0.689188539981842, + "epoch": 9.409541804440245, + "grad_norm": 69.29456704780728, + "learning_rate": 5.5872629740931765e-09, + "logits": -1.157482624053955, + "logps": -87.40362548828125, + "loss": 0.0838, + "objective": 0.08270417898893356, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.59375, + "regularize": 0.013785332441329956, + "step": 4980 + }, + { + "dpo_loss": 0.6888192892074585, + "epoch": 9.418989135569202, + "grad_norm": 64.98356997489397, + "learning_rate": 5.415186768323171e-09, + "logits": -1.21011483669281, + "logps": -87.56295013427734, + "loss": 0.0864, + "objective": 0.08518704771995544, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5062500238418579, + "regularize": 0.01630512624979019, + "step": 4985 + }, + { + "dpo_loss": 0.6896821856498718, + "epoch": 9.428436466698157, + "grad_norm": 78.49982821743039, + "learning_rate": 5.245772957525402e-09, + "logits": -1.18148672580719, + "logps": -88.74088287353516, + "loss": 0.0861, + "objective": 0.08624227344989777, + "ranking_idealized": 0.543749988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.017274051904678345, + "step": 4990 + }, + { + "dpo_loss": 0.6909292936325073, + "epoch": 9.437883797827114, + "grad_norm": 65.18298031190771, + "learning_rate": 5.079023385830938e-09, + "logits": -1.1286699771881104, + "logps": -87.91972351074219, + "loss": 0.0849, + "objective": 0.08399278670549393, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.581250011920929, + "regularize": 0.014899860136210918, + "step": 4995 + }, + { + "dpo_loss": 0.6874872446060181, + "epoch": 9.447331128956069, + "grad_norm": 70.39639898119631, + "learning_rate": 4.914939868369855e-09, + "logits": -1.1657907962799072, + "logps": -87.12093353271484, + "loss": 0.0866, + "objective": 0.08511563390493393, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.53125, + "regularize": 0.016366904601454735, + "step": 5000 + }, + { + "epoch": 9.447331128956069, + "eval_dpo_loss": 0.7080661654472351, + "eval_logits": -1.1880360841751099, + "eval_logps": -93.85932922363281, + "eval_loss": 0.3414228558540344, + "eval_objective": 0.34020325541496277, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.26939666271209717, + "eval_runtime": 154.8973, + "eval_samples_per_second": 37.38, + "eval_steps_per_second": 3.118, + "step": 5000 + }, + { + "dpo_loss": 0.6903454661369324, + "epoch": 9.456778460085026, + "grad_norm": 68.84991082457346, + "learning_rate": 4.753524191251052e-09, + "logits": -1.2496955394744873, + "logps": -87.48423767089844, + "loss": 0.0846, + "objective": 0.08353155106306076, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.014497010037302971, + "step": 5005 + }, + { + "dpo_loss": 0.6901063323020935, + "epoch": 9.466225791213983, + "grad_norm": 66.2623062946029, + "learning_rate": 4.5947781115431015e-09, + "logits": -1.1387720108032227, + "logps": -87.15850830078125, + "loss": 0.0857, + "objective": 0.08711527287960052, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4437499940395355, + "regularize": 0.018104637041687965, + "step": 5010 + }, + { + "dpo_loss": 0.6906918287277222, + "epoch": 9.475673122342938, + "grad_norm": 71.40206387464819, + "learning_rate": 4.438703357255047e-09, + "logits": -1.1895406246185303, + "logps": -89.1908950805664, + "loss": 0.0843, + "objective": 0.08377130329608917, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4937500059604645, + "regularize": 0.014702118933200836, + "step": 5015 + }, + { + "dpo_loss": 0.6910804510116577, + "epoch": 9.485120453471895, + "grad_norm": 63.96037085446238, + "learning_rate": 4.285301627317578e-09, + "logits": -1.2459213733673096, + "logps": -90.33760833740234, + "loss": 0.0857, + "objective": 0.08324258774518967, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5562499761581421, + "regularize": 0.0141345439478755, + "step": 5020 + }, + { + "dpo_loss": 0.6904935240745544, + "epoch": 9.49456778460085, + "grad_norm": 65.44362349914937, + "learning_rate": 4.1345745915644935e-09, + "logits": -1.1675902605056763, + "logps": -87.94022369384766, + "loss": 0.0851, + "objective": 0.08558507263660431, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4625000059604645, + "regularize": 0.016535717993974686, + "step": 5025 + }, + { + "dpo_loss": 0.6895044445991516, + "epoch": 9.504015115729807, + "grad_norm": 66.31698226564012, + "learning_rate": 3.986523890714605e-09, + "logits": -1.2078769207000732, + "logps": -90.2032241821289, + "loss": 0.0854, + "objective": 0.08553291857242584, + "ranking_idealized": 0.45625001192092896, + "ranking_idealized_expo": 0.4000000059604645, + "ranking_simple": 0.4000000059604645, + "regularize": 0.01658247597515583, + "step": 5030 + }, + { + "dpo_loss": 0.6903690695762634, + "epoch": 9.513462446858762, + "grad_norm": 67.61769988129738, + "learning_rate": 3.841151136353804e-09, + "logits": -1.188755750656128, + "logps": -88.5759506225586, + "loss": 0.0862, + "objective": 0.08529649674892426, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.016259578987956047, + "step": 5035 + }, + { + "dpo_loss": 0.6889756917953491, + "epoch": 9.522909777987719, + "grad_norm": 70.645766914286, + "learning_rate": 3.6984579109176072e-09, + "logits": -1.246675729751587, + "logps": -90.35582733154297, + "loss": 0.0835, + "objective": 0.08555910736322403, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.01666153408586979, + "step": 5040 + }, + { + "dpo_loss": 0.6908430457115173, + "epoch": 9.532357109116674, + "grad_norm": 66.7833057778863, + "learning_rate": 3.5584457676738898e-09, + "logits": -1.114335298538208, + "logps": -89.66558837890625, + "loss": 0.0873, + "objective": 0.08752139657735825, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.018437087535858154, + "step": 5045 + }, + { + "dpo_loss": 0.6902520060539246, + "epoch": 9.541804440245631, + "grad_norm": 66.89948855068245, + "learning_rate": 3.421116230705928e-09, + "logits": -1.195339560508728, + "logps": -87.08709716796875, + "loss": 0.0843, + "objective": 0.08465549349784851, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.015630286186933517, + "step": 5050 + }, + { + "epoch": 9.541804440245631, + "eval_dpo_loss": 0.7080516219139099, + "eval_logits": -1.1880168914794922, + "eval_logps": -93.8241195678711, + "eval_loss": 0.34173211455345154, + "eval_objective": 0.3405408561229706, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.520703911781311, + "eval_regularize": 0.269735723733902, + "eval_runtime": 154.4639, + "eval_samples_per_second": 37.484, + "eval_steps_per_second": 3.127, + "step": 5050 + }, + { + "dpo_loss": 0.6894997954368591, + "epoch": 9.551251771374586, + "grad_norm": 67.22054719561658, + "learning_rate": 3.286470794895857e-09, + "logits": -1.2233834266662598, + "logps": -89.50907897949219, + "loss": 0.0856, + "objective": 0.08771258592605591, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.59375, + "ranking_simple": 0.5874999761581421, + "regularize": 0.018762610852718353, + "step": 5055 + }, + { + "dpo_loss": 0.6897779107093811, + "epoch": 9.560699102503543, + "grad_norm": 74.81621860274835, + "learning_rate": 3.154510925908349e-09, + "logits": -1.0932340621948242, + "logps": -85.86308288574219, + "loss": 0.0842, + "objective": 0.08236806094646454, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5625, + "regularize": 0.01339026726782322, + "step": 5060 + }, + { + "dpo_loss": 0.6881380081176758, + "epoch": 9.570146433632498, + "grad_norm": 68.7579606426006, + "learning_rate": 3.025238060174795e-09, + "logits": -1.2063125371932983, + "logps": -90.78138732910156, + "loss": 0.0857, + "objective": 0.08496876806020737, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.4375, + "ranking_simple": 0.4375, + "regularize": 0.01615496352314949, + "step": 5065 + }, + { + "dpo_loss": 0.6897650957107544, + "epoch": 9.579593764761455, + "grad_norm": 65.67261342906198, + "learning_rate": 2.898653604877538e-09, + "logits": -1.17462158203125, + "logps": -89.63032531738281, + "loss": 0.088, + "objective": 0.09223799407482147, + "ranking_idealized": 0.48124998807907104, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.02326149307191372, + "step": 5070 + }, + { + "dpo_loss": 0.6891880631446838, + "epoch": 9.58904109589041, + "grad_norm": 66.21421194658043, + "learning_rate": 2.7747589379345514e-09, + "logits": -1.1750421524047852, + "logps": -88.36298370361328, + "loss": 0.0865, + "objective": 0.08409323543310165, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.015174436382949352, + "step": 5075 + }, + { + "dpo_loss": 0.690551221370697, + "epoch": 9.598488427019367, + "grad_norm": 71.18902970881412, + "learning_rate": 2.653555407984509e-09, + "logits": -1.1492376327514648, + "logps": -88.63973236083984, + "loss": 0.0851, + "objective": 0.08727878332138062, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.018223661929368973, + "step": 5080 + }, + { + "dpo_loss": 0.689854085445404, + "epoch": 9.607935758148322, + "grad_norm": 70.01820762620369, + "learning_rate": 2.535044334372072e-09, + "logits": -1.2707594633102417, + "logps": -88.87725830078125, + "loss": 0.0843, + "objective": 0.08458703011274338, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.48124998807907104, + "ranking_simple": 0.48124998807907104, + "regularize": 0.0156016256660223, + "step": 5085 + }, + { + "dpo_loss": 0.6904751062393188, + "epoch": 9.61738308927728, + "grad_norm": 68.87022111268584, + "learning_rate": 2.4192270071335676e-09, + "logits": -1.2080779075622559, + "logps": -87.8804702758789, + "loss": 0.0867, + "objective": 0.0866638720035553, + "ranking_idealized": 0.581250011920929, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.543749988079071, + "regularize": 0.01761636883020401, + "step": 5090 + }, + { + "dpo_loss": 0.6896811723709106, + "epoch": 9.626830420406236, + "grad_norm": 68.19822090549287, + "learning_rate": 2.3061046869828903e-09, + "logits": -1.2898871898651123, + "logps": -91.15736389160156, + "loss": 0.0866, + "objective": 0.0855109840631485, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5375000238418579, + "regularize": 0.01654285192489624, + "step": 5095 + }, + { + "dpo_loss": 0.691383421421051, + "epoch": 9.636277751535191, + "grad_norm": 69.69113013678992, + "learning_rate": 2.195678605297735e-09, + "logits": -1.2627365589141846, + "logps": -91.48534393310547, + "loss": 0.0862, + "objective": 0.08452753722667694, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.015389196574687958, + "step": 5100 + }, + { + "epoch": 9.636277751535191, + "eval_dpo_loss": 0.7081097364425659, + "eval_logits": -1.1884174346923828, + "eval_logps": -93.82679748535156, + "eval_loss": 0.34187451004981995, + "eval_objective": 0.3404352366924286, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.2696242928504944, + "eval_runtime": 154.6575, + "eval_samples_per_second": 37.438, + "eval_steps_per_second": 3.123, + "step": 5100 + }, + { + "dpo_loss": 0.6893877983093262, + "epoch": 9.645725082664148, + "grad_norm": 68.29870104426664, + "learning_rate": 2.087949964106328e-09, + "logits": -1.1205192804336548, + "logps": -88.82121276855469, + "loss": 0.0853, + "objective": 0.08397676050662994, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.015037978067994118, + "step": 5105 + }, + { + "dpo_loss": 0.690714418888092, + "epoch": 9.655172413793103, + "grad_norm": 71.86357129119948, + "learning_rate": 1.9829199360742456e-09, + "logits": -1.2247989177703857, + "logps": -89.22515869140625, + "loss": 0.0869, + "objective": 0.08558894693851471, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.016517508774995804, + "step": 5110 + }, + { + "dpo_loss": 0.6895348429679871, + "epoch": 9.66461974492206, + "grad_norm": 64.50124718922551, + "learning_rate": 1.8805896644916997e-09, + "logits": -1.1435085535049438, + "logps": -87.88530731201172, + "loss": 0.0834, + "objective": 0.08410073816776276, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.015147256664931774, + "step": 5115 + }, + { + "dpo_loss": 0.6883980631828308, + "epoch": 9.674067076051015, + "grad_norm": 66.48144330096997, + "learning_rate": 1.7809602632610776e-09, + "logits": -1.1729907989501953, + "logps": -88.49473571777344, + "loss": 0.0854, + "objective": 0.08492030203342438, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.016080478206276894, + "step": 5120 + }, + { + "dpo_loss": 0.6892448663711548, + "epoch": 9.683514407179972, + "grad_norm": 66.87664115159657, + "learning_rate": 1.6840328168847284e-09, + "logits": -1.2348103523254395, + "logps": -88.95825958251953, + "loss": 0.0851, + "objective": 0.08360938727855682, + "ranking_idealized": 0.59375, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.014684900641441345, + "step": 5125 + }, + { + "dpo_loss": 0.6895814538002014, + "epoch": 9.692961738308927, + "grad_norm": 68.06390176075568, + "learning_rate": 1.5898083804533059e-09, + "logits": -1.1406044960021973, + "logps": -88.6418228149414, + "loss": 0.0858, + "objective": 0.0848851129412651, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.015926973894238472, + "step": 5130 + }, + { + "dpo_loss": 0.6889828443527222, + "epoch": 9.702409069437884, + "grad_norm": 66.21295619445583, + "learning_rate": 1.4982879796341662e-09, + "logits": -1.1915266513824463, + "logps": -87.96595764160156, + "loss": 0.0851, + "objective": 0.08489920198917389, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.016000913456082344, + "step": 5135 + }, + { + "dpo_loss": 0.6904030442237854, + "epoch": 9.71185640056684, + "grad_norm": 70.56807563983075, + "learning_rate": 1.4094726106603504e-09, + "logits": -1.2663993835449219, + "logps": -88.0069808959961, + "loss": 0.0841, + "objective": 0.08480189740657806, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.015761589631438255, + "step": 5140 + }, + { + "dpo_loss": 0.6891473531723022, + "epoch": 9.721303731695796, + "grad_norm": 65.03693590968845, + "learning_rate": 1.3233632403195083e-09, + "logits": -1.1926839351654053, + "logps": -89.36128997802734, + "loss": 0.0865, + "objective": 0.0885232537984848, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.543749988079071, + "ranking_simple": 0.5375000238418579, + "regularize": 0.019608518108725548, + "step": 5145 + }, + { + "dpo_loss": 0.6895589232444763, + "epoch": 9.730751062824751, + "grad_norm": 68.29866148384843, + "learning_rate": 1.239960805943574e-09, + "logits": -1.199296474456787, + "logps": -87.64341735839844, + "loss": 0.0851, + "objective": 0.08560691028833389, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.5249999761581421, + "regularize": 0.016651010140776634, + "step": 5150 + }, + { + "epoch": 9.730751062824751, + "eval_dpo_loss": 0.7081645131111145, + "eval_logits": -1.1881499290466309, + "eval_logps": -93.82471466064453, + "eval_loss": 0.3417753279209137, + "eval_objective": 0.34053075313568115, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5191511511802673, + "eval_regularize": 0.2697142958641052, + "eval_runtime": 154.0822, + "eval_samples_per_second": 37.577, + "eval_steps_per_second": 3.135, + "step": 5150 + }, + { + "dpo_loss": 0.6904730200767517, + "epoch": 9.740198393953708, + "grad_norm": 66.34791283998516, + "learning_rate": 1.1592662153985245e-09, + "logits": -1.1837701797485352, + "logps": -89.02965545654297, + "loss": 0.0857, + "objective": 0.08642591536045074, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.518750011920929, + "regularize": 0.017378611490130424, + "step": 5155 + }, + { + "dpo_loss": 0.6907302737236023, + "epoch": 9.749645725082663, + "grad_norm": 63.32858046981145, + "learning_rate": 1.081280347074387e-09, + "logits": -1.2325102090835571, + "logps": -89.9759750366211, + "loss": 0.0865, + "objective": 0.08908991515636444, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.020016871392726898, + "step": 5160 + }, + { + "dpo_loss": 0.6881371736526489, + "epoch": 9.75909305621162, + "grad_norm": 67.24496793857094, + "learning_rate": 1.0060040498758583e-09, + "logits": -1.274625301361084, + "logps": -88.36674499511719, + "loss": 0.0851, + "objective": 0.08519413322210312, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4937500059604645, + "ranking_simple": 0.4937500059604645, + "regularize": 0.01638040691614151, + "step": 5165 + }, + { + "dpo_loss": 0.6903113126754761, + "epoch": 9.768540387340575, + "grad_norm": 67.50347263114672, + "learning_rate": 9.334381432128946e-10, + "logits": -1.1394083499908447, + "logps": -88.5200424194336, + "loss": 0.086, + "objective": 0.0858432799577713, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.016812153160572052, + "step": 5170 + }, + { + "dpo_loss": 0.6890208721160889, + "epoch": 9.777987718469532, + "grad_norm": 72.11725269729324, + "learning_rate": 8.63583416991831e-10, + "logits": -1.1778769493103027, + "logps": -88.27137756347656, + "loss": 0.0839, + "objective": 0.0835142731666565, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.01461219321936369, + "step": 5175 + }, + { + "dpo_loss": 0.6891745328903198, + "epoch": 9.78743504959849, + "grad_norm": 69.23279225600632, + "learning_rate": 7.96440631606915e-10, + "logits": -1.1942167282104492, + "logps": -89.7216796875, + "loss": 0.0871, + "objective": 0.08584664762020111, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.01692918874323368, + "step": 5180 + }, + { + "dpo_loss": 0.6901405453681946, + "epoch": 9.796882380727444, + "grad_norm": 65.88174648724309, + "learning_rate": 7.320105179318414e-10, + "logits": -1.2572628259658813, + "logps": -89.32684326171875, + "loss": 0.0849, + "objective": 0.08371184766292572, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.53125, + "ranking_simple": 0.53125, + "regularize": 0.0146977873519063, + "step": 5185 + }, + { + "dpo_loss": 0.691258430480957, + "epoch": 9.806329711856401, + "grad_norm": 66.16278545830859, + "learning_rate": 6.702937773119532e-10, + "logits": -1.1129093170166016, + "logps": -87.53398132324219, + "loss": 0.0851, + "objective": 0.0858643501996994, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.581250011920929, + "ranking_simple": 0.581250011920929, + "regularize": 0.01673850044608116, + "step": 5190 + }, + { + "dpo_loss": 0.6866841316223145, + "epoch": 9.815777042985356, + "grad_norm": 66.81451713561216, + "learning_rate": 6.112910815564698e-10, + "logits": -1.1755070686340332, + "logps": -91.15867614746094, + "loss": 0.0856, + "objective": 0.084581658244133, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.01591324433684349, + "step": 5195 + }, + { + "dpo_loss": 0.6875885725021362, + "epoch": 9.825224374114313, + "grad_norm": 65.97696465942497, + "learning_rate": 5.550030729312705e-10, + "logits": -1.2532360553741455, + "logps": -88.8522720336914, + "loss": 0.0852, + "objective": 0.08520559966564178, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.016446739435195923, + "step": 5200 + }, + { + "epoch": 9.825224374114313, + "eval_dpo_loss": 0.7080548405647278, + "eval_logits": -1.1885803937911987, + "eval_logps": -93.82567596435547, + "eval_loss": 0.34153902530670166, + "eval_objective": 0.3401677906513214, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.26936233043670654, + "eval_runtime": 154.8959, + "eval_samples_per_second": 37.38, + "eval_steps_per_second": 3.118, + "step": 5200 + }, + { + "dpo_loss": 0.6900717616081238, + "epoch": 9.834671705243268, + "grad_norm": 66.45010839760512, + "learning_rate": 5.014303641519002e-10, + "logits": -1.2506177425384521, + "logps": -88.95404815673828, + "loss": 0.0867, + "objective": 0.08865327388048172, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.518750011920929, + "ranking_simple": 0.518750011920929, + "regularize": 0.019646089524030685, + "step": 5205 + }, + { + "dpo_loss": 0.6904046535491943, + "epoch": 9.844119036372225, + "grad_norm": 64.94882365693898, + "learning_rate": 4.505735383768527e-10, + "logits": -1.1922829151153564, + "logps": -88.02415466308594, + "loss": 0.0894, + "objective": 0.08971310406923294, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.02067263238132, + "step": 5210 + }, + { + "dpo_loss": 0.6909619569778442, + "epoch": 9.85356636750118, + "grad_norm": 65.59047455604453, + "learning_rate": 4.0243314920118677e-10, + "logits": -1.1949775218963623, + "logps": -87.81748962402344, + "loss": 0.0857, + "objective": 0.08444318920373917, + "ranking_idealized": 0.53125, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5062500238418579, + "regularize": 0.015346983447670937, + "step": 5215 + }, + { + "dpo_loss": 0.6882283687591553, + "epoch": 9.863013698630137, + "grad_norm": 70.42798711656708, + "learning_rate": 3.5700972065066946e-10, + "logits": -1.2317819595336914, + "logps": -88.4919204711914, + "loss": 0.0849, + "objective": 0.08625314384698868, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.01743030548095703, + "step": 5220 + }, + { + "dpo_loss": 0.6860737800598145, + "epoch": 9.872461029759092, + "grad_norm": 66.20596185239998, + "learning_rate": 3.143037471758925e-10, + "logits": -1.1786277294158936, + "logps": -89.03819274902344, + "loss": 0.0871, + "objective": 0.09018915891647339, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5687500238418579, + "ranking_simple": 0.5687500238418579, + "regularize": 0.021581783890724182, + "step": 5225 + }, + { + "dpo_loss": 0.6909530758857727, + "epoch": 9.88190836088805, + "grad_norm": 65.01306248417197, + "learning_rate": 2.743156936469426e-10, + "logits": -1.1898537874221802, + "logps": -86.34603118896484, + "loss": 0.0843, + "objective": 0.08352215588092804, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.543749988079071, + "regularize": 0.014426849782466888, + "step": 5230 + }, + { + "dpo_loss": 0.6891884803771973, + "epoch": 9.891355692017004, + "grad_norm": 71.66449902562967, + "learning_rate": 2.370459953483783e-10, + "logits": -1.1369574069976807, + "logps": -86.77111053466797, + "loss": 0.0852, + "objective": 0.08359196782112122, + "ranking_idealized": 0.606249988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5687500238418579, + "regularize": 0.014673106372356415, + "step": 5235 + }, + { + "dpo_loss": 0.6891953349113464, + "epoch": 9.900803023145961, + "grad_norm": 69.17506467154475, + "learning_rate": 2.024950579744833e-10, + "logits": -1.2471169233322144, + "logps": -89.45270538330078, + "loss": 0.0848, + "objective": 0.08721388876438141, + "ranking_idealized": 0.5062500238418579, + "ranking_idealized_expo": 0.5062500238418579, + "ranking_simple": 0.5, + "regularize": 0.018294354900717735, + "step": 5240 + }, + { + "dpo_loss": 0.6880627274513245, + "epoch": 9.910250354274917, + "grad_norm": 68.78965633618786, + "learning_rate": 1.706632576248257e-10, + "logits": -1.231979250907898, + "logps": -89.65391540527344, + "loss": 0.0869, + "objective": 0.08603748679161072, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.518750011920929, + "regularize": 0.01723121479153633, + "step": 5245 + }, + { + "dpo_loss": 0.6887436509132385, + "epoch": 9.919697685403873, + "grad_norm": 67.12352931570797, + "learning_rate": 1.4155094080017804e-10, + "logits": -1.152923583984375, + "logps": -88.67755889892578, + "loss": 0.0873, + "objective": 0.08699290454387665, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.018118537962436676, + "step": 5250 + }, + { + "epoch": 9.919697685403873, + "eval_dpo_loss": 0.708267092704773, + "eval_logits": -1.1885186433792114, + "eval_logps": -93.82195281982422, + "eval_loss": 0.34177881479263306, + "eval_objective": 0.3403994143009186, + "eval_ranking_idealized": 0.5398550629615784, + "eval_ranking_idealized_expo": 0.5243270993232727, + "eval_ranking_simple": 0.5196687579154968, + "eval_regularize": 0.26957273483276367, + "eval_runtime": 154.5383, + "eval_samples_per_second": 37.466, + "eval_steps_per_second": 3.125, + "step": 5250 + }, + { + "dpo_loss": 0.6899495720863342, + "epoch": 9.929145016532829, + "grad_norm": 66.40637873315407, + "learning_rate": 1.151584243987147e-10, + "logits": -1.2185667753219604, + "logps": -88.12953186035156, + "loss": 0.0876, + "objective": 0.08826954662799835, + "ranking_idealized": 0.518750011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.019274592399597168, + "step": 5255 + }, + { + "dpo_loss": 0.6879016757011414, + "epoch": 9.938592347661785, + "grad_norm": 66.23811942100768, + "learning_rate": 9.148599571262572e-11, + "logits": -1.167487382888794, + "logps": -89.59779357910156, + "loss": 0.0873, + "objective": 0.08775386959314346, + "ranking_idealized": 0.5562499761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.53125, + "regularize": 0.018963690847158432, + "step": 5260 + }, + { + "dpo_loss": 0.6897536516189575, + "epoch": 9.948039678790742, + "grad_norm": 70.44524154982453, + "learning_rate": 7.05339124249249e-11, + "logits": -1.1897327899932861, + "logps": -90.16870880126953, + "loss": 0.0855, + "objective": 0.08374921977519989, + "ranking_idealized": 0.6187499761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5625, + "regularize": 0.014773854985833168, + "step": 5265 + }, + { + "dpo_loss": 0.6920146942138672, + "epoch": 9.957487009919697, + "grad_norm": 66.76085795213623, + "learning_rate": 5.2302402606702044e-11, + "logits": -1.076805591583252, + "logps": -91.03254699707031, + "loss": 0.0876, + "objective": 0.0894809290766716, + "ranking_idealized": 0.5687500238418579, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.02027946338057518, + "step": 5270 + }, + { + "dpo_loss": 0.6896114349365234, + "epoch": 9.966934341048654, + "grad_norm": 70.14959939061058, + "learning_rate": 3.679166471459716e-11, + "logits": -1.2547842264175415, + "logps": -88.06056213378906, + "loss": 0.0844, + "objective": 0.08376505225896835, + "ranking_idealized": 0.46875, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.014803911559283733, + "step": 5275 + }, + { + "dpo_loss": 0.6923837661743164, + "epoch": 9.97638167217761, + "grad_norm": 64.43154839869437, + "learning_rate": 2.4001867588663293e-11, + "logits": -1.1902258396148682, + "logps": -91.7777328491211, + "loss": 0.0856, + "objective": 0.08763132989406586, + "ranking_idealized": 0.643750011920929, + "ranking_idealized_expo": 0.6000000238418579, + "ranking_simple": 0.6000000238418579, + "regularize": 0.018392954021692276, + "step": 5280 + }, + { + "dpo_loss": 0.6896318197250366, + "epoch": 9.985829003306566, + "grad_norm": 65.45521151304239, + "learning_rate": 1.3933150450479159e-11, + "logits": -1.1867663860321045, + "logps": -87.43783569335938, + "loss": 0.0853, + "objective": 0.0830325335264206, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.46875, + "ranking_simple": 0.46875, + "regularize": 0.01406935416162014, + "step": 5285 + }, + { + "dpo_loss": 0.6888291239738464, + "epoch": 9.995276334435522, + "grad_norm": 65.46742735182674, + "learning_rate": 6.585622901705834e-12, + "logits": -1.1299092769622803, + "logps": -89.06723022460938, + "loss": 0.0866, + "objective": 0.0879444032907486, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5562499761581421, + "ranking_simple": 0.5562499761581421, + "regularize": 0.019061479717493057, + "step": 5290 + }, + { + "epoch": 9.995276334435522, + "step": 5290, + "total_flos": 0.0, + "train_loss": 0.15836801591183153, + "train_runtime": 49865.2618, + "train_samples_per_second": 10.188, + "train_steps_per_second": 0.106 + } + ], + "logging_steps": 5, + "max_steps": 5290, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}