|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996020692399522, |
|
"eval_steps": 100, |
|
"global_step": 1256, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/losses": 0.23031963407993317, |
|
"debug/policy_weights": 0.3322809934616089, |
|
"debug/raw_losses": 0.6931471824645996, |
|
"epoch": 0.0007958615200955034, |
|
"grad_norm": 1.6287391185739195, |
|
"learning_rate": 3.968253968253968e-09, |
|
"logits/chosen": -2.735659122467041, |
|
"logits/rejected": -2.7581238746643066, |
|
"logps/chosen": -124.62968444824219, |
|
"logps/rejected": -168.09475708007812, |
|
"loss": 0.2239, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/losses": 0.23633037507534027, |
|
"debug/policy_weights": 0.34083470702171326, |
|
"debug/raw_losses": 0.693356990814209, |
|
"epoch": 0.007958615200955034, |
|
"grad_norm": 1.6857448656689296, |
|
"learning_rate": 3.968253968253968e-08, |
|
"logits/chosen": -2.738861560821533, |
|
"logits/rejected": -2.7278800010681152, |
|
"logps/chosen": -146.718994140625, |
|
"logps/rejected": -131.18580627441406, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": -0.00016815567505545914, |
|
"rewards/margins": -0.00041737209539860487, |
|
"rewards/rejected": 0.0002492164494469762, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/losses": 0.22639703750610352, |
|
"debug/policy_weights": 0.3266511857509613, |
|
"debug/raw_losses": 0.6931136250495911, |
|
"epoch": 0.01591723040191007, |
|
"grad_norm": 1.5624222510478807, |
|
"learning_rate": 7.936507936507936e-08, |
|
"logits/chosen": -2.7067270278930664, |
|
"logits/rejected": -2.703731060028076, |
|
"logps/chosen": -129.4856414794922, |
|
"logps/rejected": -130.27786254882812, |
|
"loss": 0.2238, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0001734123652568087, |
|
"rewards/margins": 6.990063411649317e-05, |
|
"rewards/rejected": -0.00024331299937330186, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/losses": 0.21324554085731506, |
|
"debug/policy_weights": 0.3076760470867157, |
|
"debug/raw_losses": 0.6930493116378784, |
|
"epoch": 0.0238758456028651, |
|
"grad_norm": 1.5412879090281855, |
|
"learning_rate": 1.1904761904761903e-07, |
|
"logits/chosen": -2.6839892864227295, |
|
"logits/rejected": -2.6810474395751953, |
|
"logps/chosen": -141.8278045654297, |
|
"logps/rejected": -155.67654418945312, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 3.942887997254729e-05, |
|
"rewards/margins": 0.0001982362737180665, |
|
"rewards/rejected": -0.00015880735008977354, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/losses": 0.2176821529865265, |
|
"debug/policy_weights": 0.3141413629055023, |
|
"debug/raw_losses": 0.6929017305374146, |
|
"epoch": 0.03183446080382014, |
|
"grad_norm": 1.6385196184238553, |
|
"learning_rate": 1.5873015873015872e-07, |
|
"logits/chosen": -2.69197678565979, |
|
"logits/rejected": -2.6842830181121826, |
|
"logps/chosen": -154.9615936279297, |
|
"logps/rejected": -164.14413452148438, |
|
"loss": 0.221, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0013954730238765478, |
|
"rewards/margins": 0.0004961603553965688, |
|
"rewards/rejected": -0.0018916334956884384, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/losses": 0.229542538523674, |
|
"debug/policy_weights": 0.3317711055278778, |
|
"debug/raw_losses": 0.6918389797210693, |
|
"epoch": 0.03979307600477517, |
|
"grad_norm": 1.48113487801622, |
|
"learning_rate": 1.984126984126984e-07, |
|
"logits/chosen": -2.7066245079040527, |
|
"logits/rejected": -2.6878674030303955, |
|
"logps/chosen": -143.980224609375, |
|
"logps/rejected": -137.73158264160156, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0025663406122475863, |
|
"rewards/margins": 0.0026327171362936497, |
|
"rewards/rejected": -0.005199057050049305, |
|
"step": 50 |
|
}, |
|
{ |
|
"debug/losses": 0.22738368809223175, |
|
"debug/policy_weights": 0.32868558168411255, |
|
"debug/raw_losses": 0.6915279626846313, |
|
"epoch": 0.0477516912057302, |
|
"grad_norm": 1.487501706200083, |
|
"learning_rate": 2.3809523809523806e-07, |
|
"logits/chosen": -2.7156598567962646, |
|
"logits/rejected": -2.716393232345581, |
|
"logps/chosen": -145.95175170898438, |
|
"logps/rejected": -159.5319366455078, |
|
"loss": 0.2191, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0032509162556380033, |
|
"rewards/margins": 0.00328192301094532, |
|
"rewards/rejected": -0.00653283903375268, |
|
"step": 60 |
|
}, |
|
{ |
|
"debug/losses": 0.21748514473438263, |
|
"debug/policy_weights": 0.31510016322135925, |
|
"debug/raw_losses": 0.6903446316719055, |
|
"epoch": 0.055710306406685235, |
|
"grad_norm": 1.5343429096282828, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -2.736347198486328, |
|
"logits/rejected": -2.7274653911590576, |
|
"logps/chosen": -149.3427276611328, |
|
"logps/rejected": -143.45547485351562, |
|
"loss": 0.2112, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.01704435609281063, |
|
"rewards/margins": 0.005850088782608509, |
|
"rewards/rejected": -0.022894445806741714, |
|
"step": 70 |
|
}, |
|
{ |
|
"debug/losses": 0.1880975216627121, |
|
"debug/policy_weights": 0.2722209393978119, |
|
"debug/raw_losses": 0.6913992166519165, |
|
"epoch": 0.06366892160764027, |
|
"grad_norm": 1.533336025559628, |
|
"learning_rate": 3.1746031746031743e-07, |
|
"logits/chosen": -2.7107605934143066, |
|
"logits/rejected": -2.6922690868377686, |
|
"logps/chosen": -158.02011108398438, |
|
"logps/rejected": -149.45602416992188, |
|
"loss": 0.201, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04885869473218918, |
|
"rewards/margins": 0.004143272526562214, |
|
"rewards/rejected": -0.05300196260213852, |
|
"step": 80 |
|
}, |
|
{ |
|
"debug/losses": 0.1863638460636139, |
|
"debug/policy_weights": 0.2727832794189453, |
|
"debug/raw_losses": 0.6830354928970337, |
|
"epoch": 0.07162753680859531, |
|
"grad_norm": 1.6114760928044485, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"logits/chosen": -2.7209115028381348, |
|
"logits/rejected": -2.7276768684387207, |
|
"logps/chosen": -152.76400756835938, |
|
"logps/rejected": -173.5898895263672, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.06917759776115417, |
|
"rewards/margins": 0.02194269374012947, |
|
"rewards/rejected": -0.09112029522657394, |
|
"step": 90 |
|
}, |
|
{ |
|
"debug/losses": 0.16769352555274963, |
|
"debug/policy_weights": 0.24745841324329376, |
|
"debug/raw_losses": 0.6783273816108704, |
|
"epoch": 0.07958615200955034, |
|
"grad_norm": 1.5208024805782128, |
|
"learning_rate": 3.968253968253968e-07, |
|
"logits/chosen": -2.689220666885376, |
|
"logits/rejected": -2.672532081604004, |
|
"logps/chosen": -149.3437042236328, |
|
"logps/rejected": -143.38467407226562, |
|
"loss": 0.1732, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11531722545623779, |
|
"rewards/margins": 0.033182911574840546, |
|
"rewards/rejected": -0.14850012958049774, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07958615200955034, |
|
"eval_debug/losses": 0.16172632575035095, |
|
"eval_debug/policy_weights": 0.2382642775774002, |
|
"eval_debug/raw_losses": 0.6786961555480957, |
|
"eval_logits/chosen": -2.7119109630584717, |
|
"eval_logits/rejected": -2.703813076019287, |
|
"eval_logps/chosen": -158.47622680664062, |
|
"eval_logps/rejected": -168.33502197265625, |
|
"eval_loss": 0.16316837072372437, |
|
"eval_rewards/accuracies": 0.5960820913314819, |
|
"eval_rewards/chosen": -0.14232736825942993, |
|
"eval_rewards/margins": 0.03403294086456299, |
|
"eval_rewards/rejected": -0.17636029422283173, |
|
"eval_runtime": 153.0553, |
|
"eval_samples_per_second": 55.875, |
|
"eval_steps_per_second": 0.876, |
|
"step": 100 |
|
}, |
|
{ |
|
"debug/losses": 0.1508895754814148, |
|
"debug/policy_weights": 0.2189808338880539, |
|
"debug/raw_losses": 0.6909345984458923, |
|
"epoch": 0.08754476721050537, |
|
"grad_norm": 1.5035580854434272, |
|
"learning_rate": 4.365079365079365e-07, |
|
"logits/chosen": -2.6903061866760254, |
|
"logits/rejected": -2.6716604232788086, |
|
"logps/chosen": -179.5548553466797, |
|
"logps/rejected": -165.21966552734375, |
|
"loss": 0.1472, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2126411497592926, |
|
"rewards/margins": 0.011448127217590809, |
|
"rewards/rejected": -0.22408926486968994, |
|
"step": 110 |
|
}, |
|
{ |
|
"debug/losses": 0.137036994099617, |
|
"debug/policy_weights": 0.2047518789768219, |
|
"debug/raw_losses": 0.678392231464386, |
|
"epoch": 0.0955033824114604, |
|
"grad_norm": 1.3984947799152403, |
|
"learning_rate": 4.761904761904761e-07, |
|
"logits/chosen": -2.663886785507202, |
|
"logits/rejected": -2.651045560836792, |
|
"logps/chosen": -168.24790954589844, |
|
"logps/rejected": -173.3909149169922, |
|
"loss": 0.1331, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.29116135835647583, |
|
"rewards/margins": 0.0431181900203228, |
|
"rewards/rejected": -0.33427953720092773, |
|
"step": 120 |
|
}, |
|
{ |
|
"debug/losses": 0.11775548756122589, |
|
"debug/policy_weights": 0.1796623170375824, |
|
"debug/raw_losses": 0.6566502451896667, |
|
"epoch": 0.10346199761241544, |
|
"grad_norm": 2.374984413363097, |
|
"learning_rate": 4.999845414634076e-07, |
|
"logits/chosen": -2.6745553016662598, |
|
"logits/rejected": -2.6470043659210205, |
|
"logps/chosen": -188.0329132080078, |
|
"logps/rejected": -178.14801025390625, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.33123353123664856, |
|
"rewards/margins": 0.09330518543720245, |
|
"rewards/rejected": -0.4245387017726898, |
|
"step": 130 |
|
}, |
|
{ |
|
"debug/losses": 0.09425052255392075, |
|
"debug/policy_weights": 0.1469433605670929, |
|
"debug/raw_losses": 0.6522111892700195, |
|
"epoch": 0.11142061281337047, |
|
"grad_norm": 1.7466151736267947, |
|
"learning_rate": 4.998106548810311e-07, |
|
"logits/chosen": -2.627020835876465, |
|
"logits/rejected": -2.603726625442505, |
|
"logps/chosen": -197.0404510498047, |
|
"logps/rejected": -187.454833984375, |
|
"loss": 0.106, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.514377772808075, |
|
"rewards/margins": 0.10739537328481674, |
|
"rewards/rejected": -0.6217731237411499, |
|
"step": 140 |
|
}, |
|
{ |
|
"debug/losses": 0.08491900563240051, |
|
"debug/policy_weights": 0.1424042284488678, |
|
"debug/raw_losses": 0.6084787845611572, |
|
"epoch": 0.1193792280143255, |
|
"grad_norm": 2.8243407531109206, |
|
"learning_rate": 4.994436933879359e-07, |
|
"logits/chosen": -2.5916829109191895, |
|
"logits/rejected": -2.5848000049591064, |
|
"logps/chosen": -188.14651489257812, |
|
"logps/rejected": -216.0280303955078, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4860725998878479, |
|
"rewards/margins": 0.2143338918685913, |
|
"rewards/rejected": -0.7004064321517944, |
|
"step": 150 |
|
}, |
|
{ |
|
"debug/losses": 0.08054587990045547, |
|
"debug/policy_weights": 0.13052348792552948, |
|
"debug/raw_losses": 0.6103022694587708, |
|
"epoch": 0.12733784321528055, |
|
"grad_norm": 2.235321236398355, |
|
"learning_rate": 4.988839406031596e-07, |
|
"logits/chosen": -2.584826946258545, |
|
"logits/rejected": -2.5921337604522705, |
|
"logps/chosen": -179.30166625976562, |
|
"logps/rejected": -230.94314575195312, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5866313576698303, |
|
"rewards/margins": 0.24938829243183136, |
|
"rewards/rejected": -0.8360196352005005, |
|
"step": 160 |
|
}, |
|
{ |
|
"debug/losses": 0.053926557302474976, |
|
"debug/policy_weights": 0.08985424786806107, |
|
"debug/raw_losses": 0.6291411519050598, |
|
"epoch": 0.13529645841623558, |
|
"grad_norm": 1.7353626869477405, |
|
"learning_rate": 4.981318291512395e-07, |
|
"logits/chosen": -2.5308010578155518, |
|
"logits/rejected": -2.5215961933135986, |
|
"logps/chosen": -233.59939575195312, |
|
"logps/rejected": -265.3108215332031, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.971887469291687, |
|
"rewards/margins": 0.23178091645240784, |
|
"rewards/rejected": -1.203668475151062, |
|
"step": 170 |
|
}, |
|
{ |
|
"debug/losses": 0.03688238933682442, |
|
"debug/policy_weights": 0.05372762680053711, |
|
"debug/raw_losses": 0.6907342672348022, |
|
"epoch": 0.14325507361719061, |
|
"grad_norm": 1.7970759832571033, |
|
"learning_rate": 4.971879403278432e-07, |
|
"logits/chosen": -2.511756181716919, |
|
"logits/rejected": -2.4959020614624023, |
|
"logps/chosen": -288.7469177246094, |
|
"logps/rejected": -290.37908935546875, |
|
"loss": 0.0354, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.4022036790847778, |
|
"rewards/margins": 0.062146056443452835, |
|
"rewards/rejected": -1.464349627494812, |
|
"step": 180 |
|
}, |
|
{ |
|
"debug/losses": 0.05163710191845894, |
|
"debug/policy_weights": 0.08004944026470184, |
|
"debug/raw_losses": 0.6670282483100891, |
|
"epoch": 0.15121368881814565, |
|
"grad_norm": 1.6751904674481997, |
|
"learning_rate": 4.960530036504941e-07, |
|
"logits/chosen": -2.5430991649627686, |
|
"logits/rejected": -2.5239815711975098, |
|
"logps/chosen": -263.212158203125, |
|
"logps/rejected": -268.65362548828125, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.188627004623413, |
|
"rewards/margins": 0.11968141794204712, |
|
"rewards/rejected": -1.3083083629608154, |
|
"step": 190 |
|
}, |
|
{ |
|
"debug/losses": 0.07679580897092819, |
|
"debug/policy_weights": 0.12060348689556122, |
|
"debug/raw_losses": 0.6367899775505066, |
|
"epoch": 0.15917230401910068, |
|
"grad_norm": 1.8015846859226716, |
|
"learning_rate": 4.947278962947386e-07, |
|
"logits/chosen": -2.5448789596557617, |
|
"logits/rejected": -2.5480525493621826, |
|
"logps/chosen": -223.87393188476562, |
|
"logps/rejected": -254.6379852294922, |
|
"loss": 0.077, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7472453713417053, |
|
"rewards/margins": 0.16983875632286072, |
|
"rewards/rejected": -0.9170840382575989, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15917230401910068, |
|
"eval_debug/losses": 0.0807570293545723, |
|
"eval_debug/policy_weights": 0.12652097642421722, |
|
"eval_debug/raw_losses": 0.6429691910743713, |
|
"eval_logits/chosen": -2.545888662338257, |
|
"eval_logits/rejected": -2.5351974964141846, |
|
"eval_logps/chosen": -218.20616149902344, |
|
"eval_logps/rejected": -243.5860595703125, |
|
"eval_loss": 0.08196824789047241, |
|
"eval_rewards/accuracies": 0.6296641826629639, |
|
"eval_rewards/chosen": -0.7396268248558044, |
|
"eval_rewards/margins": 0.18924373388290405, |
|
"eval_rewards/rejected": -0.9288705587387085, |
|
"eval_runtime": 152.9399, |
|
"eval_samples_per_second": 55.917, |
|
"eval_steps_per_second": 0.876, |
|
"step": 200 |
|
}, |
|
{ |
|
"debug/losses": 0.06875108927488327, |
|
"debug/policy_weights": 0.11560399830341339, |
|
"debug/raw_losses": 0.6261113882064819, |
|
"epoch": 0.1671309192200557, |
|
"grad_norm": 1.923749397238511, |
|
"learning_rate": 4.932136424161899e-07, |
|
"logits/chosen": -2.508167028427124, |
|
"logits/rejected": -2.493638038635254, |
|
"logps/chosen": -204.44461059570312, |
|
"logps/rejected": -234.84375, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7885208129882812, |
|
"rewards/margins": 0.24285559356212616, |
|
"rewards/rejected": -1.0313764810562134, |
|
"step": 210 |
|
}, |
|
{ |
|
"debug/losses": 0.07057208567857742, |
|
"debug/policy_weights": 0.1248287707567215, |
|
"debug/raw_losses": 0.5716283321380615, |
|
"epoch": 0.17508953442101075, |
|
"grad_norm": 2.155645087454887, |
|
"learning_rate": 4.915114123589732e-07, |
|
"logits/chosen": -2.5536115169525146, |
|
"logits/rejected": -2.532297134399414, |
|
"logps/chosen": -200.5375518798828, |
|
"logps/rejected": -235.3711700439453, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.636379063129425, |
|
"rewards/margins": 0.36098265647888184, |
|
"rewards/rejected": -0.9973617792129517, |
|
"step": 220 |
|
}, |
|
{ |
|
"debug/losses": 0.0636017918586731, |
|
"debug/policy_weights": 0.10361097007989883, |
|
"debug/raw_losses": 0.6123852133750916, |
|
"epoch": 0.18304814962196578, |
|
"grad_norm": 2.8331926004473647, |
|
"learning_rate": 4.896225217511849e-07, |
|
"logits/chosen": -2.5085248947143555, |
|
"logits/rejected": -2.5012524127960205, |
|
"logps/chosen": -241.10537719726562, |
|
"logps/rejected": -279.1767883300781, |
|
"loss": 0.0691, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0180869102478027, |
|
"rewards/margins": 0.2863315939903259, |
|
"rewards/rejected": -1.3044183254241943, |
|
"step": 230 |
|
}, |
|
{ |
|
"debug/losses": 0.049545757472515106, |
|
"debug/policy_weights": 0.08214031159877777, |
|
"debug/raw_losses": 0.6215575933456421, |
|
"epoch": 0.1910067648229208, |
|
"grad_norm": 1.5874644335916546, |
|
"learning_rate": 4.875484304880629e-07, |
|
"logits/chosen": -2.4870269298553467, |
|
"logits/rejected": -2.46873140335083, |
|
"logps/chosen": -294.48333740234375, |
|
"logps/rejected": -322.12176513671875, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3272528648376465, |
|
"rewards/margins": 0.3347854018211365, |
|
"rewards/rejected": -1.6620384454727173, |
|
"step": 240 |
|
}, |
|
{ |
|
"debug/losses": 0.060823164880275726, |
|
"debug/policy_weights": 0.10039641708135605, |
|
"debug/raw_losses": 0.63308185338974, |
|
"epoch": 0.19896538002387584, |
|
"grad_norm": 1.78198841800004, |
|
"learning_rate": 4.852907416036558e-07, |
|
"logits/chosen": -2.4755935668945312, |
|
"logits/rejected": -2.471782922744751, |
|
"logps/chosen": -233.53451538085938, |
|
"logps/rejected": -271.2043762207031, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9423769116401672, |
|
"rewards/margins": 0.25337982177734375, |
|
"rewards/rejected": -1.1957566738128662, |
|
"step": 250 |
|
}, |
|
{ |
|
"debug/losses": 0.06635858118534088, |
|
"debug/policy_weights": 0.11175660043954849, |
|
"debug/raw_losses": 0.5817192196846008, |
|
"epoch": 0.20692399522483088, |
|
"grad_norm": 1.8058774364103256, |
|
"learning_rate": 4.828512000318616e-07, |
|
"logits/chosen": -2.4792320728302, |
|
"logits/rejected": -2.426579713821411, |
|
"logps/chosen": -257.08880615234375, |
|
"logps/rejected": -285.12872314453125, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9238799214363098, |
|
"rewards/margins": 0.37587353587150574, |
|
"rewards/rejected": -1.2997533082962036, |
|
"step": 260 |
|
}, |
|
{ |
|
"debug/losses": 0.05608036369085312, |
|
"debug/policy_weights": 0.09485211223363876, |
|
"debug/raw_losses": 0.6099108457565308, |
|
"epoch": 0.2148826104257859, |
|
"grad_norm": 1.7589439888686003, |
|
"learning_rate": 4.802316912577946e-07, |
|
"logits/chosen": -2.371746778488159, |
|
"logits/rejected": -2.3314661979675293, |
|
"logps/chosen": -249.49166870117188, |
|
"logps/rejected": -269.15106201171875, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.018430471420288, |
|
"rewards/margins": 0.33081451058387756, |
|
"rewards/rejected": -1.3492449522018433, |
|
"step": 270 |
|
}, |
|
{ |
|
"debug/losses": 0.052897512912750244, |
|
"debug/policy_weights": 0.07982214540243149, |
|
"debug/raw_losses": 0.6378833055496216, |
|
"epoch": 0.22284122562674094, |
|
"grad_norm": 1.5995617528843298, |
|
"learning_rate": 4.774342398605221e-07, |
|
"logits/chosen": -2.2725443840026855, |
|
"logits/rejected": -2.2477705478668213, |
|
"logps/chosen": -273.08953857421875, |
|
"logps/rejected": -294.1094665527344, |
|
"loss": 0.049, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.2748507261276245, |
|
"rewards/margins": 0.26763075590133667, |
|
"rewards/rejected": -1.542481541633606, |
|
"step": 280 |
|
}, |
|
{ |
|
"debug/losses": 0.06544710695743561, |
|
"debug/policy_weights": 0.10770060122013092, |
|
"debug/raw_losses": 0.5902801752090454, |
|
"epoch": 0.23079984082769597, |
|
"grad_norm": 2.5512893652177375, |
|
"learning_rate": 4.744610079482978e-07, |
|
"logits/chosen": -2.2364134788513184, |
|
"logits/rejected": -2.1829159259796143, |
|
"logps/chosen": -278.95745849609375, |
|
"logps/rejected": -308.3731384277344, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1328349113464355, |
|
"rewards/margins": 0.3837105631828308, |
|
"rewards/rejected": -1.5165454149246216, |
|
"step": 290 |
|
}, |
|
{ |
|
"debug/losses": 0.037609830498695374, |
|
"debug/policy_weights": 0.06786644458770752, |
|
"debug/raw_losses": 0.6028710603713989, |
|
"epoch": 0.238758456028651, |
|
"grad_norm": 1.753065545131153, |
|
"learning_rate": 4.713142934875005e-07, |
|
"logits/chosen": -2.1160764694213867, |
|
"logits/rejected": -2.0635263919830322, |
|
"logps/chosen": -289.21832275390625, |
|
"logps/rejected": -311.9188537597656, |
|
"loss": 0.0465, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.416634202003479, |
|
"rewards/margins": 0.3938707709312439, |
|
"rewards/rejected": -1.8105049133300781, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.238758456028651, |
|
"eval_debug/losses": 0.04409417510032654, |
|
"eval_debug/policy_weights": 0.07315339893102646, |
|
"eval_debug/raw_losses": 0.6074807643890381, |
|
"eval_logits/chosen": -2.1516358852386475, |
|
"eval_logits/rejected": -2.1315433979034424, |
|
"eval_logps/chosen": -289.1007995605469, |
|
"eval_logps/rejected": -331.553466796875, |
|
"eval_loss": 0.04599982127547264, |
|
"eval_rewards/accuracies": 0.66697758436203, |
|
"eval_rewards/chosen": -1.448573112487793, |
|
"eval_rewards/margins": 0.3599713444709778, |
|
"eval_rewards/rejected": -1.808544397354126, |
|
"eval_runtime": 152.7829, |
|
"eval_samples_per_second": 55.975, |
|
"eval_steps_per_second": 0.877, |
|
"step": 300 |
|
}, |
|
{ |
|
"debug/losses": 0.03696579486131668, |
|
"debug/policy_weights": 0.06142206862568855, |
|
"debug/raw_losses": 0.5807094573974609, |
|
"epoch": 0.24671707122960604, |
|
"grad_norm": 1.4236707076819513, |
|
"learning_rate": 4.679965285265706e-07, |
|
"logits/chosen": -2.129534959793091, |
|
"logits/rejected": -2.1084346771240234, |
|
"logps/chosen": -263.85357666015625, |
|
"logps/rejected": -315.09295654296875, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4629967212677002, |
|
"rewards/margins": 0.39182788133621216, |
|
"rewards/rejected": -1.8548247814178467, |
|
"step": 310 |
|
}, |
|
{ |
|
"debug/losses": 0.039889153093099594, |
|
"debug/policy_weights": 0.07385966926813126, |
|
"debug/raw_losses": 0.515781044960022, |
|
"epoch": 0.2546756864305611, |
|
"grad_norm": 1.8086670273747332, |
|
"learning_rate": 4.64510277316316e-07, |
|
"logits/chosen": -2.105729103088379, |
|
"logits/rejected": -2.050633192062378, |
|
"logps/chosen": -284.87628173828125, |
|
"logps/rejected": -340.914794921875, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3924423456192017, |
|
"rewards/margins": 0.624489963054657, |
|
"rewards/rejected": -2.016932249069214, |
|
"step": 320 |
|
}, |
|
{ |
|
"debug/losses": 0.03575636073946953, |
|
"debug/policy_weights": 0.05730568245053291, |
|
"debug/raw_losses": 0.6370795369148254, |
|
"epoch": 0.26263430163151613, |
|
"grad_norm": 1.4742792675036147, |
|
"learning_rate": 4.6085823432804137e-07, |
|
"logits/chosen": -2.0471785068511963, |
|
"logits/rejected": -2.015995740890503, |
|
"logps/chosen": -330.13970947265625, |
|
"logps/rejected": -357.7228088378906, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8637893199920654, |
|
"rewards/margins": 0.2782483696937561, |
|
"rewards/rejected": -2.1420376300811768, |
|
"step": 330 |
|
}, |
|
{ |
|
"debug/losses": 0.026058007031679153, |
|
"debug/policy_weights": 0.044492077082395554, |
|
"debug/raw_losses": 0.5921159982681274, |
|
"epoch": 0.27059291683247116, |
|
"grad_norm": 0.8069991257279925, |
|
"learning_rate": 4.570432221710314e-07, |
|
"logits/chosen": -2.0316271781921387, |
|
"logits/rejected": -2.0155906677246094, |
|
"logps/chosen": -339.8232727050781, |
|
"logps/rejected": -390.1024475097656, |
|
"loss": 0.0253, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.936417579650879, |
|
"rewards/margins": 0.40920180082321167, |
|
"rewards/rejected": -2.3456194400787354, |
|
"step": 340 |
|
}, |
|
{ |
|
"debug/losses": 0.03554385155439377, |
|
"debug/policy_weights": 0.06352627277374268, |
|
"debug/raw_losses": 0.5905269384384155, |
|
"epoch": 0.2785515320334262, |
|
"grad_norm": 1.8338764172051625, |
|
"learning_rate": 4.5306818941099866e-07, |
|
"logits/chosen": -2.1141629219055176, |
|
"logits/rejected": -2.056692123413086, |
|
"logps/chosen": -321.1317138671875, |
|
"logps/rejected": -345.6830749511719, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.7090749740600586, |
|
"rewards/margins": 0.4097229838371277, |
|
"rewards/rejected": -2.118797779083252, |
|
"step": 350 |
|
}, |
|
{ |
|
"debug/losses": 0.039255283772945404, |
|
"debug/policy_weights": 0.06526477634906769, |
|
"debug/raw_losses": 0.5919562578201294, |
|
"epoch": 0.28651014723438123, |
|
"grad_norm": 1.341401556005613, |
|
"learning_rate": 4.4893620829118124e-07, |
|
"logits/chosen": -2.1538476943969727, |
|
"logits/rejected": -2.1225638389587402, |
|
"logps/chosen": -299.4391174316406, |
|
"logps/rejected": -328.9165954589844, |
|
"loss": 0.042, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.477020025253296, |
|
"rewards/margins": 0.358177125453949, |
|
"rewards/rejected": -1.8351972103118896, |
|
"step": 360 |
|
}, |
|
{ |
|
"debug/losses": 0.03851500526070595, |
|
"debug/policy_weights": 0.0639430433511734, |
|
"debug/raw_losses": 0.6564086079597473, |
|
"epoch": 0.29446876243533626, |
|
"grad_norm": 1.6981934763772588, |
|
"learning_rate": 4.4465047235785185e-07, |
|
"logits/chosen": -2.1684069633483887, |
|
"logits/rejected": -2.133373498916626, |
|
"logps/chosen": -311.69146728515625, |
|
"logps/rejected": -320.17266845703125, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.6684709787368774, |
|
"rewards/margins": 0.2505863904953003, |
|
"rewards/rejected": -1.9190574884414673, |
|
"step": 370 |
|
}, |
|
{ |
|
"debug/losses": 0.03426089510321617, |
|
"debug/policy_weights": 0.05481856316328049, |
|
"debug/raw_losses": 0.636349081993103, |
|
"epoch": 0.3024273776362913, |
|
"grad_norm": 1.88904780932561, |
|
"learning_rate": 4.40214293992074e-07, |
|
"logits/chosen": -2.0536434650421143, |
|
"logits/rejected": -2.0290040969848633, |
|
"logps/chosen": -313.34796142578125, |
|
"logps/rejected": -339.9054260253906, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.7595481872558594, |
|
"rewards/margins": 0.2933933436870575, |
|
"rewards/rejected": -2.0529415607452393, |
|
"step": 380 |
|
}, |
|
{ |
|
"debug/losses": 0.049106162041425705, |
|
"debug/policy_weights": 0.07827477902173996, |
|
"debug/raw_losses": 0.6187797784805298, |
|
"epoch": 0.3103859928372463, |
|
"grad_norm": 1.386576352552282, |
|
"learning_rate": 4.3563110184961234e-07, |
|
"logits/chosen": -2.0677030086517334, |
|
"logits/rejected": -2.041632890701294, |
|
"logps/chosen": -306.24053955078125, |
|
"logps/rejected": -343.28515625, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5703617334365845, |
|
"rewards/margins": 0.34101730585098267, |
|
"rewards/rejected": -1.9113788604736328, |
|
"step": 390 |
|
}, |
|
{ |
|
"debug/losses": 0.029186096042394638, |
|
"debug/policy_weights": 0.054026149213314056, |
|
"debug/raw_losses": 0.5490237474441528, |
|
"epoch": 0.31834460803820136, |
|
"grad_norm": 1.1347734675549332, |
|
"learning_rate": 4.3090443821097566e-07, |
|
"logits/chosen": -2.0278306007385254, |
|
"logits/rejected": -1.9964557886123657, |
|
"logps/chosen": -312.5970458984375, |
|
"logps/rejected": -376.74591064453125, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.765263319015503, |
|
"rewards/margins": 0.4999556541442871, |
|
"rewards/rejected": -2.265219211578369, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31834460803820136, |
|
"eval_debug/losses": 0.027978185564279556, |
|
"eval_debug/policy_weights": 0.04763055220246315, |
|
"eval_debug/raw_losses": 0.606778085231781, |
|
"eval_logits/chosen": -2.021036386489868, |
|
"eval_logits/rejected": -1.9971833229064941, |
|
"eval_logps/chosen": -334.9689025878906, |
|
"eval_logps/rejected": -375.8979797363281, |
|
"eval_loss": 0.0302441269159317, |
|
"eval_rewards/accuracies": 0.6604477763175964, |
|
"eval_rewards/chosen": -1.9072539806365967, |
|
"eval_rewards/margins": 0.34473562240600586, |
|
"eval_rewards/rejected": -2.2519896030426025, |
|
"eval_runtime": 152.8965, |
|
"eval_samples_per_second": 55.933, |
|
"eval_steps_per_second": 0.876, |
|
"step": 400 |
|
}, |
|
{ |
|
"debug/losses": 0.033793386071920395, |
|
"debug/policy_weights": 0.05818073824048042, |
|
"debug/raw_losses": 0.5851433277130127, |
|
"epoch": 0.3263032232391564, |
|
"grad_norm": 1.5661418376002596, |
|
"learning_rate": 4.2603795624364195e-07, |
|
"logits/chosen": -2.012786865234375, |
|
"logits/rejected": -1.9587138891220093, |
|
"logps/chosen": -307.849365234375, |
|
"logps/rejected": -336.9012756347656, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.714755654335022, |
|
"rewards/margins": 0.37940770387649536, |
|
"rewards/rejected": -2.094163417816162, |
|
"step": 410 |
|
}, |
|
{ |
|
"debug/losses": 0.06058833748102188, |
|
"debug/policy_weights": 0.09707297384738922, |
|
"debug/raw_losses": 0.5794335603713989, |
|
"epoch": 0.3342618384401114, |
|
"grad_norm": 1.8472937892928514, |
|
"learning_rate": 4.210354171785795e-07, |
|
"logits/chosen": -2.123286724090576, |
|
"logits/rejected": -2.116464138031006, |
|
"logps/chosen": -268.4242248535156, |
|
"logps/rejected": -322.94091796875, |
|
"loss": 0.0525, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.262597918510437, |
|
"rewards/margins": 0.42006754875183105, |
|
"rewards/rejected": -1.6826655864715576, |
|
"step": 420 |
|
}, |
|
{ |
|
"debug/losses": 0.046358704566955566, |
|
"debug/policy_weights": 0.08018581569194794, |
|
"debug/raw_losses": 0.6013703942298889, |
|
"epoch": 0.34222045364106646, |
|
"grad_norm": 1.3294109687375146, |
|
"learning_rate": 4.15900687403248e-07, |
|
"logits/chosen": -2.1637144088745117, |
|
"logits/rejected": -2.14619779586792, |
|
"logps/chosen": -269.3055114746094, |
|
"logps/rejected": -311.72271728515625, |
|
"loss": 0.0515, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3137115240097046, |
|
"rewards/margins": 0.40074974298477173, |
|
"rewards/rejected": -1.714461326599121, |
|
"step": 430 |
|
}, |
|
{ |
|
"debug/losses": 0.028133749961853027, |
|
"debug/policy_weights": 0.04740751534700394, |
|
"debug/raw_losses": 0.6014739871025085, |
|
"epoch": 0.3501790688420215, |
|
"grad_norm": 0.7809537654588382, |
|
"learning_rate": 4.1063773547332584e-07, |
|
"logits/chosen": -2.0739526748657227, |
|
"logits/rejected": -2.0483832359313965, |
|
"logps/chosen": -318.5735168457031, |
|
"logps/rejected": -358.01007080078125, |
|
"loss": 0.0305, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7163457870483398, |
|
"rewards/margins": 0.38432344794273376, |
|
"rewards/rejected": -2.1006693840026855, |
|
"step": 440 |
|
}, |
|
{ |
|
"debug/losses": 0.020220816135406494, |
|
"debug/policy_weights": 0.03701246529817581, |
|
"debug/raw_losses": 0.5830426812171936, |
|
"epoch": 0.3581376840429765, |
|
"grad_norm": 0.9223432546306553, |
|
"learning_rate": 4.0525062904547276e-07, |
|
"logits/chosen": -2.0108728408813477, |
|
"logits/rejected": -1.9766314029693604, |
|
"logps/chosen": -317.477783203125, |
|
"logps/rejected": -354.3273010253906, |
|
"loss": 0.028, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.849148154258728, |
|
"rewards/margins": 0.4447658658027649, |
|
"rewards/rejected": -2.293914318084717, |
|
"step": 450 |
|
}, |
|
{ |
|
"debug/losses": 0.032562464475631714, |
|
"debug/policy_weights": 0.055046629160642624, |
|
"debug/raw_losses": 0.5713292360305786, |
|
"epoch": 0.36609629924393156, |
|
"grad_norm": 1.046176079788828, |
|
"learning_rate": 3.997435317334988e-07, |
|
"logits/chosen": -2.074617385864258, |
|
"logits/rejected": -2.0565450191497803, |
|
"logps/chosen": -322.37738037109375, |
|
"logps/rejected": -373.4345703125, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.753626823425293, |
|
"rewards/margins": 0.44904619455337524, |
|
"rewards/rejected": -2.2026727199554443, |
|
"step": 460 |
|
}, |
|
{ |
|
"debug/losses": 0.03133855015039444, |
|
"debug/policy_weights": 0.05432797595858574, |
|
"debug/raw_losses": 0.5810926556587219, |
|
"epoch": 0.3740549144448866, |
|
"grad_norm": 0.9700020848498487, |
|
"learning_rate": 3.941206998903701e-07, |
|
"logits/chosen": -2.1040358543395996, |
|
"logits/rejected": -2.0743563175201416, |
|
"logps/chosen": -331.5384826660156, |
|
"logps/rejected": -365.0169677734375, |
|
"loss": 0.0285, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7676904201507568, |
|
"rewards/margins": 0.36116519570350647, |
|
"rewards/rejected": -2.1288554668426514, |
|
"step": 470 |
|
}, |
|
{ |
|
"debug/losses": 0.03259238973259926, |
|
"debug/policy_weights": 0.052516065537929535, |
|
"debug/raw_losses": 0.5992477536201477, |
|
"epoch": 0.3820135296458416, |
|
"grad_norm": 1.3154459086844248, |
|
"learning_rate": 3.8838647931853684e-07, |
|
"logits/chosen": -2.0499539375305176, |
|
"logits/rejected": -2.0263118743896484, |
|
"logps/chosen": -289.03106689453125, |
|
"logps/rejected": -331.93133544921875, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6713088750839233, |
|
"rewards/margins": 0.3960718512535095, |
|
"rewards/rejected": -2.067380666732788, |
|
"step": 480 |
|
}, |
|
{ |
|
"debug/losses": 0.03394859656691551, |
|
"debug/policy_weights": 0.0627564936876297, |
|
"debug/raw_losses": 0.5517407655715942, |
|
"epoch": 0.38997214484679665, |
|
"grad_norm": 1.4921908216218414, |
|
"learning_rate": 3.825453019111281e-07, |
|
"logits/chosen": -2.1080005168914795, |
|
"logits/rejected": -2.0806961059570312, |
|
"logps/chosen": -297.0895080566406, |
|
"logps/rejected": -362.0596618652344, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.5262044668197632, |
|
"rewards/margins": 0.549400269985199, |
|
"rewards/rejected": -2.0756049156188965, |
|
"step": 490 |
|
}, |
|
{ |
|
"debug/losses": 0.04039504751563072, |
|
"debug/policy_weights": 0.07268974184989929, |
|
"debug/raw_losses": 0.543070375919342, |
|
"epoch": 0.3979307600477517, |
|
"grad_norm": 1.5546856632601864, |
|
"learning_rate": 3.7660168222660824e-07, |
|
"logits/chosen": -2.1996207237243652, |
|
"logits/rejected": -2.1463570594787598, |
|
"logps/chosen": -322.22967529296875, |
|
"logps/rejected": -356.3480224609375, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.537804365158081, |
|
"rewards/margins": 0.5032481551170349, |
|
"rewards/rejected": -2.0410525798797607, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3979307600477517, |
|
"eval_debug/losses": 0.03992186114192009, |
|
"eval_debug/policy_weights": 0.06951306015253067, |
|
"eval_debug/raw_losses": 0.5867676734924316, |
|
"eval_logits/chosen": -2.1866133213043213, |
|
"eval_logits/rejected": -2.1609621047973633, |
|
"eval_logps/chosen": -295.57781982421875, |
|
"eval_logps/rejected": -349.751708984375, |
|
"eval_loss": 0.04235580936074257, |
|
"eval_rewards/accuracies": 0.683768630027771, |
|
"eval_rewards/chosen": -1.513343334197998, |
|
"eval_rewards/margins": 0.477183997631073, |
|
"eval_rewards/rejected": -1.9905272722244263, |
|
"eval_runtime": 152.8577, |
|
"eval_samples_per_second": 55.947, |
|
"eval_steps_per_second": 0.877, |
|
"step": 500 |
|
}, |
|
{ |
|
"debug/losses": 0.041816793382167816, |
|
"debug/policy_weights": 0.07070693373680115, |
|
"debug/raw_losses": 0.5931949615478516, |
|
"epoch": 0.4058893752487067, |
|
"grad_norm": 1.8707198573695687, |
|
"learning_rate": 3.705602139995416e-07, |
|
"logits/chosen": -2.1828060150146484, |
|
"logits/rejected": -2.147474765777588, |
|
"logps/chosen": -293.61553955078125, |
|
"logps/rejected": -337.2850036621094, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4135316610336304, |
|
"rewards/margins": 0.45567187666893005, |
|
"rewards/rejected": -1.8692035675048828, |
|
"step": 510 |
|
}, |
|
{ |
|
"debug/losses": 0.04330951347947121, |
|
"debug/policy_weights": 0.06736676394939423, |
|
"debug/raw_losses": 0.6137939691543579, |
|
"epoch": 0.41384799044966175, |
|
"grad_norm": 1.4619434139696437, |
|
"learning_rate": 3.6442556659016475e-07, |
|
"logits/chosen": -2.185202121734619, |
|
"logits/rejected": -2.165663003921509, |
|
"logps/chosen": -298.6885681152344, |
|
"logps/rejected": -335.07366943359375, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5118178129196167, |
|
"rewards/margins": 0.34012988209724426, |
|
"rewards/rejected": -1.8519477844238281, |
|
"step": 520 |
|
}, |
|
{ |
|
"debug/losses": 0.04355059936642647, |
|
"debug/policy_weights": 0.06907118856906891, |
|
"debug/raw_losses": 0.6439172029495239, |
|
"epoch": 0.4218066056506168, |
|
"grad_norm": 1.3367245541718495, |
|
"learning_rate": 3.582024813755076e-07, |
|
"logits/chosen": -2.108959436416626, |
|
"logits/rejected": -2.086933135986328, |
|
"logps/chosen": -321.8992614746094, |
|
"logps/rejected": -345.1759033203125, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.71266770362854, |
|
"rewards/margins": 0.27295029163360596, |
|
"rewards/rejected": -1.985618233680725, |
|
"step": 530 |
|
}, |
|
{ |
|
"debug/losses": 0.03872833028435707, |
|
"debug/policy_weights": 0.06447508931159973, |
|
"debug/raw_losses": 0.5817040205001831, |
|
"epoch": 0.4297652208515718, |
|
"grad_norm": 1.3025243070628232, |
|
"learning_rate": 3.5189576808485404e-07, |
|
"logits/chosen": -2.1122708320617676, |
|
"logits/rejected": -2.088346004486084, |
|
"logps/chosen": -310.7181701660156, |
|
"logps/rejected": -361.2312316894531, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.601313829421997, |
|
"rewards/margins": 0.45615309476852417, |
|
"rewards/rejected": -2.057466983795166, |
|
"step": 540 |
|
}, |
|
{ |
|
"debug/losses": 0.03740643709897995, |
|
"debug/policy_weights": 0.05903150886297226, |
|
"debug/raw_losses": 0.6242271065711975, |
|
"epoch": 0.43772383605252685, |
|
"grad_norm": 1.8261628628746243, |
|
"learning_rate": 3.4551030108237433e-07, |
|
"logits/chosen": -2.097672462463379, |
|
"logits/rejected": -2.042898654937744, |
|
"logps/chosen": -325.98193359375, |
|
"logps/rejected": -347.8229675292969, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.7341053485870361, |
|
"rewards/margins": 0.3862043023109436, |
|
"rewards/rejected": -2.120309591293335, |
|
"step": 550 |
|
}, |
|
{ |
|
"debug/losses": 0.028903227299451828, |
|
"debug/policy_weights": 0.05456990748643875, |
|
"debug/raw_losses": 0.5671552419662476, |
|
"epoch": 0.4456824512534819, |
|
"grad_norm": 1.3969384254784958, |
|
"learning_rate": 3.390510155998023e-07, |
|
"logits/chosen": -2.142831325531006, |
|
"logits/rejected": -2.1043479442596436, |
|
"logps/chosen": -335.3097229003906, |
|
"logps/rejected": -388.9020080566406, |
|
"loss": 0.0296, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8105127811431885, |
|
"rewards/margins": 0.5163544416427612, |
|
"rewards/rejected": -2.32686710357666, |
|
"step": 560 |
|
}, |
|
{ |
|
"debug/losses": 0.031607236713171005, |
|
"debug/policy_weights": 0.05318068340420723, |
|
"debug/raw_losses": 0.5985600352287292, |
|
"epoch": 0.4536410664544369, |
|
"grad_norm": 1.647741129882177, |
|
"learning_rate": 3.325229039220684e-07, |
|
"logits/chosen": -2.085341691970825, |
|
"logits/rejected": -2.064438581466675, |
|
"logps/chosen": -350.662353515625, |
|
"logps/rejected": -387.7210998535156, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.9661823511123657, |
|
"rewards/margins": 0.3896261155605316, |
|
"rewards/rejected": -2.3558084964752197, |
|
"step": 570 |
|
}, |
|
{ |
|
"debug/losses": 0.03731871768832207, |
|
"debug/policy_weights": 0.0585642084479332, |
|
"debug/raw_losses": 0.5958778858184814, |
|
"epoch": 0.46159968165539195, |
|
"grad_norm": 1.4791769718410137, |
|
"learning_rate": 3.2593101152883795e-07, |
|
"logits/chosen": -2.095475435256958, |
|
"logits/rejected": -2.058140277862549, |
|
"logps/chosen": -331.99822998046875, |
|
"logps/rejected": -371.34918212890625, |
|
"loss": 0.0269, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.8306747674942017, |
|
"rewards/margins": 0.4158768653869629, |
|
"rewards/rejected": -2.246551513671875, |
|
"step": 580 |
|
}, |
|
{ |
|
"debug/losses": 0.03436756879091263, |
|
"debug/policy_weights": 0.05549240857362747, |
|
"debug/raw_losses": 0.5947594046592712, |
|
"epoch": 0.469558296856347, |
|
"grad_norm": 1.311005334728305, |
|
"learning_rate": 3.192804331949349e-07, |
|
"logits/chosen": -2.0892815589904785, |
|
"logits/rejected": -2.064006805419922, |
|
"logps/chosen": -329.79632568359375, |
|
"logps/rejected": -367.3946228027344, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8716663122177124, |
|
"rewards/margins": 0.40899592638015747, |
|
"rewards/rejected": -2.2806622982025146, |
|
"step": 590 |
|
}, |
|
{ |
|
"debug/losses": 0.02820407785475254, |
|
"debug/policy_weights": 0.05099906399846077, |
|
"debug/raw_losses": 0.572905421257019, |
|
"epoch": 0.477516912057302, |
|
"grad_norm": 1.2557571913266798, |
|
"learning_rate": 3.125763090526674e-07, |
|
"logits/chosen": -2.1351561546325684, |
|
"logits/rejected": -2.0871920585632324, |
|
"logps/chosen": -335.27978515625, |
|
"logps/rejected": -378.8961486816406, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.804330587387085, |
|
"rewards/margins": 0.49522823095321655, |
|
"rewards/rejected": -2.299558639526367, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.477516912057302, |
|
"eval_debug/losses": 0.028778724372386932, |
|
"eval_debug/policy_weights": 0.05042354390025139, |
|
"eval_debug/raw_losses": 0.5833220481872559, |
|
"eval_logits/chosen": -2.128926992416382, |
|
"eval_logits/rejected": -2.1045045852661133, |
|
"eval_logps/chosen": -328.2419128417969, |
|
"eval_logps/rejected": -378.8386535644531, |
|
"eval_loss": 0.030947599560022354, |
|
"eval_rewards/accuracies": 0.6772388219833374, |
|
"eval_rewards/chosen": -1.839984655380249, |
|
"eval_rewards/margins": 0.4414121210575104, |
|
"eval_rewards/rejected": -2.2813963890075684, |
|
"eval_runtime": 152.7586, |
|
"eval_samples_per_second": 55.984, |
|
"eval_steps_per_second": 0.877, |
|
"step": 600 |
|
}, |
|
{ |
|
"debug/losses": 0.028522927314043045, |
|
"debug/policy_weights": 0.04586270451545715, |
|
"debug/raw_losses": 0.6142871379852295, |
|
"epoch": 0.48547552725825704, |
|
"grad_norm": 1.182590096149601, |
|
"learning_rate": 3.0582382061909623e-07, |
|
"logits/chosen": -2.143110752105713, |
|
"logits/rejected": -2.1091790199279785, |
|
"logps/chosen": -331.83636474609375, |
|
"logps/rejected": -368.21844482421875, |
|
"loss": 0.0302, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9071013927459717, |
|
"rewards/margins": 0.3452487289905548, |
|
"rewards/rejected": -2.252350091934204, |
|
"step": 610 |
|
}, |
|
{ |
|
"debug/losses": 0.03037545084953308, |
|
"debug/policy_weights": 0.0514327809214592, |
|
"debug/raw_losses": 0.5810720324516296, |
|
"epoch": 0.4934341424592121, |
|
"grad_norm": 1.593561174181043, |
|
"learning_rate": 2.9902818679131775e-07, |
|
"logits/chosen": -2.1445069313049316, |
|
"logits/rejected": -2.091860294342041, |
|
"logps/chosen": -326.50469970703125, |
|
"logps/rejected": -367.2829284667969, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7178184986114502, |
|
"rewards/margins": 0.4634013772010803, |
|
"rewards/rejected": -2.1812198162078857, |
|
"step": 620 |
|
}, |
|
{ |
|
"debug/losses": 0.030525147914886475, |
|
"debug/policy_weights": 0.05667508766055107, |
|
"debug/raw_losses": 0.5397539734840393, |
|
"epoch": 0.5013927576601671, |
|
"grad_norm": 2.8222395351404543, |
|
"learning_rate": 2.921946598128571e-07, |
|
"logits/chosen": -2.10142183303833, |
|
"logits/rejected": -2.067218780517578, |
|
"logps/chosen": -305.54437255859375, |
|
"logps/rejected": -338.94415283203125, |
|
"loss": 0.0373, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5276521444320679, |
|
"rewards/margins": 0.5411427617073059, |
|
"rewards/rejected": -2.0687947273254395, |
|
"step": 630 |
|
}, |
|
{ |
|
"debug/losses": 0.03578261286020279, |
|
"debug/policy_weights": 0.05523357912898064, |
|
"debug/raw_losses": 0.62241131067276, |
|
"epoch": 0.5093513728611222, |
|
"grad_norm": 1.8331946866279472, |
|
"learning_rate": 2.8532852121428733e-07, |
|
"logits/chosen": -2.059525728225708, |
|
"logits/rejected": -2.024341344833374, |
|
"logps/chosen": -295.8885192871094, |
|
"logps/rejected": -332.0082092285156, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6036468744277954, |
|
"rewards/margins": 0.38395947217941284, |
|
"rewards/rejected": -1.987606406211853, |
|
"step": 640 |
|
}, |
|
{ |
|
"debug/losses": 0.0353676900267601, |
|
"debug/policy_weights": 0.0586540512740612, |
|
"debug/raw_losses": 0.5654557943344116, |
|
"epoch": 0.5173099880620772, |
|
"grad_norm": 1.6263620123295128, |
|
"learning_rate": 2.7843507773121414e-07, |
|
"logits/chosen": -2.0878262519836426, |
|
"logits/rejected": -2.050427198410034, |
|
"logps/chosen": -297.3978271484375, |
|
"logps/rejected": -357.1239929199219, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6018199920654297, |
|
"rewards/margins": 0.5254247784614563, |
|
"rewards/rejected": -2.1272449493408203, |
|
"step": 650 |
|
}, |
|
{ |
|
"debug/losses": 0.029138093814253807, |
|
"debug/policy_weights": 0.06430795043706894, |
|
"debug/raw_losses": 0.5185871720314026, |
|
"epoch": 0.5252686032630323, |
|
"grad_norm": 1.4058645383465609, |
|
"learning_rate": 2.715196572027789e-07, |
|
"logits/chosen": -2.038132667541504, |
|
"logits/rejected": -2.007830858230591, |
|
"logps/chosen": -304.67694091796875, |
|
"logps/rejected": -377.6889343261719, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.602113127708435, |
|
"rewards/margins": 0.6335779428482056, |
|
"rewards/rejected": -2.2356910705566406, |
|
"step": 660 |
|
}, |
|
{ |
|
"debug/losses": 0.03085913695394993, |
|
"debug/policy_weights": 0.05442778393626213, |
|
"debug/raw_losses": 0.6082974672317505, |
|
"epoch": 0.5332272184639872, |
|
"grad_norm": 1.6041217842152309, |
|
"learning_rate": 2.645876044538521e-07, |
|
"logits/chosen": -2.0396082401275635, |
|
"logits/rejected": -2.0031583309173584, |
|
"logps/chosen": -327.6360168457031, |
|
"logps/rejected": -360.5151062011719, |
|
"loss": 0.033, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.7850401401519775, |
|
"rewards/margins": 0.4116973280906677, |
|
"rewards/rejected": -2.19673752784729, |
|
"step": 670 |
|
}, |
|
{ |
|
"debug/losses": 0.034388747066259384, |
|
"debug/policy_weights": 0.057945240288972855, |
|
"debug/raw_losses": 0.5781614184379578, |
|
"epoch": 0.5411858336649423, |
|
"grad_norm": 1.2956026443956346, |
|
"learning_rate": 2.5764427716409815e-07, |
|
"logits/chosen": -2.0535218715667725, |
|
"logits/rejected": -2.0209546089172363, |
|
"logps/chosen": -322.48492431640625, |
|
"logps/rejected": -363.35797119140625, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.67715322971344, |
|
"rewards/margins": 0.41988006234169006, |
|
"rewards/rejected": -2.0970332622528076, |
|
"step": 680 |
|
}, |
|
{ |
|
"debug/losses": 0.0360778272151947, |
|
"debug/policy_weights": 0.059746015816926956, |
|
"debug/raw_losses": 0.6053534150123596, |
|
"epoch": 0.5491444488658973, |
|
"grad_norm": 1.6135200911375513, |
|
"learning_rate": 2.5069504172710494e-07, |
|
"logits/chosen": -2.088912010192871, |
|
"logits/rejected": -2.0784342288970947, |
|
"logps/chosen": -319.89501953125, |
|
"logps/rejected": -373.1657409667969, |
|
"loss": 0.0349, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.7156000137329102, |
|
"rewards/margins": 0.3546966016292572, |
|
"rewards/rejected": -2.0702967643737793, |
|
"step": 690 |
|
}, |
|
{ |
|
"debug/losses": 0.049276988953351974, |
|
"debug/policy_weights": 0.07480012625455856, |
|
"debug/raw_losses": 0.6235750913619995, |
|
"epoch": 0.5571030640668524, |
|
"grad_norm": 1.5808440748566825, |
|
"learning_rate": 2.4374526910277886e-07, |
|
"logits/chosen": -2.0845532417297363, |
|
"logits/rejected": -2.0596041679382324, |
|
"logps/chosen": -284.52301025390625, |
|
"logps/rejected": -313.69476318359375, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.411106824874878, |
|
"rewards/margins": 0.33600515127182007, |
|
"rewards/rejected": -1.7471120357513428, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5571030640668524, |
|
"eval_debug/losses": 0.04337022453546524, |
|
"eval_debug/policy_weights": 0.07531096041202545, |
|
"eval_debug/raw_losses": 0.5755950212478638, |
|
"eval_logits/chosen": -2.0900442600250244, |
|
"eval_logits/rejected": -2.0673747062683105, |
|
"eval_logps/chosen": -280.4930114746094, |
|
"eval_logps/rejected": -331.8508605957031, |
|
"eval_loss": 0.04617752134799957, |
|
"eval_rewards/accuracies": 0.691231369972229, |
|
"eval_rewards/chosen": -1.3624950647354126, |
|
"eval_rewards/margins": 0.449023574590683, |
|
"eval_rewards/rejected": -1.811518669128418, |
|
"eval_runtime": 152.8491, |
|
"eval_samples_per_second": 55.951, |
|
"eval_steps_per_second": 0.877, |
|
"step": 700 |
|
}, |
|
{ |
|
"debug/losses": 0.0449453704059124, |
|
"debug/policy_weights": 0.0735592395067215, |
|
"debug/raw_losses": 0.5990960597991943, |
|
"epoch": 0.5650616792678074, |
|
"grad_norm": 2.20481488012055, |
|
"learning_rate": 2.368003306662104e-07, |
|
"logits/chosen": -2.0511984825134277, |
|
"logits/rejected": -2.01592755317688, |
|
"logps/chosen": -304.49285888671875, |
|
"logps/rejected": -334.17608642578125, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4889942407608032, |
|
"rewards/margins": 0.4087558686733246, |
|
"rewards/rejected": -1.8977501392364502, |
|
"step": 710 |
|
}, |
|
{ |
|
"debug/losses": 0.037745922803878784, |
|
"debug/policy_weights": 0.07329835742712021, |
|
"debug/raw_losses": 0.5745851397514343, |
|
"epoch": 0.5730202944687625, |
|
"grad_norm": 1.6814472517336854, |
|
"learning_rate": 2.2986559405621886e-07, |
|
"logits/chosen": -2.0232553482055664, |
|
"logits/rejected": -1.9815248250961304, |
|
"logps/chosen": -316.2637634277344, |
|
"logps/rejected": -357.62969970703125, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.5382474660873413, |
|
"rewards/margins": 0.46971410512924194, |
|
"rewards/rejected": -2.0079617500305176, |
|
"step": 720 |
|
}, |
|
{ |
|
"debug/losses": 0.021795693784952164, |
|
"debug/policy_weights": 0.03945660963654518, |
|
"debug/raw_losses": 0.5574513673782349, |
|
"epoch": 0.5809789096697174, |
|
"grad_norm": 1.2278801956439722, |
|
"learning_rate": 2.2294641902678443e-07, |
|
"logits/chosen": -1.9514557123184204, |
|
"logits/rejected": -1.9241917133331299, |
|
"logps/chosen": -324.55279541015625, |
|
"logps/rejected": -370.191650390625, |
|
"loss": 0.0231, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.8938844203948975, |
|
"rewards/margins": 0.4893426299095154, |
|
"rewards/rejected": -2.3832271099090576, |
|
"step": 730 |
|
}, |
|
{ |
|
"debug/losses": 0.021131381392478943, |
|
"debug/policy_weights": 0.040012426674366, |
|
"debug/raw_losses": 0.5742698907852173, |
|
"epoch": 0.5889375248706725, |
|
"grad_norm": 1.454963087082975, |
|
"learning_rate": 2.160481533045751e-07, |
|
"logits/chosen": -1.9214550256729126, |
|
"logits/rejected": -1.877915382385254, |
|
"logps/chosen": -347.30078125, |
|
"logps/rejected": -377.1596984863281, |
|
"loss": 0.0255, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9716631174087524, |
|
"rewards/margins": 0.39278116822242737, |
|
"rewards/rejected": -2.3644442558288574, |
|
"step": 740 |
|
}, |
|
{ |
|
"debug/losses": 0.027965540066361427, |
|
"debug/policy_weights": 0.05136016011238098, |
|
"debug/raw_losses": 0.570824146270752, |
|
"epoch": 0.5968961400716275, |
|
"grad_norm": 1.3911526389472733, |
|
"learning_rate": 2.0917612845576882e-07, |
|
"logits/chosen": -1.9628822803497314, |
|
"logits/rejected": -1.8985135555267334, |
|
"logps/chosen": -327.02178955078125, |
|
"logps/rejected": -358.8661804199219, |
|
"loss": 0.03, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.705392599105835, |
|
"rewards/margins": 0.5323067903518677, |
|
"rewards/rejected": -2.237699270248413, |
|
"step": 750 |
|
}, |
|
{ |
|
"debug/losses": 0.029450882226228714, |
|
"debug/policy_weights": 0.05074804276227951, |
|
"debug/raw_losses": 0.5880511403083801, |
|
"epoch": 0.6048547552725826, |
|
"grad_norm": 2.2333339678589184, |
|
"learning_rate": 2.0233565576536564e-07, |
|
"logits/chosen": -1.9341914653778076, |
|
"logits/rejected": -1.9149129390716553, |
|
"logps/chosen": -303.7951965332031, |
|
"logps/rejected": -353.73541259765625, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.618255376815796, |
|
"rewards/margins": 0.4489392340183258, |
|
"rewards/rejected": -2.067194700241089, |
|
"step": 760 |
|
}, |
|
{ |
|
"debug/losses": 0.03321235626935959, |
|
"debug/policy_weights": 0.06097835302352905, |
|
"debug/raw_losses": 0.5398764610290527, |
|
"epoch": 0.6128133704735376, |
|
"grad_norm": 1.6557317661926283, |
|
"learning_rate": 1.9553202213217537e-07, |
|
"logits/chosen": -1.9118038415908813, |
|
"logits/rejected": -1.8792883157730103, |
|
"logps/chosen": -286.04901123046875, |
|
"logps/rejected": -348.7887268066406, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.5565006732940674, |
|
"rewards/margins": 0.5775460004806519, |
|
"rewards/rejected": -2.134046792984009, |
|
"step": 770 |
|
}, |
|
{ |
|
"debug/losses": 0.03244846314191818, |
|
"debug/policy_weights": 0.061699897050857544, |
|
"debug/raw_losses": 0.5690463185310364, |
|
"epoch": 0.6207719856744927, |
|
"grad_norm": 1.4912972959386048, |
|
"learning_rate": 1.887704859826528e-07, |
|
"logits/chosen": -1.927679419517517, |
|
"logits/rejected": -1.8800761699676514, |
|
"logps/chosen": -330.97998046875, |
|
"logps/rejected": -388.7947692871094, |
|
"loss": 0.0324, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6972744464874268, |
|
"rewards/margins": 0.5525250434875488, |
|
"rewards/rejected": -2.2497994899749756, |
|
"step": 780 |
|
}, |
|
{ |
|
"debug/losses": 0.031903307884931564, |
|
"debug/policy_weights": 0.05280442163348198, |
|
"debug/raw_losses": 0.5780637860298157, |
|
"epoch": 0.6287306008754476, |
|
"grad_norm": 1.4895600667578988, |
|
"learning_rate": 1.8205627320673836e-07, |
|
"logits/chosen": -1.8867695331573486, |
|
"logits/rejected": -1.8334290981292725, |
|
"logps/chosen": -339.6418762207031, |
|
"logps/rejected": -387.8199462890625, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8608417510986328, |
|
"rewards/margins": 0.5528916120529175, |
|
"rewards/rejected": -2.4137332439422607, |
|
"step": 790 |
|
}, |
|
{ |
|
"debug/losses": 0.027057424187660217, |
|
"debug/policy_weights": 0.049826182425022125, |
|
"debug/raw_losses": 0.5931520462036133, |
|
"epoch": 0.6366892160764027, |
|
"grad_norm": 1.1976056858635522, |
|
"learning_rate": 1.7539457311884675e-07, |
|
"logits/chosen": -1.8889570236206055, |
|
"logits/rejected": -1.8268101215362549, |
|
"logps/chosen": -345.4725036621094, |
|
"logps/rejected": -377.6209411621094, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.8778314590454102, |
|
"rewards/margins": 0.4333206117153168, |
|
"rewards/rejected": -2.311152219772339, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6366892160764027, |
|
"eval_debug/losses": 0.02687034010887146, |
|
"eval_debug/policy_weights": 0.048049140721559525, |
|
"eval_debug/raw_losses": 0.5768545269966125, |
|
"eval_logits/chosen": -1.8538771867752075, |
|
"eval_logits/rejected": -1.8270140886306763, |
|
"eval_logps/chosen": -329.3051452636719, |
|
"eval_logps/rejected": -385.80999755859375, |
|
"eval_loss": 0.029471097514033318, |
|
"eval_rewards/accuracies": 0.6977611780166626, |
|
"eval_rewards/chosen": -1.8506169319152832, |
|
"eval_rewards/margins": 0.5004932284355164, |
|
"eval_rewards/rejected": -2.3511102199554443, |
|
"eval_runtime": 152.9352, |
|
"eval_samples_per_second": 55.919, |
|
"eval_steps_per_second": 0.876, |
|
"step": 800 |
|
}, |
|
{ |
|
"debug/losses": 0.03517676144838333, |
|
"debug/policy_weights": 0.05810894817113876, |
|
"debug/raw_losses": 0.6028115153312683, |
|
"epoch": 0.6446478312773577, |
|
"grad_norm": 1.3469519811703725, |
|
"learning_rate": 1.687905344471226e-07, |
|
"logits/chosen": -1.871289610862732, |
|
"logits/rejected": -1.8393253087997437, |
|
"logps/chosen": -336.5460510253906, |
|
"logps/rejected": -381.5574645996094, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.7289113998413086, |
|
"rewards/margins": 0.43413639068603516, |
|
"rewards/rejected": -2.1630477905273438, |
|
"step": 810 |
|
}, |
|
{ |
|
"debug/losses": 0.03737642616033554, |
|
"debug/policy_weights": 0.053779907524585724, |
|
"debug/raw_losses": 0.6097862124443054, |
|
"epoch": 0.6526064464783128, |
|
"grad_norm": 1.2305683677129626, |
|
"learning_rate": 1.6224926135406693e-07, |
|
"logits/chosen": -1.8764880895614624, |
|
"logits/rejected": -1.8340880870819092, |
|
"logps/chosen": -320.94805908203125, |
|
"logps/rejected": -350.2289123535156, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7637560367584229, |
|
"rewards/margins": 0.3951038420200348, |
|
"rewards/rejected": -2.158859968185425, |
|
"step": 820 |
|
}, |
|
{ |
|
"debug/losses": 0.031071752309799194, |
|
"debug/policy_weights": 0.05692852661013603, |
|
"debug/raw_losses": 0.5603014826774597, |
|
"epoch": 0.6605650616792678, |
|
"grad_norm": 1.4651619498114408, |
|
"learning_rate": 1.557758094916053e-07, |
|
"logits/chosen": -1.906121015548706, |
|
"logits/rejected": -1.8540939092636108, |
|
"logps/chosen": -327.9610900878906, |
|
"logps/rejected": -378.42193603515625, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7102444171905518, |
|
"rewards/margins": 0.5098624229431152, |
|
"rewards/rejected": -2.220106601715088, |
|
"step": 830 |
|
}, |
|
{ |
|
"debug/losses": 0.03221515566110611, |
|
"debug/policy_weights": 0.05942277982831001, |
|
"debug/raw_losses": 0.5662818551063538, |
|
"epoch": 0.6685236768802229, |
|
"grad_norm": 1.4409855909584606, |
|
"learning_rate": 1.4937518209365108e-07, |
|
"logits/chosen": -1.925093650817871, |
|
"logits/rejected": -1.8490371704101562, |
|
"logps/chosen": -352.86773681640625, |
|
"logps/rejected": -377.3597412109375, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7442684173583984, |
|
"rewards/margins": 0.4963502287864685, |
|
"rewards/rejected": -2.2406187057495117, |
|
"step": 840 |
|
}, |
|
{ |
|
"debug/losses": 0.02671756222844124, |
|
"debug/policy_weights": 0.04855426400899887, |
|
"debug/raw_losses": 0.6005613207817078, |
|
"epoch": 0.6764822920811778, |
|
"grad_norm": 1.4529693634395395, |
|
"learning_rate": 1.4305232610918045e-07, |
|
"logits/chosen": -1.8812288045883179, |
|
"logits/rejected": -1.8472543954849243, |
|
"logps/chosen": -335.77374267578125, |
|
"logps/rejected": -373.2962951660156, |
|
"loss": 0.0302, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8972389698028564, |
|
"rewards/margins": 0.40869230031967163, |
|
"rewards/rejected": -2.3059310913085938, |
|
"step": 850 |
|
}, |
|
{ |
|
"debug/losses": 0.029225418344140053, |
|
"debug/policy_weights": 0.049057330936193466, |
|
"debug/raw_losses": 0.6105281114578247, |
|
"epoch": 0.6844409072821329, |
|
"grad_norm": 1.1419302383364476, |
|
"learning_rate": 1.3681212837880977e-07, |
|
"logits/chosen": -1.8963346481323242, |
|
"logits/rejected": -1.8906829357147217, |
|
"logps/chosen": -313.63372802734375, |
|
"logps/rejected": -370.35540771484375, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7957327365875244, |
|
"rewards/margins": 0.38477063179016113, |
|
"rewards/rejected": -2.1805036067962646, |
|
"step": 860 |
|
}, |
|
{ |
|
"debug/losses": 0.026798686012625694, |
|
"debug/policy_weights": 0.044307220727205276, |
|
"debug/raw_losses": 0.5882238745689392, |
|
"epoch": 0.6923995224830879, |
|
"grad_norm": 1.3414173043772086, |
|
"learning_rate": 1.3065941185782977e-07, |
|
"logits/chosen": -1.8557084798812866, |
|
"logits/rejected": -1.815911054611206, |
|
"logps/chosen": -340.4590759277344, |
|
"logps/rejected": -364.7593078613281, |
|
"loss": 0.0285, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.882323980331421, |
|
"rewards/margins": 0.4049120843410492, |
|
"rewards/rejected": -2.287236213684082, |
|
"step": 870 |
|
}, |
|
{ |
|
"debug/losses": 0.02769031748175621, |
|
"debug/policy_weights": 0.06145339086651802, |
|
"debug/raw_losses": 0.48969849944114685, |
|
"epoch": 0.700358137684043, |
|
"grad_norm": 1.3672144922117222, |
|
"learning_rate": 1.2459893188861613e-07, |
|
"logits/chosen": -1.9420058727264404, |
|
"logits/rejected": -1.9016504287719727, |
|
"logps/chosen": -313.2261657714844, |
|
"logps/rejected": -395.3963317871094, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.6474010944366455, |
|
"rewards/margins": 0.6819612383842468, |
|
"rewards/rejected": -2.329362154006958, |
|
"step": 880 |
|
}, |
|
{ |
|
"debug/losses": 0.030598634853959084, |
|
"debug/policy_weights": 0.05649831146001816, |
|
"debug/raw_losses": 0.5550761222839355, |
|
"epoch": 0.708316752884998, |
|
"grad_norm": 1.515347005278819, |
|
"learning_rate": 1.1863537252529548e-07, |
|
"logits/chosen": -1.8932174444198608, |
|
"logits/rejected": -1.8344228267669678, |
|
"logps/chosen": -336.3061218261719, |
|
"logps/rejected": -373.91888427734375, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.7803919315338135, |
|
"rewards/margins": 0.45331424474716187, |
|
"rewards/rejected": -2.233705997467041, |
|
"step": 890 |
|
}, |
|
{ |
|
"debug/losses": 0.029710734263062477, |
|
"debug/policy_weights": 0.057327818125486374, |
|
"debug/raw_losses": 0.5665749311447144, |
|
"epoch": 0.716275368085953, |
|
"grad_norm": 1.3873792779779055, |
|
"learning_rate": 1.1277334291351145e-07, |
|
"logits/chosen": -1.8338550329208374, |
|
"logits/rejected": -1.802924394607544, |
|
"logps/chosen": -308.0507507324219, |
|
"logps/rejected": -367.1876220703125, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.7070906162261963, |
|
"rewards/margins": 0.5322864055633545, |
|
"rewards/rejected": -2.239377021789551, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.716275368085953, |
|
"eval_debug/losses": 0.03087555803358555, |
|
"eval_debug/policy_weights": 0.05567449331283569, |
|
"eval_debug/raw_losses": 0.5706273913383484, |
|
"eval_logits/chosen": -1.867327094078064, |
|
"eval_logits/rejected": -1.8414264917373657, |
|
"eval_logps/chosen": -317.96832275390625, |
|
"eval_logps/rejected": -375.444091796875, |
|
"eval_loss": 0.0338822603225708, |
|
"eval_rewards/accuracies": 0.6893656849861145, |
|
"eval_rewards/chosen": -1.7372483015060425, |
|
"eval_rewards/margins": 0.5102024078369141, |
|
"eval_rewards/rejected": -2.247450590133667, |
|
"eval_runtime": 152.7672, |
|
"eval_samples_per_second": 55.981, |
|
"eval_steps_per_second": 0.877, |
|
"step": 900 |
|
}, |
|
{ |
|
"debug/losses": 0.03138936683535576, |
|
"debug/policy_weights": 0.06450549513101578, |
|
"debug/raw_losses": 0.5024587512016296, |
|
"epoch": 0.724233983286908, |
|
"grad_norm": 1.472288824809844, |
|
"learning_rate": 1.0701737372808431e-07, |
|
"logits/chosen": -1.8538185358047485, |
|
"logits/rejected": -1.8261350393295288, |
|
"logps/chosen": -295.5444030761719, |
|
"logps/rejected": -376.8030700683594, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5179767608642578, |
|
"rewards/margins": 0.6834074258804321, |
|
"rewards/rejected": -2.2013843059539795, |
|
"step": 910 |
|
}, |
|
{ |
|
"debug/losses": 0.031300343573093414, |
|
"debug/policy_weights": 0.055743057280778885, |
|
"debug/raw_losses": 0.61448734998703, |
|
"epoch": 0.7321925984878631, |
|
"grad_norm": 1.7081189272422854, |
|
"learning_rate": 1.0137191367132078e-07, |
|
"logits/chosen": -1.8605804443359375, |
|
"logits/rejected": -1.8302139043807983, |
|
"logps/chosen": -348.89617919921875, |
|
"logps/rejected": -396.13519287109375, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.9280294179916382, |
|
"rewards/margins": 0.42716842889785767, |
|
"rewards/rejected": -2.3551979064941406, |
|
"step": 920 |
|
}, |
|
{ |
|
"debug/losses": 0.03180256113409996, |
|
"debug/policy_weights": 0.05098626762628555, |
|
"debug/raw_losses": 0.6134781837463379, |
|
"epoch": 0.7401512136888182, |
|
"grad_norm": 1.2410467414958612, |
|
"learning_rate": 9.584132603467827e-08, |
|
"logits/chosen": -1.845842719078064, |
|
"logits/rejected": -1.7887229919433594, |
|
"logps/chosen": -364.4795227050781, |
|
"logps/rejected": -396.46282958984375, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.973096251487732, |
|
"rewards/margins": 0.44869351387023926, |
|
"rewards/rejected": -2.4217896461486816, |
|
"step": 930 |
|
}, |
|
{ |
|
"debug/losses": 0.02910168096423149, |
|
"debug/policy_weights": 0.04697355628013611, |
|
"debug/raw_losses": 0.584063708782196, |
|
"epoch": 0.7481098288897732, |
|
"grad_norm": 1.6349371306622613, |
|
"learning_rate": 9.042988532644249e-08, |
|
"logits/chosen": -1.8313400745391846, |
|
"logits/rejected": -1.8036092519760132, |
|
"logps/chosen": -337.94219970703125, |
|
"logps/rejected": -399.6029357910156, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.945892333984375, |
|
"rewards/margins": 0.5027505159378052, |
|
"rewards/rejected": -2.4486427307128906, |
|
"step": 940 |
|
}, |
|
{ |
|
"debug/losses": 0.027888696640729904, |
|
"debug/policy_weights": 0.046953700482845306, |
|
"debug/raw_losses": 0.5813563466072083, |
|
"epoch": 0.7560684440907283, |
|
"grad_norm": 1.026768048816564, |
|
"learning_rate": 8.514177396802428e-08, |
|
"logits/chosen": -1.8680336475372314, |
|
"logits/rejected": -1.840049386024475, |
|
"logps/chosen": -344.02215576171875, |
|
"logps/rejected": -397.464111328125, |
|
"loss": 0.0271, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0415139198303223, |
|
"rewards/margins": 0.4642157554626465, |
|
"rewards/rejected": -2.505729913711548, |
|
"step": 950 |
|
}, |
|
{ |
|
"debug/losses": 0.026357349008321762, |
|
"debug/policy_weights": 0.048806965351104736, |
|
"debug/raw_losses": 0.5880208015441895, |
|
"epoch": 0.7640270592916832, |
|
"grad_norm": 1.050274133658511, |
|
"learning_rate": 7.998107906142839e-08, |
|
"logits/chosen": -1.8624064922332764, |
|
"logits/rejected": -1.8342987298965454, |
|
"logps/chosen": -333.5138244628906, |
|
"logps/rejected": -370.5243225097656, |
|
"loss": 0.0272, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.9049221277236938, |
|
"rewards/margins": 0.43430835008621216, |
|
"rewards/rejected": -2.3392302989959717, |
|
"step": 960 |
|
}, |
|
{ |
|
"debug/losses": 0.02456454373896122, |
|
"debug/policy_weights": 0.044480856508016586, |
|
"debug/raw_losses": 0.5699892640113831, |
|
"epoch": 0.7719856744926383, |
|
"grad_norm": 1.5164224739604937, |
|
"learning_rate": 7.495178923039396e-08, |
|
"logits/chosen": -1.8286335468292236, |
|
"logits/rejected": -1.851117491722107, |
|
"logps/chosen": -319.8497009277344, |
|
"logps/rejected": -401.72100830078125, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.911757469177246, |
|
"rewards/margins": 0.5482187867164612, |
|
"rewards/rejected": -2.4599764347076416, |
|
"step": 970 |
|
}, |
|
{ |
|
"debug/losses": 0.026758376508951187, |
|
"debug/policy_weights": 0.047099605202674866, |
|
"debug/raw_losses": 0.5511727333068848, |
|
"epoch": 0.7799442896935933, |
|
"grad_norm": 2.2837305176620237, |
|
"learning_rate": 7.005779153764682e-08, |
|
"logits/chosen": -1.8559293746948242, |
|
"logits/rejected": -1.802053689956665, |
|
"logps/chosen": -325.4739074707031, |
|
"logps/rejected": -375.23028564453125, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.826904535293579, |
|
"rewards/margins": 0.5505177974700928, |
|
"rewards/rejected": -2.37742280960083, |
|
"step": 980 |
|
}, |
|
{ |
|
"debug/losses": 0.030480515211820602, |
|
"debug/policy_weights": 0.05681230500340462, |
|
"debug/raw_losses": 0.5534275770187378, |
|
"epoch": 0.7879029048945484, |
|
"grad_norm": 1.8186545450772766, |
|
"learning_rate": 6.530286848064698e-08, |
|
"logits/chosen": -1.839967966079712, |
|
"logits/rejected": -1.8160829544067383, |
|
"logps/chosen": -325.0166320800781, |
|
"logps/rejected": -384.3879699707031, |
|
"loss": 0.0292, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7186723947525024, |
|
"rewards/margins": 0.5559494495391846, |
|
"rewards/rejected": -2.2746217250823975, |
|
"step": 990 |
|
}, |
|
{ |
|
"debug/losses": 0.026850074529647827, |
|
"debug/policy_weights": 0.05034085363149643, |
|
"debug/raw_losses": 0.5474573373794556, |
|
"epoch": 0.7958615200955034, |
|
"grad_norm": 1.3975900876939507, |
|
"learning_rate": 6.069069506815325e-08, |
|
"logits/chosen": -1.8527164459228516, |
|
"logits/rejected": -1.805755615234375, |
|
"logps/chosen": -324.24761962890625, |
|
"logps/rejected": -378.9568176269531, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7933601140975952, |
|
"rewards/margins": 0.5918117165565491, |
|
"rewards/rejected": -2.385171890258789, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7958615200955034, |
|
"eval_debug/losses": 0.027858305722475052, |
|
"eval_debug/policy_weights": 0.050272345542907715, |
|
"eval_debug/raw_losses": 0.5695627331733704, |
|
"eval_logits/chosen": -1.8391083478927612, |
|
"eval_logits/rejected": -1.8125532865524292, |
|
"eval_logps/chosen": -329.7293701171875, |
|
"eval_logps/rejected": -386.5124816894531, |
|
"eval_loss": 0.030572954565286636, |
|
"eval_rewards/accuracies": 0.6865671873092651, |
|
"eval_rewards/chosen": -1.8548587560653687, |
|
"eval_rewards/margins": 0.5032761096954346, |
|
"eval_rewards/rejected": -2.3581347465515137, |
|
"eval_runtime": 152.9123, |
|
"eval_samples_per_second": 55.927, |
|
"eval_steps_per_second": 0.876, |
|
"step": 1000 |
|
}, |
|
{ |
|
"debug/losses": 0.031778767704963684, |
|
"debug/policy_weights": 0.05145453289151192, |
|
"debug/raw_losses": 0.6123412251472473, |
|
"epoch": 0.8038201352964585, |
|
"grad_norm": 1.2716238320400024, |
|
"learning_rate": 5.6224835979863714e-08, |
|
"logits/chosen": -1.8510977029800415, |
|
"logits/rejected": -1.8080089092254639, |
|
"logps/chosen": -336.0456237792969, |
|
"logps/rejected": -369.1909484863281, |
|
"loss": 0.0295, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8573215007781982, |
|
"rewards/margins": 0.4095400273799896, |
|
"rewards/rejected": -2.2668616771698, |
|
"step": 1010 |
|
}, |
|
{ |
|
"debug/losses": 0.023703234270215034, |
|
"debug/policy_weights": 0.046647168695926666, |
|
"debug/raw_losses": 0.5592247247695923, |
|
"epoch": 0.8117787504974134, |
|
"grad_norm": 1.946635889826808, |
|
"learning_rate": 5.190874281132851e-08, |
|
"logits/chosen": -1.8227264881134033, |
|
"logits/rejected": -1.8052421808242798, |
|
"logps/chosen": -316.83843994140625, |
|
"logps/rejected": -374.4837951660156, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.771314263343811, |
|
"rewards/margins": 0.5482533574104309, |
|
"rewards/rejected": -2.319567918777466, |
|
"step": 1020 |
|
}, |
|
{ |
|
"debug/losses": 0.02848326787352562, |
|
"debug/policy_weights": 0.05005430057644844, |
|
"debug/raw_losses": 0.5512452721595764, |
|
"epoch": 0.8197373656983685, |
|
"grad_norm": 1.5331091718163976, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": -1.8013317584991455, |
|
"logits/rejected": -1.7473710775375366, |
|
"logps/chosen": -313.5793151855469, |
|
"logps/rejected": -363.24713134765625, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7351596355438232, |
|
"rewards/margins": 0.5731841921806335, |
|
"rewards/rejected": -2.3083438873291016, |
|
"step": 1030 |
|
}, |
|
{ |
|
"debug/losses": 0.033693552017211914, |
|
"debug/policy_weights": 0.05701867491006851, |
|
"debug/raw_losses": 0.6127216815948486, |
|
"epoch": 0.8276959808993235, |
|
"grad_norm": 1.8494763538425973, |
|
"learning_rate": 4.373907927832513e-08, |
|
"logits/chosen": -1.835228681564331, |
|
"logits/rejected": -1.8134419918060303, |
|
"logps/chosen": -295.58941650390625, |
|
"logps/rejected": -349.1560363769531, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6788638830184937, |
|
"rewards/margins": 0.423389732837677, |
|
"rewards/rejected": -2.1022536754608154, |
|
"step": 1040 |
|
}, |
|
{ |
|
"debug/losses": 0.03326871246099472, |
|
"debug/policy_weights": 0.06307505071163177, |
|
"debug/raw_losses": 0.5278674364089966, |
|
"epoch": 0.8356545961002786, |
|
"grad_norm": 1.6774079365802337, |
|
"learning_rate": 3.9891823124345665e-08, |
|
"logits/chosen": -1.874322533607483, |
|
"logits/rejected": -1.834237813949585, |
|
"logps/chosen": -303.7854919433594, |
|
"logps/rejected": -363.41619873046875, |
|
"loss": 0.0349, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5276892185211182, |
|
"rewards/margins": 0.6272332072257996, |
|
"rewards/rejected": -2.1549224853515625, |
|
"step": 1050 |
|
}, |
|
{ |
|
"debug/losses": 0.03040960431098938, |
|
"debug/policy_weights": 0.057738013565540314, |
|
"debug/raw_losses": 0.5673826932907104, |
|
"epoch": 0.8436132113012336, |
|
"grad_norm": 1.2896520617276703, |
|
"learning_rate": 3.620695643093924e-08, |
|
"logits/chosen": -1.873903512954712, |
|
"logits/rejected": -1.8679378032684326, |
|
"logps/chosen": -292.12353515625, |
|
"logps/rejected": -355.63787841796875, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5533664226531982, |
|
"rewards/margins": 0.4991677701473236, |
|
"rewards/rejected": -2.0525341033935547, |
|
"step": 1060 |
|
}, |
|
{ |
|
"debug/losses": 0.03549133986234665, |
|
"debug/policy_weights": 0.058590926229953766, |
|
"debug/raw_losses": 0.5505832433700562, |
|
"epoch": 0.8515718265021887, |
|
"grad_norm": 1.6406733477612518, |
|
"learning_rate": 3.268732717634032e-08, |
|
"logits/chosen": -1.8545564413070679, |
|
"logits/rejected": -1.8187520503997803, |
|
"logps/chosen": -293.1727600097656, |
|
"logps/rejected": -343.296142578125, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5677841901779175, |
|
"rewards/margins": 0.5242434740066528, |
|
"rewards/rejected": -2.0920276641845703, |
|
"step": 1070 |
|
}, |
|
{ |
|
"debug/losses": 0.03740120679140091, |
|
"debug/policy_weights": 0.06145843118429184, |
|
"debug/raw_losses": 0.6025527715682983, |
|
"epoch": 0.8595304417031436, |
|
"grad_norm": 1.603935612215664, |
|
"learning_rate": 2.9335655629243645e-08, |
|
"logits/chosen": -1.8837159872055054, |
|
"logits/rejected": -1.8695827722549438, |
|
"logps/chosen": -321.89208984375, |
|
"logps/rejected": -373.8929443359375, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.7150942087173462, |
|
"rewards/margins": 0.40743288397789, |
|
"rewards/rejected": -2.1225271224975586, |
|
"step": 1080 |
|
}, |
|
{ |
|
"debug/losses": 0.026631182059645653, |
|
"debug/policy_weights": 0.04555311053991318, |
|
"debug/raw_losses": 0.5978758335113525, |
|
"epoch": 0.8674890569040987, |
|
"grad_norm": 1.3056333450149697, |
|
"learning_rate": 2.6154532246349476e-08, |
|
"logits/chosen": -1.8537206649780273, |
|
"logits/rejected": -1.791595220565796, |
|
"logps/chosen": -319.4361572265625, |
|
"logps/rejected": -342.8656311035156, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.778660535812378, |
|
"rewards/margins": 0.41385021805763245, |
|
"rewards/rejected": -2.1925110816955566, |
|
"step": 1090 |
|
}, |
|
{ |
|
"debug/losses": 0.03684794157743454, |
|
"debug/policy_weights": 0.061137206852436066, |
|
"debug/raw_losses": 0.6424818634986877, |
|
"epoch": 0.8754476721050537, |
|
"grad_norm": 1.8597954368327896, |
|
"learning_rate": 2.31464156702382e-08, |
|
"logits/chosen": -1.856898546218872, |
|
"logits/rejected": -1.8056995868682861, |
|
"logps/chosen": -335.59161376953125, |
|
"logps/rejected": -364.3418884277344, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.7900125980377197, |
|
"rewards/margins": 0.3661695420742035, |
|
"rewards/rejected": -2.156182050704956, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8754476721050537, |
|
"eval_debug/losses": 0.029432397335767746, |
|
"eval_debug/policy_weights": 0.052661605179309845, |
|
"eval_debug/raw_losses": 0.5672317147254944, |
|
"eval_logits/chosen": -1.8722801208496094, |
|
"eval_logits/rejected": -1.8470919132232666, |
|
"eval_logps/chosen": -319.0127258300781, |
|
"eval_logps/rejected": -375.38702392578125, |
|
"eval_loss": 0.031895771622657776, |
|
"eval_rewards/accuracies": 0.6921641826629639, |
|
"eval_rewards/chosen": -1.7476924657821655, |
|
"eval_rewards/margins": 0.4991871416568756, |
|
"eval_rewards/rejected": -2.2468795776367188, |
|
"eval_runtime": 152.9853, |
|
"eval_samples_per_second": 55.901, |
|
"eval_steps_per_second": 0.876, |
|
"step": 1100 |
|
}, |
|
{ |
|
"debug/losses": 0.027387287467718124, |
|
"debug/policy_weights": 0.05117439478635788, |
|
"debug/raw_losses": 0.5718464255332947, |
|
"epoch": 0.8834062873060088, |
|
"grad_norm": 1.236953286612134, |
|
"learning_rate": 2.031363082912252e-08, |
|
"logits/chosen": -1.8504539728164673, |
|
"logits/rejected": -1.8379875421524048, |
|
"logps/chosen": -304.44122314453125, |
|
"logps/rejected": -359.46954345703125, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.7290661334991455, |
|
"rewards/margins": 0.4590110182762146, |
|
"rewards/rejected": -2.188077211380005, |
|
"step": 1110 |
|
}, |
|
{ |
|
"debug/losses": 0.029294824227690697, |
|
"debug/policy_weights": 0.05012016370892525, |
|
"debug/raw_losses": 0.5968630313873291, |
|
"epoch": 0.8913649025069638, |
|
"grad_norm": 1.5652814842138667, |
|
"learning_rate": 1.7658367139945228e-08, |
|
"logits/chosen": -1.8701921701431274, |
|
"logits/rejected": -1.8391071557998657, |
|
"logps/chosen": -316.2760009765625, |
|
"logps/rejected": -361.0913391113281, |
|
"loss": 0.0295, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.7196756601333618, |
|
"rewards/margins": 0.4586181044578552, |
|
"rewards/rejected": -2.1782937049865723, |
|
"step": 1120 |
|
}, |
|
{ |
|
"debug/losses": 0.029790222644805908, |
|
"debug/policy_weights": 0.05559501796960831, |
|
"debug/raw_losses": 0.5522772073745728, |
|
"epoch": 0.8993235177079189, |
|
"grad_norm": 1.4128987302177969, |
|
"learning_rate": 1.5182676816211632e-08, |
|
"logits/chosen": -1.8724491596221924, |
|
"logits/rejected": -1.8452503681182861, |
|
"logps/chosen": -319.95953369140625, |
|
"logps/rejected": -386.1808166503906, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7754627466201782, |
|
"rewards/margins": 0.524121880531311, |
|
"rewards/rejected": -2.2995846271514893, |
|
"step": 1130 |
|
}, |
|
{ |
|
"debug/losses": 0.04000743851065636, |
|
"debug/policy_weights": 0.06320012360811234, |
|
"debug/raw_losses": 0.6184248328208923, |
|
"epoch": 0.9072821329088738, |
|
"grad_norm": 1.3375871919682243, |
|
"learning_rate": 1.2888473281864597e-08, |
|
"logits/chosen": -1.8942562341690063, |
|
"logits/rejected": -1.8619248867034912, |
|
"logps/chosen": -329.1838684082031, |
|
"logps/rejected": -361.75543212890625, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.7136873006820679, |
|
"rewards/margins": 0.3745079040527344, |
|
"rewards/rejected": -2.0881950855255127, |
|
"step": 1140 |
|
}, |
|
{ |
|
"debug/losses": 0.0304880253970623, |
|
"debug/policy_weights": 0.05398009344935417, |
|
"debug/raw_losses": 0.5579678416252136, |
|
"epoch": 0.9152407481098289, |
|
"grad_norm": 1.164265065566766, |
|
"learning_rate": 1.0777529692427679e-08, |
|
"logits/chosen": -1.8475040197372437, |
|
"logits/rejected": -1.8063409328460693, |
|
"logps/chosen": -312.86041259765625, |
|
"logps/rejected": -353.0240173339844, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6208263635635376, |
|
"rewards/margins": 0.5101144909858704, |
|
"rewards/rejected": -2.1309409141540527, |
|
"step": 1150 |
|
}, |
|
{ |
|
"debug/losses": 0.03380966559052467, |
|
"debug/policy_weights": 0.054825879633426666, |
|
"debug/raw_losses": 0.5997228622436523, |
|
"epoch": 0.9231993633107839, |
|
"grad_norm": 1.3666307813144103, |
|
"learning_rate": 8.851477564560061e-09, |
|
"logits/chosen": -1.831713318824768, |
|
"logits/rejected": -1.7982580661773682, |
|
"logps/chosen": -308.39483642578125, |
|
"logps/rejected": -368.4901428222656, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.6622002124786377, |
|
"rewards/margins": 0.4915587306022644, |
|
"rewards/rejected": -2.153759002685547, |
|
"step": 1160 |
|
}, |
|
{ |
|
"debug/losses": 0.030011435970664024, |
|
"debug/policy_weights": 0.05814291164278984, |
|
"debug/raw_losses": 0.5802719593048096, |
|
"epoch": 0.931157978511739, |
|
"grad_norm": 1.436341263317072, |
|
"learning_rate": 7.111805515081531e-09, |
|
"logits/chosen": -1.8729360103607178, |
|
"logits/rejected": -1.8116334676742554, |
|
"logps/chosen": -339.6989440917969, |
|
"logps/rejected": -385.967041015625, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.8521687984466553, |
|
"rewards/margins": 0.4960354268550873, |
|
"rewards/rejected": -2.3482041358947754, |
|
"step": 1170 |
|
}, |
|
{ |
|
"debug/losses": 0.03459464758634567, |
|
"debug/policy_weights": 0.06046764925122261, |
|
"debug/raw_losses": 0.5656360387802124, |
|
"epoch": 0.939116593712694, |
|
"grad_norm": 1.3207132160045205, |
|
"learning_rate": 5.559858110443016e-09, |
|
"logits/chosen": -1.8872379064559937, |
|
"logits/rejected": -1.8523366451263428, |
|
"logps/chosen": -322.80029296875, |
|
"logps/rejected": -378.6051025390625, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6689224243164062, |
|
"rewards/margins": 0.5369892716407776, |
|
"rewards/rejected": -2.205911636352539, |
|
"step": 1180 |
|
}, |
|
{ |
|
"debug/losses": 0.023186931386590004, |
|
"debug/policy_weights": 0.0461835041642189, |
|
"debug/raw_losses": 0.5589593052864075, |
|
"epoch": 0.947075208913649, |
|
"grad_norm": 1.3641453732624562, |
|
"learning_rate": 4.196834827531276e-09, |
|
"logits/chosen": -1.8458702564239502, |
|
"logits/rejected": -1.8175357580184937, |
|
"logps/chosen": -324.30230712890625, |
|
"logps/rejected": -383.04608154296875, |
|
"loss": 0.0296, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.8120216131210327, |
|
"rewards/margins": 0.5915289521217346, |
|
"rewards/rejected": -2.403550624847412, |
|
"step": 1190 |
|
}, |
|
{ |
|
"debug/losses": 0.024858497083187103, |
|
"debug/policy_weights": 0.04600748419761658, |
|
"debug/raw_losses": 0.5523272752761841, |
|
"epoch": 0.955033824114604, |
|
"grad_norm": 1.4362856575591538, |
|
"learning_rate": 3.023789126611137e-09, |
|
"logits/chosen": -1.8598756790161133, |
|
"logits/rejected": -1.8078199625015259, |
|
"logps/chosen": -317.81170654296875, |
|
"logps/rejected": -364.5005798339844, |
|
"loss": 0.031, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.704816222190857, |
|
"rewards/margins": 0.5389941930770874, |
|
"rewards/rejected": -2.2438104152679443, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.955033824114604, |
|
"eval_debug/losses": 0.029611436650156975, |
|
"eval_debug/policy_weights": 0.05299457162618637, |
|
"eval_debug/raw_losses": 0.5668274760246277, |
|
"eval_logits/chosen": -1.8642234802246094, |
|
"eval_logits/rejected": -1.8383311033248901, |
|
"eval_logps/chosen": -319.97027587890625, |
|
"eval_logps/rejected": -377.42315673828125, |
|
"eval_loss": 0.03207932412624359, |
|
"eval_rewards/accuracies": 0.6958954930305481, |
|
"eval_rewards/chosen": -1.757267713546753, |
|
"eval_rewards/margins": 0.5099742412567139, |
|
"eval_rewards/rejected": -2.2672417163848877, |
|
"eval_runtime": 152.7819, |
|
"eval_samples_per_second": 55.975, |
|
"eval_steps_per_second": 0.877, |
|
"step": 1200 |
|
}, |
|
{ |
|
"debug/losses": 0.02852838858962059, |
|
"debug/policy_weights": 0.051258690655231476, |
|
"debug/raw_losses": 0.5769435167312622, |
|
"epoch": 0.9629924393155591, |
|
"grad_norm": 1.3807763771834016, |
|
"learning_rate": 2.041627637121929e-09, |
|
"logits/chosen": -1.8483145236968994, |
|
"logits/rejected": -1.8244221210479736, |
|
"logps/chosen": -316.3843994140625, |
|
"logps/rejected": -385.6697692871094, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.7493064403533936, |
|
"rewards/margins": 0.5202833414077759, |
|
"rewards/rejected": -2.269589900970459, |
|
"step": 1210 |
|
}, |
|
{ |
|
"debug/losses": 0.029417548328638077, |
|
"debug/policy_weights": 0.051362644881010056, |
|
"debug/raw_losses": 0.5895043611526489, |
|
"epoch": 0.9709510545165141, |
|
"grad_norm": 1.6916257941886172, |
|
"learning_rate": 1.2511094569571668e-09, |
|
"logits/chosen": -1.8148953914642334, |
|
"logits/rejected": -1.7570714950561523, |
|
"logps/chosen": -321.575927734375, |
|
"logps/rejected": -342.35205078125, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7315263748168945, |
|
"rewards/margins": 0.4397401809692383, |
|
"rewards/rejected": -2.171266794204712, |
|
"step": 1220 |
|
}, |
|
{ |
|
"debug/losses": 0.028057556599378586, |
|
"debug/policy_weights": 0.047226615250110626, |
|
"debug/raw_losses": 0.5787237882614136, |
|
"epoch": 0.9789096697174692, |
|
"grad_norm": 1.2061064281394842, |
|
"learning_rate": 6.528455657691112e-10, |
|
"logits/chosen": -1.817239761352539, |
|
"logits/rejected": -1.8122575283050537, |
|
"logps/chosen": -320.651611328125, |
|
"logps/rejected": -379.8753967285156, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.8191404342651367, |
|
"rewards/margins": 0.4797174036502838, |
|
"rewards/rejected": -2.2988579273223877, |
|
"step": 1230 |
|
}, |
|
{ |
|
"debug/losses": 0.03001987561583519, |
|
"debug/policy_weights": 0.052023641765117645, |
|
"debug/raw_losses": 0.5478265881538391, |
|
"epoch": 0.9868682849184242, |
|
"grad_norm": 1.4769747929733708, |
|
"learning_rate": 2.4729835275189016e-10, |
|
"logits/chosen": -1.8267762660980225, |
|
"logits/rejected": -1.7973419427871704, |
|
"logps/chosen": -319.90771484375, |
|
"logps/rejected": -387.49957275390625, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7920163869857788, |
|
"rewards/margins": 0.612352728843689, |
|
"rewards/rejected": -2.4043688774108887, |
|
"step": 1240 |
|
}, |
|
{ |
|
"debug/losses": 0.027796531096100807, |
|
"debug/policy_weights": 0.051054131239652634, |
|
"debug/raw_losses": 0.5395588278770447, |
|
"epoch": 0.9948269001193792, |
|
"grad_norm": 1.7339598026750096, |
|
"learning_rate": 3.478125926756337e-11, |
|
"logits/chosen": -1.826674222946167, |
|
"logits/rejected": -1.8084548711776733, |
|
"logps/chosen": -319.61077880859375, |
|
"logps/rejected": -389.81427001953125, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.7789347171783447, |
|
"rewards/margins": 0.5856661200523376, |
|
"rewards/rejected": -2.364600896835327, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9996020692399522, |
|
"step": 1256, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05414291525817221, |
|
"train_runtime": 10529.1077, |
|
"train_samples_per_second": 15.272, |
|
"train_steps_per_second": 0.119 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1256, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|