{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 478, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -2.6023898124694824, "logits/rejected": -2.49088191986084, "logps/chosen": -330.5306396484375, "logps/rejected": -275.0410461425781, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.624011516571045, "logits/rejected": -2.59273624420166, "logps/chosen": -247.91769409179688, "logps/rejected": -215.07041931152344, "loss": 0.6932, "rewards/accuracies": 0.3541666567325592, "rewards/chosen": -0.00047609664034098387, "rewards/margins": -0.0011458636727184057, "rewards/rejected": 0.0006697670323774219, "step": 10 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.614908218383789, "logits/rejected": -2.573396682739258, "logps/chosen": -273.2959289550781, "logps/rejected": -251.2639617919922, "loss": 0.6925, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.0009359431569464505, "rewards/margins": 0.002007069531828165, "rewards/rejected": -0.0010711264330893755, "step": 20 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.6856637001037598, "logits/rejected": -2.6220130920410156, "logps/chosen": -284.86114501953125, "logps/rejected": -277.53057861328125, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": 0.00352325732819736, "rewards/margins": 0.007650823798030615, "rewards/rejected": -0.0041275653056800365, "step": 30 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.579878807067871, "logits/rejected": -2.5135815143585205, "logps/chosen": -292.1109619140625, "logps/rejected": -274.44683837890625, "loss": 0.6756, "rewards/accuracies": 0.65625, "rewards/chosen": 0.02378256432712078, "rewards/margins": 0.03553395718336105, "rewards/rejected": -0.011751385405659676, "step": 40 }, { "epoch": 0.1, "learning_rate": 4.999733114418725e-07, "logits/chosen": -2.5302300453186035, "logits/rejected": -2.4865477085113525, "logps/chosen": -315.3640441894531, "logps/rejected": -310.5618591308594, "loss": 0.6601, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.014850592240691185, "rewards/margins": 0.06933780014514923, "rewards/rejected": -0.08418838679790497, "step": 50 }, { "epoch": 0.13, "learning_rate": 4.990398100856366e-07, "logits/chosen": -2.461594820022583, "logits/rejected": -2.393406867980957, "logps/chosen": -264.4418640136719, "logps/rejected": -252.02163696289062, "loss": 0.6391, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.06258662045001984, "rewards/margins": 0.1386002004146576, "rewards/rejected": -0.20118682086467743, "step": 60 }, { "epoch": 0.15, "learning_rate": 4.967775735898179e-07, "logits/chosen": -2.5176403522491455, "logits/rejected": -2.444599151611328, "logps/chosen": -308.10845947265625, "logps/rejected": -298.1520690917969, "loss": 0.6219, "rewards/accuracies": 0.65625, "rewards/chosen": -0.26666340231895447, "rewards/margins": 0.21313416957855225, "rewards/rejected": -0.4797976016998291, "step": 70 }, { "epoch": 0.17, "learning_rate": 4.931986719649298e-07, "logits/chosen": -2.4516353607177734, "logits/rejected": -2.4085216522216797, "logps/chosen": -298.8356018066406, "logps/rejected": -325.5304260253906, "loss": 0.611, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2911642789840698, "rewards/margins": 0.20117318630218506, "rewards/rejected": -0.49233752489089966, "step": 80 }, { "epoch": 0.19, "learning_rate": 4.883222001996351e-07, "logits/chosen": -2.426361560821533, "logits/rejected": -2.3368563652038574, "logps/chosen": -293.616943359375, "logps/rejected": -308.7396545410156, "loss": 0.5867, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.2579975724220276, "rewards/margins": 0.30983540415763855, "rewards/rejected": -0.5678330063819885, "step": 90 }, { "epoch": 0.21, "learning_rate": 4.821741763807186e-07, "logits/chosen": -2.488579034805298, "logits/rejected": -2.3800113201141357, "logps/chosen": -328.0105285644531, "logps/rejected": -337.8644104003906, "loss": 0.5723, "rewards/accuracies": 0.65625, "rewards/chosen": -0.4366111755371094, "rewards/margins": 0.3044855296611786, "rewards/rejected": -0.7410967350006104, "step": 100 }, { "epoch": 0.21, "eval_logits/chosen": -2.4070217609405518, "eval_logits/rejected": -2.3494362831115723, "eval_logps/chosen": -304.3812255859375, "eval_logps/rejected": -350.8694763183594, "eval_loss": 0.5851432681083679, "eval_rewards/accuracies": 0.703125, "eval_rewards/chosen": -0.4096587896347046, "eval_rewards/margins": 0.46554654836654663, "eval_rewards/rejected": -0.8752052783966064, "eval_runtime": 91.1907, "eval_samples_per_second": 21.932, "eval_steps_per_second": 0.351, "step": 100 }, { "epoch": 0.23, "learning_rate": 4.747874028753375e-07, "logits/chosen": -2.0290980339050293, "logits/rejected": -1.8976500034332275, "logps/chosen": -374.5489807128906, "logps/rejected": -375.1778869628906, "loss": 0.5723, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.5513430833816528, "rewards/margins": 0.49042654037475586, "rewards/rejected": -1.0417697429656982, "step": 110 }, { "epoch": 0.25, "learning_rate": 4.662012913161997e-07, "logits/chosen": -0.8261772990226746, "logits/rejected": -0.4543725550174713, "logps/chosen": -370.54437255859375, "logps/rejected": -376.8744201660156, "loss": 0.546, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.893993079662323, "rewards/margins": 0.5693421363830566, "rewards/rejected": -1.4633351564407349, "step": 120 }, { "epoch": 0.27, "learning_rate": 4.5646165232345103e-07, "logits/chosen": -0.5733903050422668, "logits/rejected": -0.41144052147865295, "logps/chosen": -331.88458251953125, "logps/rejected": -418.39404296875, "loss": 0.5492, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6849642395973206, "rewards/margins": 0.5858219265937805, "rewards/rejected": -1.2707862854003906, "step": 130 }, { "epoch": 0.29, "learning_rate": 4.456204510851956e-07, "logits/chosen": -0.7106949687004089, "logits/rejected": -0.2236645519733429, "logps/chosen": -367.40484619140625, "logps/rejected": -390.296142578125, "loss": 0.5335, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7277344465255737, "rewards/margins": 0.6220408082008362, "rewards/rejected": -1.3497753143310547, "step": 140 }, { "epoch": 0.31, "learning_rate": 4.337355301007335e-07, "logits/chosen": -0.2654598355293274, "logits/rejected": 0.43950486183166504, "logps/chosen": -385.2984924316406, "logps/rejected": -397.6144714355469, "loss": 0.5356, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9714946746826172, "rewards/margins": 0.61899733543396, "rewards/rejected": -1.5904920101165771, "step": 150 }, { "epoch": 0.33, "learning_rate": 4.2087030056579986e-07, "logits/chosen": 0.1484789103269577, "logits/rejected": 0.8263363838195801, "logps/chosen": -369.7867736816406, "logps/rejected": -436.39373779296875, "loss": 0.5065, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8267679214477539, "rewards/margins": 0.8252193331718445, "rewards/rejected": -1.6519873142242432, "step": 160 }, { "epoch": 0.36, "learning_rate": 4.070934040463998e-07, "logits/chosen": 0.2387746274471283, "logits/rejected": 0.7541650533676147, "logps/chosen": -330.07525634765625, "logps/rejected": -366.41204833984375, "loss": 0.5659, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.8212235569953918, "rewards/margins": 0.529572606086731, "rewards/rejected": -1.3507962226867676, "step": 170 }, { "epoch": 0.38, "learning_rate": 3.9247834624635404e-07, "logits/chosen": 0.45646604895591736, "logits/rejected": 0.8084599375724792, "logps/chosen": -366.8728942871094, "logps/rejected": -432.2496032714844, "loss": 0.5249, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6927820444107056, "rewards/margins": 0.8015207052230835, "rewards/rejected": -1.4943029880523682, "step": 180 }, { "epoch": 0.4, "learning_rate": 3.7710310482256523e-07, "logits/chosen": 1.0517617464065552, "logits/rejected": 1.6709725856781006, "logps/chosen": -378.12396240234375, "logps/rejected": -458.1866149902344, "loss": 0.5056, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9326898455619812, "rewards/margins": 0.9154269099235535, "rewards/rejected": -1.8481168746948242, "step": 190 }, { "epoch": 0.42, "learning_rate": 3.610497133404795e-07, "logits/chosen": 0.9935806393623352, "logits/rejected": 1.650398850440979, "logps/chosen": -391.5450744628906, "logps/rejected": -418.3558654785156, "loss": 0.5084, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.0861790180206299, "rewards/margins": 0.634604275226593, "rewards/rejected": -1.7207832336425781, "step": 200 }, { "epoch": 0.42, "eval_logits/chosen": 0.9247687458992004, "eval_logits/rejected": 1.3918358087539673, "eval_logps/chosen": -354.5789794921875, "eval_logps/rejected": -438.0662536621094, "eval_loss": 0.5251370072364807, "eval_rewards/accuracies": 0.7421875, "eval_rewards/chosen": -0.9116362929344177, "eval_rewards/margins": 0.8355368375778198, "eval_rewards/rejected": -1.7471731901168823, "eval_runtime": 91.7577, "eval_samples_per_second": 21.797, "eval_steps_per_second": 0.349, "step": 200 }, { "epoch": 0.44, "learning_rate": 3.4440382358952115e-07, "logits/chosen": 1.0475047826766968, "logits/rejected": 1.849473237991333, "logps/chosen": -367.184814453125, "logps/rejected": -398.2117614746094, "loss": 0.5251, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.8909347653388977, "rewards/margins": 0.6959229707717896, "rewards/rejected": -1.586857557296753, "step": 210 }, { "epoch": 0.46, "learning_rate": 3.272542485937368e-07, "logits/chosen": 1.6884968280792236, "logits/rejected": 2.2008445262908936, "logps/chosen": -353.2514343261719, "logps/rejected": -404.71221923828125, "loss": 0.5269, "rewards/accuracies": 0.75, "rewards/chosen": -0.7567670345306396, "rewards/margins": 0.8415945768356323, "rewards/rejected": -1.5983617305755615, "step": 220 }, { "epoch": 0.48, "learning_rate": 3.096924887558854e-07, "logits/chosen": 1.460933804512024, "logits/rejected": 1.9314343929290771, "logps/chosen": -351.2489318847656, "logps/rejected": -453.9790954589844, "loss": 0.519, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7599745988845825, "rewards/margins": 0.8532025218009949, "rewards/rejected": -1.6131770610809326, "step": 230 }, { "epoch": 0.5, "learning_rate": 2.9181224366319943e-07, "logits/chosen": 1.796936273574829, "logits/rejected": 2.389878988265991, "logps/chosen": -351.67498779296875, "logps/rejected": -421.3821716308594, "loss": 0.5261, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9927783012390137, "rewards/margins": 0.786289632320404, "rewards/rejected": -1.7790677547454834, "step": 240 }, { "epoch": 0.52, "learning_rate": 2.7370891215954565e-07, "logits/chosen": 1.5744327306747437, "logits/rejected": 2.3407230377197266, "logps/chosen": -358.4691467285156, "logps/rejected": -418.01031494140625, "loss": 0.5134, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.903947651386261, "rewards/margins": 0.6940609216690063, "rewards/rejected": -1.5980085134506226, "step": 250 }, { "epoch": 0.54, "learning_rate": 2.55479083351317e-07, "logits/chosen": 1.873732566833496, "logits/rejected": 2.9474740028381348, "logps/chosen": -371.85552978515625, "logps/rejected": -420.95904541015625, "loss": 0.4922, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.9079627990722656, "rewards/margins": 0.8738547563552856, "rewards/rejected": -1.7818174362182617, "step": 260 }, { "epoch": 0.56, "learning_rate": 2.3722002126275822e-07, "logits/chosen": 2.415181875228882, "logits/rejected": 3.162013530731201, "logps/chosen": -388.0815734863281, "logps/rejected": -478.11785888671875, "loss": 0.498, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.128756046295166, "rewards/margins": 1.0180633068084717, "rewards/rejected": -2.146819591522217, "step": 270 }, { "epoch": 0.59, "learning_rate": 2.19029145890313e-07, "logits/chosen": 1.9844467639923096, "logits/rejected": 2.9561781883239746, "logps/chosen": -369.2903747558594, "logps/rejected": -419.6259765625, "loss": 0.5207, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9253425598144531, "rewards/margins": 0.8587535619735718, "rewards/rejected": -1.784096121788025, "step": 280 }, { "epoch": 0.61, "learning_rate": 2.0100351342479216e-07, "logits/chosen": 1.8705106973648071, "logits/rejected": 2.6589739322662354, "logps/chosen": -380.0862731933594, "logps/rejected": -439.79168701171875, "loss": 0.515, "rewards/accuracies": 0.71875, "rewards/chosen": -0.9231119155883789, "rewards/margins": 0.735679030418396, "rewards/rejected": -1.6587913036346436, "step": 290 }, { "epoch": 0.63, "learning_rate": 1.8323929841460178e-07, "logits/chosen": 1.3944432735443115, "logits/rejected": 2.3618969917297363, "logps/chosen": -389.6896057128906, "logps/rejected": -470.2090759277344, "loss": 0.5059, "rewards/accuracies": 0.71875, "rewards/chosen": -0.8919968605041504, "rewards/margins": 0.6746976971626282, "rewards/rejected": -1.5666944980621338, "step": 300 }, { "epoch": 0.63, "eval_logits/chosen": 1.2558308839797974, "eval_logits/rejected": 2.033073902130127, "eval_logps/chosen": -349.8758239746094, "eval_logps/rejected": -438.77349853515625, "eval_loss": 0.5130496621131897, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.8646047711372375, "eval_rewards/margins": 0.8896409273147583, "eval_rewards/rejected": -1.7542455196380615, "eval_runtime": 92.0798, "eval_samples_per_second": 21.72, "eval_steps_per_second": 0.348, "step": 300 }, { "epoch": 0.65, "learning_rate": 1.6583128063291573e-07, "logits/chosen": 1.2974698543548584, "logits/rejected": 2.6388087272644043, "logps/chosen": -382.4002990722656, "logps/rejected": -406.01153564453125, "loss": 0.4978, "rewards/accuracies": 0.75, "rewards/chosen": -1.0794718265533447, "rewards/margins": 0.7805131673812866, "rewards/rejected": -1.8599849939346313, "step": 310 }, { "epoch": 0.67, "learning_rate": 1.488723393865766e-07, "logits/chosen": 1.9306262731552124, "logits/rejected": 2.9958901405334473, "logps/chosen": -357.4389953613281, "logps/rejected": -452.7220764160156, "loss": 0.5064, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0856704711914062, "rewards/margins": 1.057279109954834, "rewards/rejected": -2.1429495811462402, "step": 320 }, { "epoch": 0.69, "learning_rate": 1.3245295796480788e-07, "logits/chosen": 1.4244121313095093, "logits/rejected": 2.2654335498809814, "logps/chosen": -404.91082763671875, "logps/rejected": -450.8277893066406, "loss": 0.5096, "rewards/accuracies": 0.75, "rewards/chosen": -1.1859899759292603, "rewards/margins": 0.7777279019355774, "rewards/rejected": -1.9637176990509033, "step": 330 }, { "epoch": 0.71, "learning_rate": 1.1666074087171627e-07, "logits/chosen": 1.5507278442382812, "logits/rejected": 2.3268961906433105, "logps/chosen": -363.16473388671875, "logps/rejected": -420.6800231933594, "loss": 0.5173, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0014616250991821, "rewards/margins": 0.7089160680770874, "rewards/rejected": -1.7103776931762695, "step": 340 }, { "epoch": 0.73, "learning_rate": 1.0157994641835734e-07, "logits/chosen": 1.5202906131744385, "logits/rejected": 2.6713767051696777, "logps/chosen": -359.4294128417969, "logps/rejected": -433.394287109375, "loss": 0.4787, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.0805784463882446, "rewards/margins": 0.9193571209907532, "rewards/rejected": -1.999935507774353, "step": 350 }, { "epoch": 0.75, "learning_rate": 8.729103716819111e-08, "logits/chosen": 1.5974103212356567, "logits/rejected": 3.016284942626953, "logps/chosen": -435.1712951660156, "logps/rejected": -469.9830017089844, "loss": 0.4902, "rewards/accuracies": 0.75, "rewards/chosen": -1.1632494926452637, "rewards/margins": 0.9136824607849121, "rewards/rejected": -2.0769317150115967, "step": 360 }, { "epoch": 0.77, "learning_rate": 7.387025063449081e-08, "logits/chosen": 1.7092777490615845, "logits/rejected": 2.965677261352539, "logps/chosen": -423.5621643066406, "logps/rejected": -466.57196044921875, "loss": 0.5002, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.1220273971557617, "rewards/margins": 0.9678171277046204, "rewards/rejected": -2.0898444652557373, "step": 370 }, { "epoch": 0.79, "learning_rate": 6.138919252022435e-08, "logits/chosen": 1.7860336303710938, "logits/rejected": 2.569241523742676, "logps/chosen": -395.4902648925781, "logps/rejected": -483.0901794433594, "loss": 0.4772, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1515331268310547, "rewards/margins": 0.8898499608039856, "rewards/rejected": -2.0413832664489746, "step": 380 }, { "epoch": 0.82, "learning_rate": 4.991445467064689e-08, "logits/chosen": 2.0826852321624756, "logits/rejected": 2.8060660362243652, "logps/chosen": -398.78375244140625, "logps/rejected": -471.2264099121094, "loss": 0.5066, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2567694187164307, "rewards/margins": 0.7216086983680725, "rewards/rejected": -1.9783780574798584, "step": 390 }, { "epoch": 0.84, "learning_rate": 3.9507259776993954e-08, "logits/chosen": 1.802354097366333, "logits/rejected": 2.5923492908477783, "logps/chosen": -446.500244140625, "logps/rejected": -510.20269775390625, "loss": 0.4853, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.2238214015960693, "rewards/margins": 0.9289990663528442, "rewards/rejected": -2.152820587158203, "step": 400 }, { "epoch": 0.84, "eval_logits/chosen": 1.8194458484649658, "eval_logits/rejected": 2.592175245285034, "eval_logps/chosen": -372.7066650390625, "eval_logps/rejected": -474.1963195800781, "eval_loss": 0.5050143003463745, "eval_rewards/accuracies": 0.75390625, "eval_rewards/chosen": -1.0929131507873535, "eval_rewards/margins": 1.0155609846115112, "eval_rewards/rejected": -2.108474016189575, "eval_runtime": 90.5801, "eval_samples_per_second": 22.08, "eval_steps_per_second": 0.353, "step": 400 }, { "epoch": 0.86, "learning_rate": 3.022313472693447e-08, "logits/chosen": 2.2372403144836426, "logits/rejected": 3.196664333343506, "logps/chosen": -370.81719970703125, "logps/rejected": -452.06549072265625, "loss": 0.5086, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.1016533374786377, "rewards/margins": 0.9261430501937866, "rewards/rejected": -2.0277962684631348, "step": 410 }, { "epoch": 0.88, "learning_rate": 2.2111614344599684e-08, "logits/chosen": 1.831080675125122, "logits/rejected": 2.4410791397094727, "logps/chosen": -385.7922058105469, "logps/rejected": -492.590576171875, "loss": 0.5061, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.084149956703186, "rewards/margins": 0.9615718722343445, "rewards/rejected": -2.0457215309143066, "step": 420 }, { "epoch": 0.9, "learning_rate": 1.521597710086439e-08, "logits/chosen": 1.4260971546173096, "logits/rejected": 2.3162856101989746, "logps/chosen": -407.1165466308594, "logps/rejected": -454.90374755859375, "loss": 0.5059, "rewards/accuracies": 0.75, "rewards/chosen": -1.0966671705245972, "rewards/margins": 0.9018322229385376, "rewards/rejected": -1.9984995126724243, "step": 430 }, { "epoch": 0.92, "learning_rate": 9.57301420397924e-09, "logits/chosen": 1.783463716506958, "logits/rejected": 2.5885117053985596, "logps/chosen": -373.5993347167969, "logps/rejected": -458.12091064453125, "loss": 0.487, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.1429402828216553, "rewards/margins": 0.8700854182243347, "rewards/rejected": -2.0130257606506348, "step": 440 }, { "epoch": 0.94, "learning_rate": 5.212833302556258e-09, "logits/chosen": 1.8070141077041626, "logits/rejected": 2.747885227203369, "logps/chosen": -355.58221435546875, "logps/rejected": -426.42584228515625, "loss": 0.5082, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.0844265222549438, "rewards/margins": 0.8474240303039551, "rewards/rejected": -1.9318506717681885, "step": 450 }, { "epoch": 0.96, "learning_rate": 2.158697848236607e-09, "logits/chosen": 2.0278899669647217, "logits/rejected": 3.022653818130493, "logps/chosen": -362.0993347167969, "logps/rejected": -428.6521911621094, "loss": 0.4861, "rewards/accuracies": 0.75, "rewards/chosen": -1.1751190423965454, "rewards/margins": 0.813240647315979, "rewards/rejected": -1.9883596897125244, "step": 460 }, { "epoch": 0.98, "learning_rate": 4.269029751107489e-10, "logits/chosen": 1.3355131149291992, "logits/rejected": 2.729475736618042, "logps/chosen": -406.28033447265625, "logps/rejected": -480.8604431152344, "loss": 0.4807, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0706168413162231, "rewards/margins": 1.0933626890182495, "rewards/rejected": -2.1639795303344727, "step": 470 }, { "epoch": 1.0, "step": 478, "total_flos": 0.0, "train_loss": 0.5379065808890754, "train_runtime": 5396.8094, "train_samples_per_second": 11.328, "train_steps_per_second": 0.089 } ], "logging_steps": 10, "max_steps": 478, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }