|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.6023898124694824, |
|
"logits/rejected": -2.49088191986084, |
|
"logps/chosen": -330.5306396484375, |
|
"logps/rejected": -275.0410461425781, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.624011516571045, |
|
"logits/rejected": -2.59273624420166, |
|
"logps/chosen": -247.91769409179688, |
|
"logps/rejected": -215.07041931152344, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3541666567325592, |
|
"rewards/chosen": -0.00047609664034098387, |
|
"rewards/margins": -0.0011458636727184057, |
|
"rewards/rejected": 0.0006697670323774219, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.614908218383789, |
|
"logits/rejected": -2.573396682739258, |
|
"logps/chosen": -273.2959289550781, |
|
"logps/rejected": -251.2639617919922, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0009359431569464505, |
|
"rewards/margins": 0.002007069531828165, |
|
"rewards/rejected": -0.0010711264330893755, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.6856637001037598, |
|
"logits/rejected": -2.6220130920410156, |
|
"logps/chosen": -284.86114501953125, |
|
"logps/rejected": -277.53057861328125, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.00352325732819736, |
|
"rewards/margins": 0.007650823798030615, |
|
"rewards/rejected": -0.0041275653056800365, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.579878807067871, |
|
"logits/rejected": -2.5135815143585205, |
|
"logps/chosen": -292.1109619140625, |
|
"logps/rejected": -274.44683837890625, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.02378256432712078, |
|
"rewards/margins": 0.03553395718336105, |
|
"rewards/rejected": -0.011751385405659676, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.5302300453186035, |
|
"logits/rejected": -2.4865477085113525, |
|
"logps/chosen": -315.3640441894531, |
|
"logps/rejected": -310.5618591308594, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.014850592240691185, |
|
"rewards/margins": 0.06933780014514923, |
|
"rewards/rejected": -0.08418838679790497, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.461594820022583, |
|
"logits/rejected": -2.393406867980957, |
|
"logps/chosen": -264.4418640136719, |
|
"logps/rejected": -252.02163696289062, |
|
"loss": 0.6391, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.06258662045001984, |
|
"rewards/margins": 0.1386002004146576, |
|
"rewards/rejected": -0.20118682086467743, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.5176403522491455, |
|
"logits/rejected": -2.444599151611328, |
|
"logps/chosen": -308.10845947265625, |
|
"logps/rejected": -298.1520690917969, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.26666340231895447, |
|
"rewards/margins": 0.21313416957855225, |
|
"rewards/rejected": -0.4797976016998291, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.4516353607177734, |
|
"logits/rejected": -2.4085216522216797, |
|
"logps/chosen": -298.8356018066406, |
|
"logps/rejected": -325.5304260253906, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2911642789840698, |
|
"rewards/margins": 0.20117318630218506, |
|
"rewards/rejected": -0.49233752489089966, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.426361560821533, |
|
"logits/rejected": -2.3368563652038574, |
|
"logps/chosen": -293.616943359375, |
|
"logps/rejected": -308.7396545410156, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.2579975724220276, |
|
"rewards/margins": 0.30983540415763855, |
|
"rewards/rejected": -0.5678330063819885, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.488579034805298, |
|
"logits/rejected": -2.3800113201141357, |
|
"logps/chosen": -328.0105285644531, |
|
"logps/rejected": -337.8644104003906, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4366111755371094, |
|
"rewards/margins": 0.3044855296611786, |
|
"rewards/rejected": -0.7410967350006104, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.4070217609405518, |
|
"eval_logits/rejected": -2.3494362831115723, |
|
"eval_logps/chosen": -304.3812255859375, |
|
"eval_logps/rejected": -350.8694763183594, |
|
"eval_loss": 0.5851432681083679, |
|
"eval_rewards/accuracies": 0.703125, |
|
"eval_rewards/chosen": -0.4096587896347046, |
|
"eval_rewards/margins": 0.46554654836654663, |
|
"eval_rewards/rejected": -0.8752052783966064, |
|
"eval_runtime": 91.1907, |
|
"eval_samples_per_second": 21.932, |
|
"eval_steps_per_second": 0.351, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.0290980339050293, |
|
"logits/rejected": -1.8976500034332275, |
|
"logps/chosen": -374.5489807128906, |
|
"logps/rejected": -375.1778869628906, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5513430833816528, |
|
"rewards/margins": 0.49042654037475586, |
|
"rewards/rejected": -1.0417697429656982, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -0.8261772990226746, |
|
"logits/rejected": -0.4543725550174713, |
|
"logps/chosen": -370.54437255859375, |
|
"logps/rejected": -376.8744201660156, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.893993079662323, |
|
"rewards/margins": 0.5693421363830566, |
|
"rewards/rejected": -1.4633351564407349, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -0.5733903050422668, |
|
"logits/rejected": -0.41144052147865295, |
|
"logps/chosen": -331.88458251953125, |
|
"logps/rejected": -418.39404296875, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6849642395973206, |
|
"rewards/margins": 0.5858219265937805, |
|
"rewards/rejected": -1.2707862854003906, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -0.7106949687004089, |
|
"logits/rejected": -0.2236645519733429, |
|
"logps/chosen": -367.40484619140625, |
|
"logps/rejected": -390.296142578125, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7277344465255737, |
|
"rewards/margins": 0.6220408082008362, |
|
"rewards/rejected": -1.3497753143310547, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -0.2654598355293274, |
|
"logits/rejected": 0.43950486183166504, |
|
"logps/chosen": -385.2984924316406, |
|
"logps/rejected": -397.6144714355469, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9714946746826172, |
|
"rewards/margins": 0.61899733543396, |
|
"rewards/rejected": -1.5904920101165771, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": 0.1484789103269577, |
|
"logits/rejected": 0.8263363838195801, |
|
"logps/chosen": -369.7867736816406, |
|
"logps/rejected": -436.39373779296875, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8267679214477539, |
|
"rewards/margins": 0.8252193331718445, |
|
"rewards/rejected": -1.6519873142242432, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": 0.2387746274471283, |
|
"logits/rejected": 0.7541650533676147, |
|
"logps/chosen": -330.07525634765625, |
|
"logps/rejected": -366.41204833984375, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8212235569953918, |
|
"rewards/margins": 0.529572606086731, |
|
"rewards/rejected": -1.3507962226867676, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": 0.45646604895591736, |
|
"logits/rejected": 0.8084599375724792, |
|
"logps/chosen": -366.8728942871094, |
|
"logps/rejected": -432.2496032714844, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6927820444107056, |
|
"rewards/margins": 0.8015207052230835, |
|
"rewards/rejected": -1.4943029880523682, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": 1.0517617464065552, |
|
"logits/rejected": 1.6709725856781006, |
|
"logps/chosen": -378.12396240234375, |
|
"logps/rejected": -458.1866149902344, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9326898455619812, |
|
"rewards/margins": 0.9154269099235535, |
|
"rewards/rejected": -1.8481168746948242, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": 0.9935806393623352, |
|
"logits/rejected": 1.650398850440979, |
|
"logps/chosen": -391.5450744628906, |
|
"logps/rejected": -418.3558654785156, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0861790180206299, |
|
"rewards/margins": 0.634604275226593, |
|
"rewards/rejected": -1.7207832336425781, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 0.9247687458992004, |
|
"eval_logits/rejected": 1.3918358087539673, |
|
"eval_logps/chosen": -354.5789794921875, |
|
"eval_logps/rejected": -438.0662536621094, |
|
"eval_loss": 0.5251370072364807, |
|
"eval_rewards/accuracies": 0.7421875, |
|
"eval_rewards/chosen": -0.9116362929344177, |
|
"eval_rewards/margins": 0.8355368375778198, |
|
"eval_rewards/rejected": -1.7471731901168823, |
|
"eval_runtime": 91.7577, |
|
"eval_samples_per_second": 21.797, |
|
"eval_steps_per_second": 0.349, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": 1.0475047826766968, |
|
"logits/rejected": 1.849473237991333, |
|
"logps/chosen": -367.184814453125, |
|
"logps/rejected": -398.2117614746094, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8909347653388977, |
|
"rewards/margins": 0.6959229707717896, |
|
"rewards/rejected": -1.586857557296753, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": 1.6884968280792236, |
|
"logits/rejected": 2.2008445262908936, |
|
"logps/chosen": -353.2514343261719, |
|
"logps/rejected": -404.71221923828125, |
|
"loss": 0.5269, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7567670345306396, |
|
"rewards/margins": 0.8415945768356323, |
|
"rewards/rejected": -1.5983617305755615, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": 1.460933804512024, |
|
"logits/rejected": 1.9314343929290771, |
|
"logps/chosen": -351.2489318847656, |
|
"logps/rejected": -453.9790954589844, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.7599745988845825, |
|
"rewards/margins": 0.8532025218009949, |
|
"rewards/rejected": -1.6131770610809326, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": 1.796936273574829, |
|
"logits/rejected": 2.389878988265991, |
|
"logps/chosen": -351.67498779296875, |
|
"logps/rejected": -421.3821716308594, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9927783012390137, |
|
"rewards/margins": 0.786289632320404, |
|
"rewards/rejected": -1.7790677547454834, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": 1.5744327306747437, |
|
"logits/rejected": 2.3407230377197266, |
|
"logps/chosen": -358.4691467285156, |
|
"logps/rejected": -418.01031494140625, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.903947651386261, |
|
"rewards/margins": 0.6940609216690063, |
|
"rewards/rejected": -1.5980085134506226, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": 1.873732566833496, |
|
"logits/rejected": 2.9474740028381348, |
|
"logps/chosen": -371.85552978515625, |
|
"logps/rejected": -420.95904541015625, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.9079627990722656, |
|
"rewards/margins": 0.8738547563552856, |
|
"rewards/rejected": -1.7818174362182617, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": 2.415181875228882, |
|
"logits/rejected": 3.162013530731201, |
|
"logps/chosen": -388.0815734863281, |
|
"logps/rejected": -478.11785888671875, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.128756046295166, |
|
"rewards/margins": 1.0180633068084717, |
|
"rewards/rejected": -2.146819591522217, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": 1.9844467639923096, |
|
"logits/rejected": 2.9561781883239746, |
|
"logps/chosen": -369.2903747558594, |
|
"logps/rejected": -419.6259765625, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9253425598144531, |
|
"rewards/margins": 0.8587535619735718, |
|
"rewards/rejected": -1.784096121788025, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": 1.8705106973648071, |
|
"logits/rejected": 2.6589739322662354, |
|
"logps/chosen": -380.0862731933594, |
|
"logps/rejected": -439.79168701171875, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9231119155883789, |
|
"rewards/margins": 0.735679030418396, |
|
"rewards/rejected": -1.6587913036346436, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": 1.3944432735443115, |
|
"logits/rejected": 2.3618969917297363, |
|
"logps/chosen": -389.6896057128906, |
|
"logps/rejected": -470.2090759277344, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8919968605041504, |
|
"rewards/margins": 0.6746976971626282, |
|
"rewards/rejected": -1.5666944980621338, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 1.2558308839797974, |
|
"eval_logits/rejected": 2.033073902130127, |
|
"eval_logps/chosen": -349.8758239746094, |
|
"eval_logps/rejected": -438.77349853515625, |
|
"eval_loss": 0.5130496621131897, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.8646047711372375, |
|
"eval_rewards/margins": 0.8896409273147583, |
|
"eval_rewards/rejected": -1.7542455196380615, |
|
"eval_runtime": 92.0798, |
|
"eval_samples_per_second": 21.72, |
|
"eval_steps_per_second": 0.348, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": 1.2974698543548584, |
|
"logits/rejected": 2.6388087272644043, |
|
"logps/chosen": -382.4002990722656, |
|
"logps/rejected": -406.01153564453125, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0794718265533447, |
|
"rewards/margins": 0.7805131673812866, |
|
"rewards/rejected": -1.8599849939346313, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": 1.9306262731552124, |
|
"logits/rejected": 2.9958901405334473, |
|
"logps/chosen": -357.4389953613281, |
|
"logps/rejected": -452.7220764160156, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0856704711914062, |
|
"rewards/margins": 1.057279109954834, |
|
"rewards/rejected": -2.1429495811462402, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": 1.4244121313095093, |
|
"logits/rejected": 2.2654335498809814, |
|
"logps/chosen": -404.91082763671875, |
|
"logps/rejected": -450.8277893066406, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1859899759292603, |
|
"rewards/margins": 0.7777279019355774, |
|
"rewards/rejected": -1.9637176990509033, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": 1.5507278442382812, |
|
"logits/rejected": 2.3268961906433105, |
|
"logps/chosen": -363.16473388671875, |
|
"logps/rejected": -420.6800231933594, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0014616250991821, |
|
"rewards/margins": 0.7089160680770874, |
|
"rewards/rejected": -1.7103776931762695, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": 1.5202906131744385, |
|
"logits/rejected": 2.6713767051696777, |
|
"logps/chosen": -359.4294128417969, |
|
"logps/rejected": -433.394287109375, |
|
"loss": 0.4787, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.0805784463882446, |
|
"rewards/margins": 0.9193571209907532, |
|
"rewards/rejected": -1.999935507774353, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": 1.5974103212356567, |
|
"logits/rejected": 3.016284942626953, |
|
"logps/chosen": -435.1712951660156, |
|
"logps/rejected": -469.9830017089844, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1632494926452637, |
|
"rewards/margins": 0.9136824607849121, |
|
"rewards/rejected": -2.0769317150115967, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": 1.7092777490615845, |
|
"logits/rejected": 2.965677261352539, |
|
"logps/chosen": -423.5621643066406, |
|
"logps/rejected": -466.57196044921875, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.1220273971557617, |
|
"rewards/margins": 0.9678171277046204, |
|
"rewards/rejected": -2.0898444652557373, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": 1.7860336303710938, |
|
"logits/rejected": 2.569241523742676, |
|
"logps/chosen": -395.4902648925781, |
|
"logps/rejected": -483.0901794433594, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1515331268310547, |
|
"rewards/margins": 0.8898499608039856, |
|
"rewards/rejected": -2.0413832664489746, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": 2.0826852321624756, |
|
"logits/rejected": 2.8060660362243652, |
|
"logps/chosen": -398.78375244140625, |
|
"logps/rejected": -471.2264099121094, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2567694187164307, |
|
"rewards/margins": 0.7216086983680725, |
|
"rewards/rejected": -1.9783780574798584, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": 1.802354097366333, |
|
"logits/rejected": 2.5923492908477783, |
|
"logps/chosen": -446.500244140625, |
|
"logps/rejected": -510.20269775390625, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2238214015960693, |
|
"rewards/margins": 0.9289990663528442, |
|
"rewards/rejected": -2.152820587158203, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 1.8194458484649658, |
|
"eval_logits/rejected": 2.592175245285034, |
|
"eval_logps/chosen": -372.7066650390625, |
|
"eval_logps/rejected": -474.1963195800781, |
|
"eval_loss": 0.5050143003463745, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": -1.0929131507873535, |
|
"eval_rewards/margins": 1.0155609846115112, |
|
"eval_rewards/rejected": -2.108474016189575, |
|
"eval_runtime": 90.5801, |
|
"eval_samples_per_second": 22.08, |
|
"eval_steps_per_second": 0.353, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": 2.2372403144836426, |
|
"logits/rejected": 3.196664333343506, |
|
"logps/chosen": -370.81719970703125, |
|
"logps/rejected": -452.06549072265625, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.1016533374786377, |
|
"rewards/margins": 0.9261430501937866, |
|
"rewards/rejected": -2.0277962684631348, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": 1.831080675125122, |
|
"logits/rejected": 2.4410791397094727, |
|
"logps/chosen": -385.7922058105469, |
|
"logps/rejected": -492.590576171875, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.084149956703186, |
|
"rewards/margins": 0.9615718722343445, |
|
"rewards/rejected": -2.0457215309143066, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": 1.4260971546173096, |
|
"logits/rejected": 2.3162856101989746, |
|
"logps/chosen": -407.1165466308594, |
|
"logps/rejected": -454.90374755859375, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0966671705245972, |
|
"rewards/margins": 0.9018322229385376, |
|
"rewards/rejected": -1.9984995126724243, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": 1.783463716506958, |
|
"logits/rejected": 2.5885117053985596, |
|
"logps/chosen": -373.5993347167969, |
|
"logps/rejected": -458.12091064453125, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.1429402828216553, |
|
"rewards/margins": 0.8700854182243347, |
|
"rewards/rejected": -2.0130257606506348, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": 1.8070141077041626, |
|
"logits/rejected": 2.747885227203369, |
|
"logps/chosen": -355.58221435546875, |
|
"logps/rejected": -426.42584228515625, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0844265222549438, |
|
"rewards/margins": 0.8474240303039551, |
|
"rewards/rejected": -1.9318506717681885, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": 2.0278899669647217, |
|
"logits/rejected": 3.022653818130493, |
|
"logps/chosen": -362.0993347167969, |
|
"logps/rejected": -428.6521911621094, |
|
"loss": 0.4861, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1751190423965454, |
|
"rewards/margins": 0.813240647315979, |
|
"rewards/rejected": -1.9883596897125244, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": 1.3355131149291992, |
|
"logits/rejected": 2.729475736618042, |
|
"logps/chosen": -406.28033447265625, |
|
"logps/rejected": -480.8604431152344, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0706168413162231, |
|
"rewards/margins": 1.0933626890182495, |
|
"rewards/rejected": -2.1639795303344727, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5379065808890754, |
|
"train_runtime": 5396.8094, |
|
"train_samples_per_second": 11.328, |
|
"train_steps_per_second": 0.089 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|