Minbyul's picture
Model save
9a23173 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9982238010657194,
"eval_steps": 100,
"global_step": 281,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 17.4636091361885,
"learning_rate": 1.7241379310344825e-08,
"logits/chosen": -1.180719256401062,
"logits/rejected": -0.36153754591941833,
"logps/chosen": -227.51303100585938,
"logps/rejected": -271.6326599121094,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.04,
"grad_norm": 15.871649969398254,
"learning_rate": 1.7241379310344828e-07,
"logits/chosen": -1.6364216804504395,
"logits/rejected": -0.7718086242675781,
"logps/chosen": -188.4293670654297,
"logps/rejected": -273.3389587402344,
"loss": 0.693,
"rewards/accuracies": 0.4930555522441864,
"rewards/chosen": 0.00015968189109116793,
"rewards/margins": 0.0003999065957032144,
"rewards/rejected": -0.00024022474826779217,
"step": 10
},
{
"epoch": 0.07,
"grad_norm": 15.802939122761929,
"learning_rate": 3.4482758620689656e-07,
"logits/chosen": -0.9365239143371582,
"logits/rejected": -0.7781739234924316,
"logps/chosen": -264.11224365234375,
"logps/rejected": -268.46124267578125,
"loss": 0.691,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.0019152642926201224,
"rewards/margins": 0.003832753049209714,
"rewards/rejected": -0.0019174888730049133,
"step": 20
},
{
"epoch": 0.11,
"grad_norm": 16.050018865575062,
"learning_rate": 4.999805731202437e-07,
"logits/chosen": -1.2832940816879272,
"logits/rejected": -0.9560123682022095,
"logps/chosen": -223.49111938476562,
"logps/rejected": -384.7261657714844,
"loss": 0.6818,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 0.009979133494198322,
"rewards/margins": 0.02327062375843525,
"rewards/rejected": -0.013291488401591778,
"step": 30
},
{
"epoch": 0.14,
"grad_norm": 16.173498483987025,
"learning_rate": 4.976529986032632e-07,
"logits/chosen": -1.3671165704727173,
"logits/rejected": -1.1606197357177734,
"logps/chosen": -226.5165557861328,
"logps/rejected": -420.215576171875,
"loss": 0.6557,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.014291724190115929,
"rewards/margins": 0.0719335526227951,
"rewards/rejected": -0.08622527867555618,
"step": 40
},
{
"epoch": 0.18,
"grad_norm": 18.73697306009316,
"learning_rate": 4.91481456572267e-07,
"logits/chosen": -1.0613763332366943,
"logits/rejected": -0.8452272415161133,
"logps/chosen": -228.3408966064453,
"logps/rejected": -345.06158447265625,
"loss": 0.5945,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -0.04819232225418091,
"rewards/margins": 0.2763238251209259,
"rewards/rejected": -0.3245161473751068,
"step": 50
},
{
"epoch": 0.21,
"grad_norm": 36.9064937358061,
"learning_rate": 4.815617391525771e-07,
"logits/chosen": -1.2162449359893799,
"logits/rejected": -0.5397701263427734,
"logps/chosen": -247.07247924804688,
"logps/rejected": -348.9381408691406,
"loss": 0.525,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.17551271617412567,
"rewards/margins": 0.47526684403419495,
"rewards/rejected": -0.6507795453071594,
"step": 60
},
{
"epoch": 0.25,
"grad_norm": 21.124245684409534,
"learning_rate": 4.680478160991513e-07,
"logits/chosen": -1.0888421535491943,
"logits/rejected": -0.7150756120681763,
"logps/chosen": -268.48040771484375,
"logps/rejected": -540.1458129882812,
"loss": 0.4559,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.25963330268859863,
"rewards/margins": 2.0888125896453857,
"rewards/rejected": -2.3484463691711426,
"step": 70
},
{
"epoch": 0.28,
"grad_norm": 19.8677514066161,
"learning_rate": 4.511494449416671e-07,
"logits/chosen": -1.0766880512237549,
"logits/rejected": -0.03768174722790718,
"logps/chosen": -283.8450622558594,
"logps/rejected": -504.466552734375,
"loss": 0.417,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.32856112718582153,
"rewards/margins": 1.6246074438095093,
"rewards/rejected": -1.953168511390686,
"step": 80
},
{
"epoch": 0.32,
"grad_norm": 20.978509774304264,
"learning_rate": 4.3112891521481815e-07,
"logits/chosen": -0.8445385098457336,
"logits/rejected": -0.18864837288856506,
"logps/chosen": -287.674560546875,
"logps/rejected": -594.6647338867188,
"loss": 0.3829,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.3922039568424225,
"rewards/margins": 2.068591833114624,
"rewards/rejected": -2.4607958793640137,
"step": 90
},
{
"epoch": 0.36,
"grad_norm": 24.770830802497013,
"learning_rate": 4.0829697730853494e-07,
"logits/chosen": -0.6940481662750244,
"logits/rejected": 0.17405104637145996,
"logps/chosen": -258.2374572753906,
"logps/rejected": -551.6319580078125,
"loss": 0.3643,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.2993431091308594,
"rewards/margins": 2.340083360671997,
"rewards/rejected": -2.6394264698028564,
"step": 100
},
{
"epoch": 0.36,
"eval_logits/chosen": -1.748563289642334,
"eval_logits/rejected": 0.045369070023298264,
"eval_logps/chosen": -284.61224365234375,
"eval_logps/rejected": -259.5780334472656,
"eval_loss": 0.5978485941886902,
"eval_rewards/accuracies": 0.7593283653259277,
"eval_rewards/chosen": -0.37915343046188354,
"eval_rewards/margins": 0.3338019549846649,
"eval_rewards/rejected": -0.7129553556442261,
"eval_runtime": 219.8632,
"eval_samples_per_second": 9.742,
"eval_steps_per_second": 0.305,
"step": 100
},
{
"epoch": 0.39,
"grad_norm": 26.461913652670617,
"learning_rate": 3.8300801912883414e-07,
"logits/chosen": -0.9723485112190247,
"logits/rejected": 0.37491127848625183,
"logps/chosen": -249.15823364257812,
"logps/rejected": -532.2054443359375,
"loss": 0.3442,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.35393720865249634,
"rewards/margins": 2.0555851459503174,
"rewards/rejected": -2.40952205657959,
"step": 110
},
{
"epoch": 0.43,
"grad_norm": 26.7014694652173,
"learning_rate": 3.5565456543517485e-07,
"logits/chosen": -0.09540247917175293,
"logits/rejected": 0.513046383857727,
"logps/chosen": -261.6147155761719,
"logps/rejected": -637.6137084960938,
"loss": 0.306,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.2667621970176697,
"rewards/margins": 3.23994779586792,
"rewards/rejected": -3.5067100524902344,
"step": 120
},
{
"epoch": 0.46,
"grad_norm": 68.5297134578811,
"learning_rate": 3.266611852333336e-07,
"logits/chosen": -0.16894316673278809,
"logits/rejected": 0.41675764322280884,
"logps/chosen": -267.2029724121094,
"logps/rejected": -793.5310668945312,
"loss": 0.3386,
"rewards/accuracies": 0.90625,
"rewards/chosen": -0.3927966356277466,
"rewards/margins": 4.087599754333496,
"rewards/rejected": -4.480396270751953,
"step": 130
},
{
"epoch": 0.5,
"grad_norm": 24.995403128308034,
"learning_rate": 2.964779017907287e-07,
"logits/chosen": -0.3116651177406311,
"logits/rejected": 0.7034340500831604,
"logps/chosen": -248.13546752929688,
"logps/rejected": -628.6119384765625,
"loss": 0.2752,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.2110290229320526,
"rewards/margins": 3.0275955200195312,
"rewards/rejected": -3.2386245727539062,
"step": 140
},
{
"epoch": 0.53,
"grad_norm": 21.04952742243863,
"learning_rate": 2.6557320756121306e-07,
"logits/chosen": -0.4465761184692383,
"logits/rejected": 0.5097697973251343,
"logps/chosen": -253.65927124023438,
"logps/rejected": -561.6199951171875,
"loss": 0.2543,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.2355656623840332,
"rewards/margins": 2.4431042671203613,
"rewards/rejected": -2.6786696910858154,
"step": 150
},
{
"epoch": 0.57,
"grad_norm": 36.34356588459772,
"learning_rate": 2.3442679243878697e-07,
"logits/chosen": -0.053225208073854446,
"logits/rejected": 0.7030835747718811,
"logps/chosen": -290.45831298828125,
"logps/rejected": -679.306640625,
"loss": 0.254,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.2719694674015045,
"rewards/margins": 3.4167423248291016,
"rewards/rejected": -3.6887118816375732,
"step": 160
},
{
"epoch": 0.6,
"grad_norm": 19.597521304398125,
"learning_rate": 2.0352209820927135e-07,
"logits/chosen": -0.07039432227611542,
"logits/rejected": 1.1628978252410889,
"logps/chosen": -268.78289794921875,
"logps/rejected": -704.7100830078125,
"loss": 0.2347,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.25404995679855347,
"rewards/margins": 3.9025752544403076,
"rewards/rejected": -4.156625747680664,
"step": 170
},
{
"epoch": 0.64,
"grad_norm": 33.193096952517436,
"learning_rate": 1.7333881476666646e-07,
"logits/chosen": -0.859085738658905,
"logits/rejected": 0.6180144548416138,
"logps/chosen": -228.8545379638672,
"logps/rejected": -779.0535888671875,
"loss": 0.2314,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.3073621392250061,
"rewards/margins": 4.101855278015137,
"rewards/rejected": -4.409217834472656,
"step": 180
},
{
"epoch": 0.67,
"grad_norm": 23.061813637320956,
"learning_rate": 1.4434543456482518e-07,
"logits/chosen": -0.6410714387893677,
"logits/rejected": 0.8705042004585266,
"logps/chosen": -246.0118408203125,
"logps/rejected": -682.171142578125,
"loss": 0.2376,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.35869041085243225,
"rewards/margins": 3.1796650886535645,
"rewards/rejected": -3.538356065750122,
"step": 190
},
{
"epoch": 0.71,
"grad_norm": 26.215462134933507,
"learning_rate": 1.1699198087116588e-07,
"logits/chosen": -0.2712032198905945,
"logits/rejected": 0.8643509149551392,
"logps/chosen": -240.9992218017578,
"logps/rejected": -750.1979370117188,
"loss": 0.2041,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.3355063796043396,
"rewards/margins": 4.0295023918151855,
"rewards/rejected": -4.365009307861328,
"step": 200
},
{
"epoch": 0.71,
"eval_logits/chosen": -1.433498740196228,
"eval_logits/rejected": 0.8359671235084534,
"eval_logps/chosen": -334.36553955078125,
"eval_logps/rejected": -351.18157958984375,
"eval_loss": 0.5962842702865601,
"eval_rewards/accuracies": 0.7108209133148193,
"eval_rewards/chosen": -0.8766867518424988,
"eval_rewards/margins": 0.7523040771484375,
"eval_rewards/rejected": -1.628990888595581,
"eval_runtime": 219.6386,
"eval_samples_per_second": 9.752,
"eval_steps_per_second": 0.305,
"step": 200
},
{
"epoch": 0.75,
"grad_norm": 20.46821959291358,
"learning_rate": 9.170302269146507e-08,
"logits/chosen": -0.14280591905117035,
"logits/rejected": 0.9915373921394348,
"logps/chosen": -273.5321960449219,
"logps/rejected": -798.1834716796875,
"loss": 0.1891,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.2832937240600586,
"rewards/margins": 4.233273506164551,
"rewards/rejected": -4.516566753387451,
"step": 210
},
{
"epoch": 0.78,
"grad_norm": 25.672453750455183,
"learning_rate": 6.887108478518183e-08,
"logits/chosen": -0.6617056131362915,
"logits/rejected": 1.1209099292755127,
"logps/chosen": -266.068359375,
"logps/rejected": -728.5145263671875,
"loss": 0.2108,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.27059105038642883,
"rewards/margins": 4.039263725280762,
"rewards/rejected": -4.309854984283447,
"step": 220
},
{
"epoch": 0.82,
"grad_norm": 36.27184670290087,
"learning_rate": 4.885055505833291e-08,
"logits/chosen": -0.10255080461502075,
"logits/rejected": 0.778251051902771,
"logps/chosen": -260.8437194824219,
"logps/rejected": -754.1353149414062,
"loss": 0.2016,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.3063124120235443,
"rewards/margins": 3.9367871284484863,
"rewards/rejected": -4.243099689483643,
"step": 230
},
{
"epoch": 0.85,
"grad_norm": 31.207116090159346,
"learning_rate": 3.1952183900848665e-08,
"logits/chosen": -0.4079841673374176,
"logits/rejected": 0.5633789896965027,
"logps/chosen": -238.26962280273438,
"logps/rejected": -706.0747680664062,
"loss": 0.2235,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.2822808027267456,
"rewards/margins": 3.6717209815979004,
"rewards/rejected": -3.9540016651153564,
"step": 240
},
{
"epoch": 0.89,
"grad_norm": 24.232889457842543,
"learning_rate": 1.8438260847422837e-08,
"logits/chosen": -0.11115667968988419,
"logits/rejected": 1.0082677602767944,
"logps/chosen": -271.56109619140625,
"logps/rejected": -707.6219482421875,
"loss": 0.2049,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.2982483506202698,
"rewards/margins": 3.9781322479248047,
"rewards/rejected": -4.2763800621032715,
"step": 250
},
{
"epoch": 0.92,
"grad_norm": 24.71147868673447,
"learning_rate": 8.518543427732949e-09,
"logits/chosen": -0.2919185161590576,
"logits/rejected": 0.8239177465438843,
"logps/chosen": -285.7502136230469,
"logps/rejected": -721.8952026367188,
"loss": 0.2005,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.3373405635356903,
"rewards/margins": 4.360747337341309,
"rewards/rejected": -4.6980881690979,
"step": 260
},
{
"epoch": 0.96,
"grad_norm": 32.225622448127446,
"learning_rate": 2.3470013967367976e-09,
"logits/chosen": 0.019734883680939674,
"logits/rejected": 0.908920168876648,
"logps/chosen": -254.5631866455078,
"logps/rejected": -786.376708984375,
"loss": 0.1909,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.3218054175376892,
"rewards/margins": 4.211289405822754,
"rewards/rejected": -4.533095359802246,
"step": 270
},
{
"epoch": 0.99,
"grad_norm": 19.533422797206217,
"learning_rate": 1.9426879756284655e-11,
"logits/chosen": -0.17949172854423523,
"logits/rejected": 0.9797495007514954,
"logps/chosen": -278.2919921875,
"logps/rejected": -752.1105346679688,
"loss": 0.173,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.28086012601852417,
"rewards/margins": 4.271262168884277,
"rewards/rejected": -4.552122116088867,
"step": 280
},
{
"epoch": 1.0,
"step": 281,
"total_flos": 0.0,
"train_loss": 0.34737620712175066,
"train_runtime": 4632.3521,
"train_samples_per_second": 3.885,
"train_steps_per_second": 0.061
}
],
"logging_steps": 10,
"max_steps": 281,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}