FouadAI's picture
Training in progress, step 13500, checkpoint
817ff5c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 900,
"global_step": 13500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00022222222222222223,
"grad_norm": 22.625,
"learning_rate": 2.222222222222222e-09,
"logits/chosen": -3.4569907188415527,
"logits/rejected": -3.426312208175659,
"logps/chosen": -237.60638427734375,
"logps/rejected": -149.21551513671875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.2,
"grad_norm": 10.625,
"learning_rate": 9.655172413793103e-07,
"logits/chosen": -3.2259411811828613,
"logits/rejected": -3.2373013496398926,
"logps/chosen": -157.755859375,
"logps/rejected": -135.1826171875,
"loss": 0.7093,
"rewards/accuracies": 0.5077864527702332,
"rewards/chosen": -0.0009299311204813421,
"rewards/margins": 0.0015665229875594378,
"rewards/rejected": -0.0024964541662484407,
"step": 900
},
{
"epoch": 0.2,
"eval_logits/chosen": -3.013110399246216,
"eval_logits/rejected": -3.0272324085235596,
"eval_logps/chosen": -157.49517822265625,
"eval_logps/rejected": -135.0393829345703,
"eval_loss": 0.707986056804657,
"eval_rewards/accuracies": 0.5138888955116272,
"eval_rewards/chosen": 0.009778406471014023,
"eval_rewards/margins": 0.0047084130346775055,
"eval_rewards/rejected": 0.005069993901997805,
"eval_runtime": 621.3179,
"eval_samples_per_second": 1.609,
"eval_steps_per_second": 0.101,
"step": 900
},
{
"epoch": 0.4,
"grad_norm": 9.125,
"learning_rate": 8.96551724137931e-07,
"logits/chosen": -3.2368197441101074,
"logits/rejected": -3.2453553676605225,
"logps/chosen": -156.8467254638672,
"logps/rejected": -132.40830993652344,
"loss": 0.7103,
"rewards/accuracies": 0.49944445490837097,
"rewards/chosen": 0.0005948151228949428,
"rewards/margins": -6.089528324082494e-05,
"rewards/rejected": 0.0006557104061357677,
"step": 1800
},
{
"epoch": 0.4,
"eval_logits/chosen": -3.013019323348999,
"eval_logits/rejected": -3.0271592140197754,
"eval_logps/chosen": -157.4865264892578,
"eval_logps/rejected": -135.03961181640625,
"eval_loss": 0.7029208540916443,
"eval_rewards/accuracies": 0.5059523582458496,
"eval_rewards/chosen": 0.01844022050499916,
"eval_rewards/margins": 0.013589809648692608,
"eval_rewards/rejected": 0.004850410390645266,
"eval_runtime": 618.8158,
"eval_samples_per_second": 1.616,
"eval_steps_per_second": 0.102,
"step": 1800
},
{
"epoch": 0.6,
"grad_norm": 9.5625,
"learning_rate": 8.275862068965517e-07,
"logits/chosen": -3.228179693222046,
"logits/rejected": -3.2370007038116455,
"logps/chosen": -155.81236267089844,
"logps/rejected": -132.52044677734375,
"loss": 0.7057,
"rewards/accuracies": 0.49444442987442017,
"rewards/chosen": 0.01380106434226036,
"rewards/margins": 0.007251843344420195,
"rewards/rejected": 0.006549220532178879,
"step": 2700
},
{
"epoch": 0.6,
"eval_logits/chosen": -3.012927293777466,
"eval_logits/rejected": -3.0271010398864746,
"eval_logps/chosen": -157.47120666503906,
"eval_logps/rejected": -135.03591918945312,
"eval_loss": 0.6978173851966858,
"eval_rewards/accuracies": 0.5128968358039856,
"eval_rewards/chosen": 0.0337512232363224,
"eval_rewards/margins": 0.025207480415701866,
"eval_rewards/rejected": 0.008543744683265686,
"eval_runtime": 616.9849,
"eval_samples_per_second": 1.621,
"eval_steps_per_second": 0.102,
"step": 2700
},
{
"epoch": 0.8,
"grad_norm": 9.4375,
"learning_rate": 7.586206896551724e-07,
"logits/chosen": -3.2351443767547607,
"logits/rejected": -3.2431368827819824,
"logps/chosen": -156.77496337890625,
"logps/rejected": -133.58193969726562,
"loss": 0.7032,
"rewards/accuracies": 0.5188888907432556,
"rewards/chosen": 0.02398551069200039,
"rewards/margins": 0.01356814056634903,
"rewards/rejected": 0.01041737012565136,
"step": 3600
},
{
"epoch": 0.8,
"eval_logits/chosen": -3.013091802597046,
"eval_logits/rejected": -3.0272812843322754,
"eval_logps/chosen": -157.4769744873047,
"eval_logps/rejected": -135.029052734375,
"eval_loss": 0.7038853764533997,
"eval_rewards/accuracies": 0.5,
"eval_rewards/chosen": 0.02797560952603817,
"eval_rewards/margins": 0.01257056463509798,
"eval_rewards/rejected": 0.015405045822262764,
"eval_runtime": 617.4916,
"eval_samples_per_second": 1.619,
"eval_steps_per_second": 0.102,
"step": 3600
},
{
"epoch": 1.0,
"grad_norm": 15.4375,
"learning_rate": 6.896551724137931e-07,
"logits/chosen": -3.2322237491607666,
"logits/rejected": -3.2422921657562256,
"logps/chosen": -158.39981079101562,
"logps/rejected": -134.93020629882812,
"loss": 0.7054,
"rewards/accuracies": 0.5116666555404663,
"rewards/chosen": 0.03937062993645668,
"rewards/margins": 0.009623720310628414,
"rewards/rejected": 0.02974691055715084,
"step": 4500
},
{
"epoch": 1.0,
"eval_logits/chosen": -3.013054609298706,
"eval_logits/rejected": -3.0272459983825684,
"eval_logps/chosen": -157.4499053955078,
"eval_logps/rejected": -135.0231170654297,
"eval_loss": 0.6935561299324036,
"eval_rewards/accuracies": 0.538690447807312,
"eval_rewards/chosen": 0.05505846440792084,
"eval_rewards/margins": 0.03372044861316681,
"eval_rewards/rejected": 0.021338019520044327,
"eval_runtime": 618.0893,
"eval_samples_per_second": 1.618,
"eval_steps_per_second": 0.102,
"step": 4500
},
{
"epoch": 1.2,
"grad_norm": 10.25,
"learning_rate": 6.206896551724138e-07,
"logits/chosen": -3.23718523979187,
"logits/rejected": -3.246558666229248,
"logps/chosen": -157.82701110839844,
"logps/rejected": -135.48013305664062,
"loss": 0.7033,
"rewards/accuracies": 0.5244444608688354,
"rewards/chosen": 0.043098967522382736,
"rewards/margins": 0.014880964532494545,
"rewards/rejected": 0.028218001127243042,
"step": 5400
},
{
"epoch": 1.2,
"eval_logits/chosen": -3.01303768157959,
"eval_logits/rejected": -3.0271825790405273,
"eval_logps/chosen": -157.44830322265625,
"eval_logps/rejected": -135.01748657226562,
"eval_loss": 0.6953737139701843,
"eval_rewards/accuracies": 0.528769850730896,
"eval_rewards/chosen": 0.05664879083633423,
"eval_rewards/margins": 0.029674744233489037,
"eval_rewards/rejected": 0.02697405032813549,
"eval_runtime": 619.6669,
"eval_samples_per_second": 1.614,
"eval_steps_per_second": 0.102,
"step": 5400
},
{
"epoch": 1.4,
"grad_norm": 15.375,
"learning_rate": 5.517241379310344e-07,
"logits/chosen": -3.2300009727478027,
"logits/rejected": -3.239253044128418,
"logps/chosen": -157.90548706054688,
"logps/rejected": -133.99058532714844,
"loss": 0.7052,
"rewards/accuracies": 0.5138888955116272,
"rewards/chosen": 0.043052662163972855,
"rewards/margins": 0.010972147807478905,
"rewards/rejected": 0.0320805124938488,
"step": 6300
},
{
"epoch": 1.4,
"eval_logits/chosen": -3.0130200386047363,
"eval_logits/rejected": -3.0271594524383545,
"eval_logps/chosen": -157.44041442871094,
"eval_logps/rejected": -135.01556396484375,
"eval_loss": 0.6925241351127625,
"eval_rewards/accuracies": 0.5396825671195984,
"eval_rewards/chosen": 0.06454168260097504,
"eval_rewards/margins": 0.03564530983567238,
"eval_rewards/rejected": 0.02889636531472206,
"eval_runtime": 619.0036,
"eval_samples_per_second": 1.615,
"eval_steps_per_second": 0.102,
"step": 6300
},
{
"epoch": 1.6,
"grad_norm": 13.8125,
"learning_rate": 4.827586206896552e-07,
"logits/chosen": -3.227038860321045,
"logits/rejected": -3.237276792526245,
"logps/chosen": -155.9982452392578,
"logps/rejected": -133.17002868652344,
"loss": 0.6945,
"rewards/accuracies": 0.5294444561004639,
"rewards/chosen": 0.06239644065499306,
"rewards/margins": 0.032080113887786865,
"rewards/rejected": 0.030316324904561043,
"step": 7200
},
{
"epoch": 1.6,
"eval_logits/chosen": -3.0130317211151123,
"eval_logits/rejected": -3.0272297859191895,
"eval_logps/chosen": -157.42637634277344,
"eval_logps/rejected": -135.01864624023438,
"eval_loss": 0.6855846643447876,
"eval_rewards/accuracies": 0.5664682388305664,
"eval_rewards/chosen": 0.07857942581176758,
"eval_rewards/margins": 0.05277930945158005,
"eval_rewards/rejected": 0.025800110772252083,
"eval_runtime": 617.3442,
"eval_samples_per_second": 1.62,
"eval_steps_per_second": 0.102,
"step": 7200
},
{
"epoch": 1.8,
"grad_norm": 7.625,
"learning_rate": 4.1379310344827586e-07,
"logits/chosen": -3.233175754547119,
"logits/rejected": -3.2411041259765625,
"logps/chosen": -156.3334503173828,
"logps/rejected": -131.23414611816406,
"loss": 0.7011,
"rewards/accuracies": 0.5183333158493042,
"rewards/chosen": 0.058114588260650635,
"rewards/margins": 0.01674296148121357,
"rewards/rejected": 0.041371628642082214,
"step": 8100
},
{
"epoch": 1.8,
"eval_logits/chosen": -3.01287579536438,
"eval_logits/rejected": -3.027003288269043,
"eval_logps/chosen": -157.41868591308594,
"eval_logps/rejected": -135.01287841796875,
"eval_loss": 0.6833264827728271,
"eval_rewards/accuracies": 0.5605158805847168,
"eval_rewards/chosen": 0.08628029376268387,
"eval_rewards/margins": 0.05471265688538551,
"eval_rewards/rejected": 0.031567640602588654,
"eval_runtime": 617.0438,
"eval_samples_per_second": 1.621,
"eval_steps_per_second": 0.102,
"step": 8100
},
{
"epoch": 2.0,
"grad_norm": 6.0,
"learning_rate": 3.4482758620689656e-07,
"logits/chosen": -3.2326903343200684,
"logits/rejected": -3.241744041442871,
"logps/chosen": -157.41111755371094,
"logps/rejected": -134.63653564453125,
"loss": 0.6943,
"rewards/accuracies": 0.5327777862548828,
"rewards/chosen": 0.07450559735298157,
"rewards/margins": 0.033994805067777634,
"rewards/rejected": 0.04051079601049423,
"step": 9000
},
{
"epoch": 2.0,
"eval_logits/chosen": -3.012874126434326,
"eval_logits/rejected": -3.027069091796875,
"eval_logps/chosen": -157.4245147705078,
"eval_logps/rejected": -135.01060485839844,
"eval_loss": 0.6874573826789856,
"eval_rewards/accuracies": 0.5416666865348816,
"eval_rewards/chosen": 0.08044072240591049,
"eval_rewards/margins": 0.04658132046461105,
"eval_rewards/rejected": 0.033859409391880035,
"eval_runtime": 617.9074,
"eval_samples_per_second": 1.618,
"eval_steps_per_second": 0.102,
"step": 9000
},
{
"epoch": 2.2,
"grad_norm": 11.0625,
"learning_rate": 2.758620689655172e-07,
"logits/chosen": -3.2308294773101807,
"logits/rejected": -3.241821765899658,
"logps/chosen": -156.53921508789062,
"logps/rejected": -133.07757568359375,
"loss": 0.7004,
"rewards/accuracies": 0.5172222256660461,
"rewards/chosen": 0.058275189250707626,
"rewards/margins": 0.019040387123823166,
"rewards/rejected": 0.03923480585217476,
"step": 9900
},
{
"epoch": 2.2,
"eval_logits/chosen": -3.012793779373169,
"eval_logits/rejected": -3.026986837387085,
"eval_logps/chosen": -157.421142578125,
"eval_logps/rejected": -135.0035400390625,
"eval_loss": 0.6900457739830017,
"eval_rewards/accuracies": 0.5476190447807312,
"eval_rewards/chosen": 0.08382151275873184,
"eval_rewards/margins": 0.04290608689188957,
"eval_rewards/rejected": 0.04091542959213257,
"eval_runtime": 617.2416,
"eval_samples_per_second": 1.62,
"eval_steps_per_second": 0.102,
"step": 9900
},
{
"epoch": 2.4,
"grad_norm": 9.75,
"learning_rate": 2.0689655172413793e-07,
"logits/chosen": -3.2308058738708496,
"logits/rejected": -3.2401750087738037,
"logps/chosen": -159.013427734375,
"logps/rejected": -133.7303924560547,
"loss": 0.6946,
"rewards/accuracies": 0.5322222113609314,
"rewards/chosen": 0.07445600628852844,
"rewards/margins": 0.03202791139483452,
"rewards/rejected": 0.04242809861898422,
"step": 10800
},
{
"epoch": 2.4,
"eval_logits/chosen": -3.0131027698516846,
"eval_logits/rejected": -3.0273118019104004,
"eval_logps/chosen": -157.4429473876953,
"eval_logps/rejected": -135.01034545898438,
"eval_loss": 0.6979788541793823,
"eval_rewards/accuracies": 0.538690447807312,
"eval_rewards/chosen": 0.06201785430312157,
"eval_rewards/margins": 0.027890633791685104,
"eval_rewards/rejected": 0.03412722051143646,
"eval_runtime": 617.1551,
"eval_samples_per_second": 1.62,
"eval_steps_per_second": 0.102,
"step": 10800
},
{
"epoch": 2.6,
"grad_norm": 9.4375,
"learning_rate": 1.379310344827586e-07,
"logits/chosen": -3.236497640609741,
"logits/rejected": -3.243530035018921,
"logps/chosen": -157.6371307373047,
"logps/rejected": -134.19334411621094,
"loss": 0.6898,
"rewards/accuracies": 0.5533333420753479,
"rewards/chosen": 0.0833386555314064,
"rewards/margins": 0.043027978390455246,
"rewards/rejected": 0.04031067714095116,
"step": 11700
},
{
"epoch": 2.6,
"eval_logits/chosen": -3.0131278038024902,
"eval_logits/rejected": -3.0272884368896484,
"eval_logps/chosen": -157.4271240234375,
"eval_logps/rejected": -135.00433349609375,
"eval_loss": 0.6915609240531921,
"eval_rewards/accuracies": 0.5426587462425232,
"eval_rewards/chosen": 0.07781906425952911,
"eval_rewards/margins": 0.03770728409290314,
"eval_rewards/rejected": 0.04011178016662598,
"eval_runtime": 617.4019,
"eval_samples_per_second": 1.62,
"eval_steps_per_second": 0.102,
"step": 11700
},
{
"epoch": 2.8,
"grad_norm": 12.875,
"learning_rate": 6.89655172413793e-08,
"logits/chosen": -3.2290825843811035,
"logits/rejected": -3.238636016845703,
"logps/chosen": -155.427978515625,
"logps/rejected": -132.6368865966797,
"loss": 0.6911,
"rewards/accuracies": 0.5477777719497681,
"rewards/chosen": 0.07133558392524719,
"rewards/margins": 0.04007010534405708,
"rewards/rejected": 0.03126548230648041,
"step": 12600
},
{
"epoch": 2.8,
"eval_logits/chosen": -3.0130114555358887,
"eval_logits/rejected": -3.0271997451782227,
"eval_logps/chosen": -157.41952514648438,
"eval_logps/rejected": -135.01426696777344,
"eval_loss": 0.6846425533294678,
"eval_rewards/accuracies": 0.5436508059501648,
"eval_rewards/chosen": 0.0854325145483017,
"eval_rewards/margins": 0.055243875831365585,
"eval_rewards/rejected": 0.030188636854290962,
"eval_runtime": 617.22,
"eval_samples_per_second": 1.62,
"eval_steps_per_second": 0.102,
"step": 12600
},
{
"epoch": 3.0,
"grad_norm": 10.0,
"learning_rate": 0.0,
"logits/chosen": -3.229140520095825,
"logits/rejected": -3.2376339435577393,
"logps/chosen": -156.7906036376953,
"logps/rejected": -134.84449768066406,
"loss": 0.7003,
"rewards/accuracies": 0.528333306312561,
"rewards/chosen": 0.06633351743221283,
"rewards/margins": 0.02000141702592373,
"rewards/rejected": 0.04633209481835365,
"step": 13500
},
{
"epoch": 3.0,
"eval_logits/chosen": -3.0129826068878174,
"eval_logits/rejected": -3.027216911315918,
"eval_logps/chosen": -157.42532348632812,
"eval_logps/rejected": -135.01596069335938,
"eval_loss": 0.6864064931869507,
"eval_rewards/accuracies": 0.5426587462425232,
"eval_rewards/chosen": 0.07963090389966965,
"eval_rewards/margins": 0.051132071763277054,
"eval_rewards/rejected": 0.028498835861682892,
"eval_runtime": 617.15,
"eval_samples_per_second": 1.62,
"eval_steps_per_second": 0.102,
"step": 13500
}
],
"logging_steps": 900,
"max_steps": 13500,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 900,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}