|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 900, |
|
"global_step": 13500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00022222222222222223, |
|
"grad_norm": 22.625, |
|
"learning_rate": 2.222222222222222e-09, |
|
"logits/chosen": -3.4569907188415527, |
|
"logits/rejected": -3.426312208175659, |
|
"logps/chosen": -237.60638427734375, |
|
"logps/rejected": -149.21551513671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 10.625, |
|
"learning_rate": 9.655172413793103e-07, |
|
"logits/chosen": -3.2259411811828613, |
|
"logits/rejected": -3.2373013496398926, |
|
"logps/chosen": -157.755859375, |
|
"logps/rejected": -135.1826171875, |
|
"loss": 0.7093, |
|
"rewards/accuracies": 0.5077864527702332, |
|
"rewards/chosen": -0.0009299311204813421, |
|
"rewards/margins": 0.0015665229875594378, |
|
"rewards/rejected": -0.0024964541662484407, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": -3.013110399246216, |
|
"eval_logits/rejected": -3.0272324085235596, |
|
"eval_logps/chosen": -157.49517822265625, |
|
"eval_logps/rejected": -135.0393829345703, |
|
"eval_loss": 0.707986056804657, |
|
"eval_rewards/accuracies": 0.5138888955116272, |
|
"eval_rewards/chosen": 0.009778406471014023, |
|
"eval_rewards/margins": 0.0047084130346775055, |
|
"eval_rewards/rejected": 0.005069993901997805, |
|
"eval_runtime": 621.3179, |
|
"eval_samples_per_second": 1.609, |
|
"eval_steps_per_second": 0.101, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 9.125, |
|
"learning_rate": 8.96551724137931e-07, |
|
"logits/chosen": -3.2368197441101074, |
|
"logits/rejected": -3.2453553676605225, |
|
"logps/chosen": -156.8467254638672, |
|
"logps/rejected": -132.40830993652344, |
|
"loss": 0.7103, |
|
"rewards/accuracies": 0.49944445490837097, |
|
"rewards/chosen": 0.0005948151228949428, |
|
"rewards/margins": -6.089528324082494e-05, |
|
"rewards/rejected": 0.0006557104061357677, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -3.013019323348999, |
|
"eval_logits/rejected": -3.0271592140197754, |
|
"eval_logps/chosen": -157.4865264892578, |
|
"eval_logps/rejected": -135.03961181640625, |
|
"eval_loss": 0.7029208540916443, |
|
"eval_rewards/accuracies": 0.5059523582458496, |
|
"eval_rewards/chosen": 0.01844022050499916, |
|
"eval_rewards/margins": 0.013589809648692608, |
|
"eval_rewards/rejected": 0.004850410390645266, |
|
"eval_runtime": 618.8158, |
|
"eval_samples_per_second": 1.616, |
|
"eval_steps_per_second": 0.102, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 8.275862068965517e-07, |
|
"logits/chosen": -3.228179693222046, |
|
"logits/rejected": -3.2370007038116455, |
|
"logps/chosen": -155.81236267089844, |
|
"logps/rejected": -132.52044677734375, |
|
"loss": 0.7057, |
|
"rewards/accuracies": 0.49444442987442017, |
|
"rewards/chosen": 0.01380106434226036, |
|
"rewards/margins": 0.007251843344420195, |
|
"rewards/rejected": 0.006549220532178879, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -3.012927293777466, |
|
"eval_logits/rejected": -3.0271010398864746, |
|
"eval_logps/chosen": -157.47120666503906, |
|
"eval_logps/rejected": -135.03591918945312, |
|
"eval_loss": 0.6978173851966858, |
|
"eval_rewards/accuracies": 0.5128968358039856, |
|
"eval_rewards/chosen": 0.0337512232363224, |
|
"eval_rewards/margins": 0.025207480415701866, |
|
"eval_rewards/rejected": 0.008543744683265686, |
|
"eval_runtime": 616.9849, |
|
"eval_samples_per_second": 1.621, |
|
"eval_steps_per_second": 0.102, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 7.586206896551724e-07, |
|
"logits/chosen": -3.2351443767547607, |
|
"logits/rejected": -3.2431368827819824, |
|
"logps/chosen": -156.77496337890625, |
|
"logps/rejected": -133.58193969726562, |
|
"loss": 0.7032, |
|
"rewards/accuracies": 0.5188888907432556, |
|
"rewards/chosen": 0.02398551069200039, |
|
"rewards/margins": 0.01356814056634903, |
|
"rewards/rejected": 0.01041737012565136, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -3.013091802597046, |
|
"eval_logits/rejected": -3.0272812843322754, |
|
"eval_logps/chosen": -157.4769744873047, |
|
"eval_logps/rejected": -135.029052734375, |
|
"eval_loss": 0.7038853764533997, |
|
"eval_rewards/accuracies": 0.5, |
|
"eval_rewards/chosen": 0.02797560952603817, |
|
"eval_rewards/margins": 0.01257056463509798, |
|
"eval_rewards/rejected": 0.015405045822262764, |
|
"eval_runtime": 617.4916, |
|
"eval_samples_per_second": 1.619, |
|
"eval_steps_per_second": 0.102, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 15.4375, |
|
"learning_rate": 6.896551724137931e-07, |
|
"logits/chosen": -3.2322237491607666, |
|
"logits/rejected": -3.2422921657562256, |
|
"logps/chosen": -158.39981079101562, |
|
"logps/rejected": -134.93020629882812, |
|
"loss": 0.7054, |
|
"rewards/accuracies": 0.5116666555404663, |
|
"rewards/chosen": 0.03937062993645668, |
|
"rewards/margins": 0.009623720310628414, |
|
"rewards/rejected": 0.02974691055715084, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -3.013054609298706, |
|
"eval_logits/rejected": -3.0272459983825684, |
|
"eval_logps/chosen": -157.4499053955078, |
|
"eval_logps/rejected": -135.0231170654297, |
|
"eval_loss": 0.6935561299324036, |
|
"eval_rewards/accuracies": 0.538690447807312, |
|
"eval_rewards/chosen": 0.05505846440792084, |
|
"eval_rewards/margins": 0.03372044861316681, |
|
"eval_rewards/rejected": 0.021338019520044327, |
|
"eval_runtime": 618.0893, |
|
"eval_samples_per_second": 1.618, |
|
"eval_steps_per_second": 0.102, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 10.25, |
|
"learning_rate": 6.206896551724138e-07, |
|
"logits/chosen": -3.23718523979187, |
|
"logits/rejected": -3.246558666229248, |
|
"logps/chosen": -157.82701110839844, |
|
"logps/rejected": -135.48013305664062, |
|
"loss": 0.7033, |
|
"rewards/accuracies": 0.5244444608688354, |
|
"rewards/chosen": 0.043098967522382736, |
|
"rewards/margins": 0.014880964532494545, |
|
"rewards/rejected": 0.028218001127243042, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_logits/chosen": -3.01303768157959, |
|
"eval_logits/rejected": -3.0271825790405273, |
|
"eval_logps/chosen": -157.44830322265625, |
|
"eval_logps/rejected": -135.01748657226562, |
|
"eval_loss": 0.6953737139701843, |
|
"eval_rewards/accuracies": 0.528769850730896, |
|
"eval_rewards/chosen": 0.05664879083633423, |
|
"eval_rewards/margins": 0.029674744233489037, |
|
"eval_rewards/rejected": 0.02697405032813549, |
|
"eval_runtime": 619.6669, |
|
"eval_samples_per_second": 1.614, |
|
"eval_steps_per_second": 0.102, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 15.375, |
|
"learning_rate": 5.517241379310344e-07, |
|
"logits/chosen": -3.2300009727478027, |
|
"logits/rejected": -3.239253044128418, |
|
"logps/chosen": -157.90548706054688, |
|
"logps/rejected": -133.99058532714844, |
|
"loss": 0.7052, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.043052662163972855, |
|
"rewards/margins": 0.010972147807478905, |
|
"rewards/rejected": 0.0320805124938488, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_logits/chosen": -3.0130200386047363, |
|
"eval_logits/rejected": -3.0271594524383545, |
|
"eval_logps/chosen": -157.44041442871094, |
|
"eval_logps/rejected": -135.01556396484375, |
|
"eval_loss": 0.6925241351127625, |
|
"eval_rewards/accuracies": 0.5396825671195984, |
|
"eval_rewards/chosen": 0.06454168260097504, |
|
"eval_rewards/margins": 0.03564530983567238, |
|
"eval_rewards/rejected": 0.02889636531472206, |
|
"eval_runtime": 619.0036, |
|
"eval_samples_per_second": 1.615, |
|
"eval_steps_per_second": 0.102, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 13.8125, |
|
"learning_rate": 4.827586206896552e-07, |
|
"logits/chosen": -3.227038860321045, |
|
"logits/rejected": -3.237276792526245, |
|
"logps/chosen": -155.9982452392578, |
|
"logps/rejected": -133.17002868652344, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.5294444561004639, |
|
"rewards/chosen": 0.06239644065499306, |
|
"rewards/margins": 0.032080113887786865, |
|
"rewards/rejected": 0.030316324904561043, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/chosen": -3.0130317211151123, |
|
"eval_logits/rejected": -3.0272297859191895, |
|
"eval_logps/chosen": -157.42637634277344, |
|
"eval_logps/rejected": -135.01864624023438, |
|
"eval_loss": 0.6855846643447876, |
|
"eval_rewards/accuracies": 0.5664682388305664, |
|
"eval_rewards/chosen": 0.07857942581176758, |
|
"eval_rewards/margins": 0.05277930945158005, |
|
"eval_rewards/rejected": 0.025800110772252083, |
|
"eval_runtime": 617.3442, |
|
"eval_samples_per_second": 1.62, |
|
"eval_steps_per_second": 0.102, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 7.625, |
|
"learning_rate": 4.1379310344827586e-07, |
|
"logits/chosen": -3.233175754547119, |
|
"logits/rejected": -3.2411041259765625, |
|
"logps/chosen": -156.3334503173828, |
|
"logps/rejected": -131.23414611816406, |
|
"loss": 0.7011, |
|
"rewards/accuracies": 0.5183333158493042, |
|
"rewards/chosen": 0.058114588260650635, |
|
"rewards/margins": 0.01674296148121357, |
|
"rewards/rejected": 0.041371628642082214, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_logits/chosen": -3.01287579536438, |
|
"eval_logits/rejected": -3.027003288269043, |
|
"eval_logps/chosen": -157.41868591308594, |
|
"eval_logps/rejected": -135.01287841796875, |
|
"eval_loss": 0.6833264827728271, |
|
"eval_rewards/accuracies": 0.5605158805847168, |
|
"eval_rewards/chosen": 0.08628029376268387, |
|
"eval_rewards/margins": 0.05471265688538551, |
|
"eval_rewards/rejected": 0.031567640602588654, |
|
"eval_runtime": 617.0438, |
|
"eval_samples_per_second": 1.621, |
|
"eval_steps_per_second": 0.102, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 6.0, |
|
"learning_rate": 3.4482758620689656e-07, |
|
"logits/chosen": -3.2326903343200684, |
|
"logits/rejected": -3.241744041442871, |
|
"logps/chosen": -157.41111755371094, |
|
"logps/rejected": -134.63653564453125, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.5327777862548828, |
|
"rewards/chosen": 0.07450559735298157, |
|
"rewards/margins": 0.033994805067777634, |
|
"rewards/rejected": 0.04051079601049423, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -3.012874126434326, |
|
"eval_logits/rejected": -3.027069091796875, |
|
"eval_logps/chosen": -157.4245147705078, |
|
"eval_logps/rejected": -135.01060485839844, |
|
"eval_loss": 0.6874573826789856, |
|
"eval_rewards/accuracies": 0.5416666865348816, |
|
"eval_rewards/chosen": 0.08044072240591049, |
|
"eval_rewards/margins": 0.04658132046461105, |
|
"eval_rewards/rejected": 0.033859409391880035, |
|
"eval_runtime": 617.9074, |
|
"eval_samples_per_second": 1.618, |
|
"eval_steps_per_second": 0.102, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 2.758620689655172e-07, |
|
"logits/chosen": -3.2308294773101807, |
|
"logits/rejected": -3.241821765899658, |
|
"logps/chosen": -156.53921508789062, |
|
"logps/rejected": -133.07757568359375, |
|
"loss": 0.7004, |
|
"rewards/accuracies": 0.5172222256660461, |
|
"rewards/chosen": 0.058275189250707626, |
|
"rewards/margins": 0.019040387123823166, |
|
"rewards/rejected": 0.03923480585217476, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_logits/chosen": -3.012793779373169, |
|
"eval_logits/rejected": -3.026986837387085, |
|
"eval_logps/chosen": -157.421142578125, |
|
"eval_logps/rejected": -135.0035400390625, |
|
"eval_loss": 0.6900457739830017, |
|
"eval_rewards/accuracies": 0.5476190447807312, |
|
"eval_rewards/chosen": 0.08382151275873184, |
|
"eval_rewards/margins": 0.04290608689188957, |
|
"eval_rewards/rejected": 0.04091542959213257, |
|
"eval_runtime": 617.2416, |
|
"eval_samples_per_second": 1.62, |
|
"eval_steps_per_second": 0.102, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 9.75, |
|
"learning_rate": 2.0689655172413793e-07, |
|
"logits/chosen": -3.2308058738708496, |
|
"logits/rejected": -3.2401750087738037, |
|
"logps/chosen": -159.013427734375, |
|
"logps/rejected": -133.7303924560547, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.5322222113609314, |
|
"rewards/chosen": 0.07445600628852844, |
|
"rewards/margins": 0.03202791139483452, |
|
"rewards/rejected": 0.04242809861898422, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_logits/chosen": -3.0131027698516846, |
|
"eval_logits/rejected": -3.0273118019104004, |
|
"eval_logps/chosen": -157.4429473876953, |
|
"eval_logps/rejected": -135.01034545898438, |
|
"eval_loss": 0.6979788541793823, |
|
"eval_rewards/accuracies": 0.538690447807312, |
|
"eval_rewards/chosen": 0.06201785430312157, |
|
"eval_rewards/margins": 0.027890633791685104, |
|
"eval_rewards/rejected": 0.03412722051143646, |
|
"eval_runtime": 617.1551, |
|
"eval_samples_per_second": 1.62, |
|
"eval_steps_per_second": 0.102, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.379310344827586e-07, |
|
"logits/chosen": -3.236497640609741, |
|
"logits/rejected": -3.243530035018921, |
|
"logps/chosen": -157.6371307373047, |
|
"logps/rejected": -134.19334411621094, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5533333420753479, |
|
"rewards/chosen": 0.0833386555314064, |
|
"rewards/margins": 0.043027978390455246, |
|
"rewards/rejected": 0.04031067714095116, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_logits/chosen": -3.0131278038024902, |
|
"eval_logits/rejected": -3.0272884368896484, |
|
"eval_logps/chosen": -157.4271240234375, |
|
"eval_logps/rejected": -135.00433349609375, |
|
"eval_loss": 0.6915609240531921, |
|
"eval_rewards/accuracies": 0.5426587462425232, |
|
"eval_rewards/chosen": 0.07781906425952911, |
|
"eval_rewards/margins": 0.03770728409290314, |
|
"eval_rewards/rejected": 0.04011178016662598, |
|
"eval_runtime": 617.4019, |
|
"eval_samples_per_second": 1.62, |
|
"eval_steps_per_second": 0.102, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 12.875, |
|
"learning_rate": 6.89655172413793e-08, |
|
"logits/chosen": -3.2290825843811035, |
|
"logits/rejected": -3.238636016845703, |
|
"logps/chosen": -155.427978515625, |
|
"logps/rejected": -132.6368865966797, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5477777719497681, |
|
"rewards/chosen": 0.07133558392524719, |
|
"rewards/margins": 0.04007010534405708, |
|
"rewards/rejected": 0.03126548230648041, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_logits/chosen": -3.0130114555358887, |
|
"eval_logits/rejected": -3.0271997451782227, |
|
"eval_logps/chosen": -157.41952514648438, |
|
"eval_logps/rejected": -135.01426696777344, |
|
"eval_loss": 0.6846425533294678, |
|
"eval_rewards/accuracies": 0.5436508059501648, |
|
"eval_rewards/chosen": 0.0854325145483017, |
|
"eval_rewards/margins": 0.055243875831365585, |
|
"eval_rewards/rejected": 0.030188636854290962, |
|
"eval_runtime": 617.22, |
|
"eval_samples_per_second": 1.62, |
|
"eval_steps_per_second": 0.102, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 10.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -3.229140520095825, |
|
"logits/rejected": -3.2376339435577393, |
|
"logps/chosen": -156.7906036376953, |
|
"logps/rejected": -134.84449768066406, |
|
"loss": 0.7003, |
|
"rewards/accuracies": 0.528333306312561, |
|
"rewards/chosen": 0.06633351743221283, |
|
"rewards/margins": 0.02000141702592373, |
|
"rewards/rejected": 0.04633209481835365, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -3.0129826068878174, |
|
"eval_logits/rejected": -3.027216911315918, |
|
"eval_logps/chosen": -157.42532348632812, |
|
"eval_logps/rejected": -135.01596069335938, |
|
"eval_loss": 0.6864064931869507, |
|
"eval_rewards/accuracies": 0.5426587462425232, |
|
"eval_rewards/chosen": 0.07963090389966965, |
|
"eval_rewards/margins": 0.051132071763277054, |
|
"eval_rewards/rejected": 0.028498835861682892, |
|
"eval_runtime": 617.15, |
|
"eval_samples_per_second": 1.62, |
|
"eval_steps_per_second": 0.102, |
|
"step": 13500 |
|
} |
|
], |
|
"logging_steps": 900, |
|
"max_steps": 13500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 900, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|