{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.8,
  "eval_steps": 900,
  "global_step": 12600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00022222222222222223,
      "grad_norm": 13.25,
      "learning_rate": 2.222222222222222e-09,
      "logits/chosen": -3.4569907188415527,
      "logits/rejected": -3.426312208175659,
      "logps/chosen": -237.60638427734375,
      "logps/rejected": -149.21551513671875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.2,
      "grad_norm": 6.96875,
      "learning_rate": 9.655172413793103e-07,
      "logits/chosen": -3.2259371280670166,
      "logits/rejected": -3.237309217453003,
      "logps/chosen": -157.7607879638672,
      "logps/rejected": -135.18301391601562,
      "loss": 0.6984,
      "rewards/accuracies": 0.4966629445552826,
      "rewards/chosen": -0.002921752631664276,
      "rewards/margins": -0.0014779088087379932,
      "rewards/rejected": -0.0014438438229262829,
      "step": 900
    },
    {
      "epoch": 0.2,
      "eval_logits/chosen": -3.013286590576172,
      "eval_logits/rejected": -3.027402639389038,
      "eval_logps/chosen": -157.49420166015625,
      "eval_logps/rejected": -135.04640197753906,
      "eval_loss": 0.6940267086029053,
      "eval_rewards/accuracies": 0.5178571343421936,
      "eval_rewards/chosen": 0.005377354100346565,
      "eval_rewards/margins": 0.006346767768263817,
      "eval_rewards/rejected": -0.0009694137261249125,
      "eval_runtime": 621.0759,
      "eval_samples_per_second": 1.61,
      "eval_steps_per_second": 0.101,
      "step": 900
    },
    {
      "epoch": 0.4,
      "grad_norm": 5.15625,
      "learning_rate": 8.96551724137931e-07,
      "logits/chosen": -3.2368738651275635,
      "logits/rejected": -3.2453866004943848,
      "logps/chosen": -156.84564208984375,
      "logps/rejected": -132.406005859375,
      "loss": 0.6979,
      "rewards/accuracies": 0.4933333396911621,
      "rewards/chosen": 0.0008400398073717952,
      "rewards/margins": -0.0006429057684727013,
      "rewards/rejected": 0.0014829455176368356,
      "step": 1800
    },
    {
      "epoch": 0.4,
      "eval_logits/chosen": -3.013197898864746,
      "eval_logits/rejected": -3.0273756980895996,
      "eval_logps/chosen": -157.50088500976562,
      "eval_logps/rejected": -135.04953002929688,
      "eval_loss": 0.6951879858970642,
      "eval_rewards/accuracies": 0.5267857313156128,
      "eval_rewards/chosen": 0.0020369377452880144,
      "eval_rewards/margins": 0.004572988487780094,
      "eval_rewards/rejected": -0.0025360507424920797,
      "eval_runtime": 617.5747,
      "eval_samples_per_second": 1.619,
      "eval_steps_per_second": 0.102,
      "step": 1800
    },
    {
      "epoch": 0.6,
      "grad_norm": 5.90625,
      "learning_rate": 8.275862068965517e-07,
      "logits/chosen": -3.228177547454834,
      "logits/rejected": -3.2369861602783203,
      "logps/chosen": -155.81085205078125,
      "logps/rejected": -132.52528381347656,
      "loss": 0.6937,
      "rewards/accuracies": 0.5061110854148865,
      "rewards/chosen": 0.007656366564333439,
      "rewards/margins": 0.006786289159208536,
      "rewards/rejected": 0.0008700773469172418,
      "step": 2700
    },
    {
      "epoch": 0.6,
      "eval_logits/chosen": -3.013126850128174,
      "eval_logits/rejected": -3.027287483215332,
      "eval_logps/chosen": -157.47140502929688,
      "eval_logps/rejected": -135.03514099121094,
      "eval_loss": 0.6915069818496704,
      "eval_rewards/accuracies": 0.52182537317276,
      "eval_rewards/chosen": 0.016774658113718033,
      "eval_rewards/margins": 0.012120993807911873,
      "eval_rewards/rejected": 0.004653665702790022,
      "eval_runtime": 616.7891,
      "eval_samples_per_second": 1.621,
      "eval_steps_per_second": 0.102,
      "step": 2700
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.75,
      "learning_rate": 7.586206896551724e-07,
      "logits/chosen": -3.235166549682617,
      "logits/rejected": -3.2431561946868896,
      "logps/chosen": -156.77923583984375,
      "logps/rejected": -133.5782012939453,
      "loss": 0.6962,
      "rewards/accuracies": 0.5005555748939514,
      "rewards/chosen": 0.00986157450824976,
      "rewards/margins": 0.0027794605121016502,
      "rewards/rejected": 0.007082113530486822,
      "step": 3600
    },
    {
      "epoch": 0.8,
      "eval_logits/chosen": -3.013155937194824,
      "eval_logits/rejected": -3.027271032333374,
      "eval_logps/chosen": -157.477294921875,
      "eval_logps/rejected": -135.04197692871094,
      "eval_loss": 0.6910666823387146,
      "eval_rewards/accuracies": 0.52182537317276,
      "eval_rewards/chosen": 0.013831890188157558,
      "eval_rewards/margins": 0.012587492354214191,
      "eval_rewards/rejected": 0.0012443973682820797,
      "eval_runtime": 617.039,
      "eval_samples_per_second": 1.621,
      "eval_steps_per_second": 0.102,
      "step": 3600
    },
    {
      "epoch": 1.0,
      "grad_norm": 7.53125,
      "learning_rate": 6.896551724137931e-07,
      "logits/chosen": -3.2322757244110107,
      "logits/rejected": -3.242344379425049,
      "logps/chosen": -158.4110107421875,
      "logps/rejected": -134.92042541503906,
      "loss": 0.7006,
      "rewards/accuracies": 0.4866666793823242,
      "rewards/chosen": 0.014087031595408916,
      "rewards/margins": -0.005671821068972349,
      "rewards/rejected": 0.01975885219871998,
      "step": 4500
    },
    {
      "epoch": 1.0,
      "eval_logits/chosen": -3.013131618499756,
      "eval_logits/rejected": -3.027305841445923,
      "eval_logps/chosen": -157.4510498046875,
      "eval_logps/rejected": -135.02581787109375,
      "eval_loss": 0.688616931438446,
      "eval_rewards/accuracies": 0.5248016119003296,
      "eval_rewards/chosen": 0.026958029717206955,
      "eval_rewards/margins": 0.01763634942471981,
      "eval_rewards/rejected": 0.009321682155132294,
      "eval_runtime": 616.636,
      "eval_samples_per_second": 1.622,
      "eval_steps_per_second": 0.102,
      "step": 4500
    },
    {
      "epoch": 1.2,
      "grad_norm": 5.25,
      "learning_rate": 6.206896551724138e-07,
      "logits/chosen": -3.237251043319702,
      "logits/rejected": -3.2465975284576416,
      "logps/chosen": -157.82240295410156,
      "logps/rejected": -135.47181701660156,
      "loss": 0.6947,
      "rewards/accuracies": 0.5116666555404663,
      "rewards/chosen": 0.023857368156313896,
      "rewards/margins": 0.005588435102254152,
      "rewards/rejected": 0.01826893351972103,
      "step": 5400
    },
    {
      "epoch": 1.2,
      "eval_logits/chosen": -3.012895107269287,
      "eval_logits/rejected": -3.027073383331299,
      "eval_logps/chosen": -157.4395751953125,
      "eval_logps/rejected": -135.0121307373047,
      "eval_loss": 0.6891811490058899,
      "eval_rewards/accuracies": 0.5466269850730896,
      "eval_rewards/chosen": 0.032687753438949585,
      "eval_rewards/margins": 0.016526944935321808,
      "eval_rewards/rejected": 0.016160808503627777,
      "eval_runtime": 617.1923,
      "eval_samples_per_second": 1.62,
      "eval_steps_per_second": 0.102,
      "step": 5400
    },
    {
      "epoch": 1.4,
      "grad_norm": 10.8125,
      "learning_rate": 5.517241379310344e-07,
      "logits/chosen": -3.2299792766571045,
      "logits/rejected": -3.239255666732788,
      "logps/chosen": -157.90017700195312,
      "logps/rejected": -133.9901580810547,
      "loss": 0.6936,
      "rewards/accuracies": 0.5111111402511597,
      "rewards/chosen": 0.02419172413647175,
      "rewards/margins": 0.007937068119645119,
      "rewards/rejected": 0.01625465415418148,
      "step": 6300
    },
    {
      "epoch": 1.4,
      "eval_logits/chosen": -3.013338565826416,
      "eval_logits/rejected": -3.027513027191162,
      "eval_logps/chosen": -157.42361450195312,
      "eval_logps/rejected": -135.00498962402344,
      "eval_loss": 0.6873784065246582,
      "eval_rewards/accuracies": 0.5416666865348816,
      "eval_rewards/chosen": 0.04067719727754593,
      "eval_rewards/margins": 0.020947163924574852,
      "eval_rewards/rejected": 0.019730033352971077,
      "eval_runtime": 616.84,
      "eval_samples_per_second": 1.621,
      "eval_steps_per_second": 0.102,
      "step": 6300
    },
    {
      "epoch": 1.6,
      "grad_norm": 7.09375,
      "learning_rate": 4.827586206896552e-07,
      "logits/chosen": -3.2270286083221436,
      "logits/rejected": -3.2372705936431885,
      "logps/chosen": -156.0079345703125,
      "logps/rejected": -133.16619873046875,
      "loss": 0.6928,
      "rewards/accuracies": 0.523888885974884,
      "rewards/chosen": 0.026351599022746086,
      "rewards/margins": 0.009284625761210918,
      "rewards/rejected": 0.017066972330212593,
      "step": 7200
    },
    {
      "epoch": 1.6,
      "eval_logits/chosen": -3.0130512714385986,
      "eval_logits/rejected": -3.027252674102783,
      "eval_logps/chosen": -157.42318725585938,
      "eval_logps/rejected": -135.01333618164062,
      "eval_loss": 0.6849371194839478,
      "eval_rewards/accuracies": 0.5535714030265808,
      "eval_rewards/chosen": 0.040884003043174744,
      "eval_rewards/margins": 0.02531503513455391,
      "eval_rewards/rejected": 0.015568966045975685,
      "eval_runtime": 616.6049,
      "eval_samples_per_second": 1.622,
      "eval_steps_per_second": 0.102,
      "step": 7200
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.59375,
      "learning_rate": 4.1379310344827586e-07,
      "logits/chosen": -3.233133316040039,
      "logits/rejected": -3.241081476211548,
      "logps/chosen": -156.33836364746094,
      "logps/rejected": -131.24473571777344,
      "loss": 0.692,
      "rewards/accuracies": 0.5299999713897705,
      "rewards/chosen": 0.026600120589137077,
      "rewards/margins": 0.011210680939257145,
      "rewards/rejected": 0.015389441512525082,
      "step": 8100
    },
    {
      "epoch": 1.8,
      "eval_logits/chosen": -3.0134432315826416,
      "eval_logits/rejected": -3.0276236534118652,
      "eval_logps/chosen": -157.4323272705078,
      "eval_logps/rejected": -135.00303649902344,
      "eval_loss": 0.6896921992301941,
      "eval_rewards/accuracies": 0.5208333134651184,
      "eval_rewards/chosen": 0.03631395846605301,
      "eval_rewards/margins": 0.01561205368489027,
      "eval_rewards/rejected": 0.020701901987195015,
      "eval_runtime": 616.6729,
      "eval_samples_per_second": 1.622,
      "eval_steps_per_second": 0.102,
      "step": 8100
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.46875,
      "learning_rate": 3.4482758620689656e-07,
      "logits/chosen": -3.2326736450195312,
      "logits/rejected": -3.2417149543762207,
      "logps/chosen": -157.4275360107422,
      "logps/rejected": -134.63137817382812,
      "loss": 0.6947,
      "rewards/accuracies": 0.5133333206176758,
      "rewards/chosen": 0.029039518907666206,
      "rewards/margins": 0.006200558505952358,
      "rewards/rejected": 0.022838961333036423,
      "step": 9000
    },
    {
      "epoch": 2.0,
      "eval_logits/chosen": -3.0129990577697754,
      "eval_logits/rejected": -3.0272481441497803,
      "eval_logps/chosen": -157.42913818359375,
      "eval_logps/rejected": -135.008056640625,
      "eval_loss": 0.687827467918396,
      "eval_rewards/accuracies": 0.5367063283920288,
      "eval_rewards/chosen": 0.03790082782506943,
      "eval_rewards/margins": 0.019700102508068085,
      "eval_rewards/rejected": 0.018200723454356194,
      "eval_runtime": 617.771,
      "eval_samples_per_second": 1.619,
      "eval_steps_per_second": 0.102,
      "step": 9000
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.53125,
      "learning_rate": 2.758620689655172e-07,
      "logits/chosen": -3.2307889461517334,
      "logits/rejected": -3.2417702674865723,
      "logps/chosen": -156.54052734375,
      "logps/rejected": -133.07623291015625,
      "loss": 0.6933,
      "rewards/accuracies": 0.5266666412353516,
      "rewards/chosen": 0.028480403125286102,
      "rewards/margins": 0.008195818401873112,
      "rewards/rejected": 0.020284580066800117,
      "step": 9900
    },
    {
      "epoch": 2.2,
      "eval_logits/chosen": -3.0130786895751953,
      "eval_logits/rejected": -3.0272867679595947,
      "eval_logps/chosen": -157.4261016845703,
      "eval_logps/rejected": -135.01654052734375,
      "eval_loss": 0.6852558851242065,
      "eval_rewards/accuracies": 0.5585317611694336,
      "eval_rewards/chosen": 0.039425503462553024,
      "eval_rewards/margins": 0.025462908670306206,
      "eval_rewards/rejected": 0.013962591998279095,
      "eval_runtime": 616.6357,
      "eval_samples_per_second": 1.622,
      "eval_steps_per_second": 0.102,
      "step": 9900
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.78125,
      "learning_rate": 2.0689655172413793e-07,
      "logits/chosen": -3.2307872772216797,
      "logits/rejected": -3.240145444869995,
      "logps/chosen": -159.01539611816406,
      "logps/rejected": -133.73573303222656,
      "loss": 0.6887,
      "rewards/accuracies": 0.5527777671813965,
      "rewards/chosen": 0.036232445389032364,
      "rewards/margins": 0.01768229715526104,
      "rewards/rejected": 0.018550144508481026,
      "step": 10800
    },
    {
      "epoch": 2.4,
      "eval_logits/chosen": -3.01315975189209,
      "eval_logits/rejected": -3.0273892879486084,
      "eval_logps/chosen": -157.42787170410156,
      "eval_logps/rejected": -135.01727294921875,
      "eval_loss": 0.6853997707366943,
      "eval_rewards/accuracies": 0.5486111044883728,
      "eval_rewards/chosen": 0.038546331226825714,
      "eval_rewards/margins": 0.024960007518529892,
      "eval_rewards/rejected": 0.013586324639618397,
      "eval_runtime": 616.6757,
      "eval_samples_per_second": 1.622,
      "eval_steps_per_second": 0.102,
      "step": 10800
    },
    {
      "epoch": 2.6,
      "grad_norm": 6.25,
      "learning_rate": 1.379310344827586e-07,
      "logits/chosen": -3.236450672149658,
      "logits/rejected": -3.243511199951172,
      "logps/chosen": -157.64993286132812,
      "logps/rejected": -134.20144653320312,
      "loss": 0.6883,
      "rewards/accuracies": 0.527222216129303,
      "rewards/chosen": 0.03525533899664879,
      "rewards/margins": 0.019149743020534515,
      "rewards/rejected": 0.016105594113469124,
      "step": 11700
    },
    {
      "epoch": 2.6,
      "eval_logits/chosen": -3.012953996658325,
      "eval_logits/rejected": -3.027122974395752,
      "eval_logps/chosen": -157.4271240234375,
      "eval_logps/rejected": -135.00865173339844,
      "eval_loss": 0.6870063543319702,
      "eval_rewards/accuracies": 0.5446428656578064,
      "eval_rewards/chosen": 0.0389074869453907,
      "eval_rewards/margins": 0.021000539883971214,
      "eval_rewards/rejected": 0.017906947061419487,
      "eval_runtime": 616.7063,
      "eval_samples_per_second": 1.622,
      "eval_steps_per_second": 0.102,
      "step": 11700
    },
    {
      "epoch": 2.8,
      "grad_norm": 6.9375,
      "learning_rate": 6.89655172413793e-08,
      "logits/chosen": -3.2290825843811035,
      "logits/rejected": -3.238609552383423,
      "logps/chosen": -155.42266845703125,
      "logps/rejected": -132.62841796875,
      "loss": 0.6884,
      "rewards/accuracies": 0.5483333468437195,
      "rewards/chosen": 0.03833283483982086,
      "rewards/margins": 0.018470091745257378,
      "rewards/rejected": 0.019862744957208633,
      "step": 12600
    },
    {
      "epoch": 2.8,
      "eval_logits/chosen": -3.013180732727051,
      "eval_logits/rejected": -3.0272974967956543,
      "eval_logps/chosen": -157.42144775390625,
      "eval_logps/rejected": -134.997314453125,
      "eval_loss": 0.6886058449745178,
      "eval_rewards/accuracies": 0.54067462682724,
      "eval_rewards/chosen": 0.041760578751564026,
      "eval_rewards/margins": 0.018192334100604057,
      "eval_rewards/rejected": 0.02356824465095997,
      "eval_runtime": 617.3224,
      "eval_samples_per_second": 1.62,
      "eval_steps_per_second": 0.102,
      "step": 12600
    }
  ],
  "logging_steps": 900,
  "max_steps": 13500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 900,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}