|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 44, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": -1.4075380563735962, |
|
"debug/policy_chosen_logps": -300.2610168457031, |
|
"debug/policy_rejected_logits": -1.3621673583984375, |
|
"debug/policy_rejected_logps": -278.803955078125, |
|
"debug/reference_chosen_logps": -300.2610168457031, |
|
"debug/reference_rejected_logps": -278.803955078125, |
|
"epoch": 0.022727272727272728, |
|
"grad_norm": 29.710163996651048, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4075380563735962, |
|
"logits/rejected": -1.3621673583984375, |
|
"logps/chosen": -300.2610168457031, |
|
"logps/rejected": -278.803955078125, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.658880352973938, |
|
"debug/policy_chosen_logps": -246.17088317871094, |
|
"debug/policy_rejected_logits": -1.3099008798599243, |
|
"debug/policy_rejected_logps": -291.26806640625, |
|
"debug/reference_chosen_logps": -249.18255615234375, |
|
"debug/reference_rejected_logps": -293.00592041015625, |
|
"epoch": 0.045454545454545456, |
|
"grad_norm": 23.678192495361564, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.658880352973938, |
|
"logits/rejected": -1.3099008798599243, |
|
"logps/chosen": -246.17088317871094, |
|
"logps/rejected": -291.26806640625, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03011680766940117, |
|
"rewards/margins": 0.012738131918013096, |
|
"rewards/rejected": 0.01737867295742035, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6099661588668823, |
|
"debug/policy_chosen_logps": -235.6559600830078, |
|
"debug/policy_rejected_logits": -1.2918318510055542, |
|
"debug/policy_rejected_logps": -312.91632080078125, |
|
"debug/reference_chosen_logps": -238.7340087890625, |
|
"debug/reference_rejected_logps": -312.7275390625, |
|
"epoch": 0.06818181818181818, |
|
"grad_norm": 34.989549498202656, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6099661588668823, |
|
"logits/rejected": -1.2918318510055542, |
|
"logps/chosen": -235.6559600830078, |
|
"logps/rejected": -312.91632080078125, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03078046813607216, |
|
"rewards/margins": 0.032668597996234894, |
|
"rewards/rejected": -0.0018881321884691715, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.3408846855163574, |
|
"debug/policy_chosen_logps": -242.54039001464844, |
|
"debug/policy_rejected_logits": -1.2990057468414307, |
|
"debug/policy_rejected_logps": -295.8603515625, |
|
"debug/reference_chosen_logps": -242.50985717773438, |
|
"debug/reference_rejected_logps": -295.34893798828125, |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 32.869910385515084, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.3408846855163574, |
|
"logits/rejected": -1.2990057468414307, |
|
"logps/chosen": -242.54039001464844, |
|
"logps/rejected": -295.8603515625, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0003051948733627796, |
|
"rewards/margins": 0.0048088072799146175, |
|
"rewards/rejected": -0.005114002153277397, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5846848487854004, |
|
"debug/policy_chosen_logps": -272.6408386230469, |
|
"debug/policy_rejected_logits": -1.2951120138168335, |
|
"debug/policy_rejected_logps": -337.945068359375, |
|
"debug/reference_chosen_logps": -272.4866943359375, |
|
"debug/reference_rejected_logps": -338.4932861328125, |
|
"epoch": 0.11363636363636363, |
|
"grad_norm": 16.739518531494987, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5846848487854004, |
|
"logits/rejected": -1.2951120138168335, |
|
"logps/chosen": -272.6408386230469, |
|
"logps/rejected": -337.945068359375, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.001541042234748602, |
|
"rewards/margins": -0.007023181766271591, |
|
"rewards/rejected": 0.005482139065861702, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4175924062728882, |
|
"debug/policy_chosen_logps": -237.64132690429688, |
|
"debug/policy_rejected_logits": -1.3938319683074951, |
|
"debug/policy_rejected_logps": -245.30130004882812, |
|
"debug/reference_chosen_logps": -236.71168518066406, |
|
"debug/reference_rejected_logps": -243.54733276367188, |
|
"epoch": 0.13636363636363635, |
|
"grad_norm": 33.05997081062945, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4175924062728882, |
|
"logits/rejected": -1.3938319683074951, |
|
"logps/chosen": -237.64132690429688, |
|
"logps/rejected": -245.30130004882812, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00929628312587738, |
|
"rewards/margins": 0.008243197575211525, |
|
"rewards/rejected": -0.017539482563734055, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4232984781265259, |
|
"debug/policy_chosen_logps": -243.94329833984375, |
|
"debug/policy_rejected_logits": -1.4116002321243286, |
|
"debug/policy_rejected_logps": -248.73439025878906, |
|
"debug/reference_chosen_logps": -241.92886352539062, |
|
"debug/reference_rejected_logps": -247.91339111328125, |
|
"epoch": 0.1590909090909091, |
|
"grad_norm": 22.910586896410035, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4232984781265259, |
|
"logits/rejected": -1.4116002321243286, |
|
"logps/chosen": -243.94329833984375, |
|
"logps/rejected": -248.73439025878906, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.020144157111644745, |
|
"rewards/margins": -0.011934204027056694, |
|
"rewards/rejected": -0.00820995308458805, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4712331295013428, |
|
"debug/policy_chosen_logps": -233.98004150390625, |
|
"debug/policy_rejected_logits": -1.442948579788208, |
|
"debug/policy_rejected_logps": -264.7082214355469, |
|
"debug/reference_chosen_logps": -232.63677978515625, |
|
"debug/reference_rejected_logps": -265.5264892578125, |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 15.718654227738572, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4712331295013428, |
|
"logits/rejected": -1.442948579788208, |
|
"logps/chosen": -233.98004150390625, |
|
"logps/rejected": -264.7082214355469, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.013432646170258522, |
|
"rewards/margins": -0.021615436300635338, |
|
"rewards/rejected": 0.008182793855667114, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5352590084075928, |
|
"debug/policy_chosen_logps": -243.7238006591797, |
|
"debug/policy_rejected_logits": -1.5029191970825195, |
|
"debug/policy_rejected_logps": -266.58013916015625, |
|
"debug/reference_chosen_logps": -241.81459045410156, |
|
"debug/reference_rejected_logps": -264.5735778808594, |
|
"epoch": 0.20454545454545456, |
|
"grad_norm": 22.532643036918554, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5352590084075928, |
|
"logits/rejected": -1.5029191970825195, |
|
"logps/chosen": -243.7238006591797, |
|
"logps/rejected": -266.58013916015625, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.019092101603746414, |
|
"rewards/margins": 0.0009734919294714928, |
|
"rewards/rejected": -0.02006559446454048, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5353741645812988, |
|
"debug/policy_chosen_logps": -268.5412902832031, |
|
"debug/policy_rejected_logits": -1.5154732465744019, |
|
"debug/policy_rejected_logps": -282.08551025390625, |
|
"debug/reference_chosen_logps": -267.7362976074219, |
|
"debug/reference_rejected_logps": -279.14666748046875, |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 23.319868382806877, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5353741645812988, |
|
"logits/rejected": -1.5154732465744019, |
|
"logps/chosen": -268.5412902832031, |
|
"logps/rejected": -282.08551025390625, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.008050002157688141, |
|
"rewards/margins": 0.02133851870894432, |
|
"rewards/rejected": -0.02938852459192276, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4472002983093262, |
|
"debug/policy_chosen_logps": -240.2578125, |
|
"debug/policy_rejected_logits": -1.392478108406067, |
|
"debug/policy_rejected_logps": -324.4321594238281, |
|
"debug/reference_chosen_logps": -234.3917236328125, |
|
"debug/reference_rejected_logps": -319.2920837402344, |
|
"epoch": 0.25, |
|
"grad_norm": 17.992513913858307, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4472002983093262, |
|
"logits/rejected": -1.392478108406067, |
|
"logps/chosen": -240.2578125, |
|
"logps/rejected": -324.4321594238281, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.05866073817014694, |
|
"rewards/margins": -0.007260264828801155, |
|
"rewards/rejected": -0.05140047147870064, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.3548312187194824, |
|
"debug/policy_chosen_logps": -231.70309448242188, |
|
"debug/policy_rejected_logits": -1.3657501935958862, |
|
"debug/policy_rejected_logps": -271.7943420410156, |
|
"debug/reference_chosen_logps": -226.82766723632812, |
|
"debug/reference_rejected_logps": -266.321533203125, |
|
"epoch": 0.2727272727272727, |
|
"grad_norm": 73.41147441549734, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.3548312187194824, |
|
"logits/rejected": -1.3657501935958862, |
|
"logps/chosen": -231.70309448242188, |
|
"logps/rejected": -271.7943420410156, |
|
"loss": 0.5095, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04875440523028374, |
|
"rewards/margins": 0.0059736063703894615, |
|
"rewards/rejected": -0.054728008806705475, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5568829774856567, |
|
"debug/policy_chosen_logps": -251.14842224121094, |
|
"debug/policy_rejected_logits": -1.3776763677597046, |
|
"debug/policy_rejected_logps": -241.90863037109375, |
|
"debug/reference_chosen_logps": -250.3316650390625, |
|
"debug/reference_rejected_logps": -240.4967803955078, |
|
"epoch": 0.29545454545454547, |
|
"grad_norm": 31.567012366501405, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5568829774856567, |
|
"logits/rejected": -1.3776763677597046, |
|
"logps/chosen": -251.14842224121094, |
|
"logps/rejected": -241.90863037109375, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.008167648687958717, |
|
"rewards/margins": 0.005950965452939272, |
|
"rewards/rejected": -0.014118613675236702, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.2634949684143066, |
|
"debug/policy_chosen_logps": -259.905517578125, |
|
"debug/policy_rejected_logits": -1.2515358924865723, |
|
"debug/policy_rejected_logps": -358.7181091308594, |
|
"debug/reference_chosen_logps": -257.572021484375, |
|
"debug/reference_rejected_logps": -352.8564758300781, |
|
"epoch": 0.3181818181818182, |
|
"grad_norm": 33.36598159337087, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.2634949684143066, |
|
"logits/rejected": -1.2515358924865723, |
|
"logps/chosen": -259.905517578125, |
|
"logps/rejected": -358.7181091308594, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.023335151374340057, |
|
"rewards/margins": 0.03528135269880295, |
|
"rewards/rejected": -0.058616504073143005, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5755295753479004, |
|
"debug/policy_chosen_logps": -271.21990966796875, |
|
"debug/policy_rejected_logits": -1.5602691173553467, |
|
"debug/policy_rejected_logps": -291.444580078125, |
|
"debug/reference_chosen_logps": -269.54534912109375, |
|
"debug/reference_rejected_logps": -286.5006408691406, |
|
"epoch": 0.3409090909090909, |
|
"grad_norm": 19.995661189240433, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5755295753479004, |
|
"logits/rejected": -1.5602691173553467, |
|
"logps/chosen": -271.21990966796875, |
|
"logps/rejected": -291.444580078125, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.01674526184797287, |
|
"rewards/margins": 0.032694172114133835, |
|
"rewards/rejected": -0.049439430236816406, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5511504411697388, |
|
"debug/policy_chosen_logps": -207.6850128173828, |
|
"debug/policy_rejected_logits": -1.3656481504440308, |
|
"debug/policy_rejected_logps": -259.1358642578125, |
|
"debug/reference_chosen_logps": -206.66064453125, |
|
"debug/reference_rejected_logps": -257.42388916015625, |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 20.444894909265596, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5511504411697388, |
|
"logits/rejected": -1.3656481504440308, |
|
"logps/chosen": -207.6850128173828, |
|
"logps/rejected": -259.1358642578125, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.01024345401674509, |
|
"rewards/margins": 0.006875896360725164, |
|
"rewards/rejected": -0.017119349911808968, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.440645456314087, |
|
"debug/policy_chosen_logps": -246.9857940673828, |
|
"debug/policy_rejected_logits": -1.220913290977478, |
|
"debug/policy_rejected_logps": -311.4046325683594, |
|
"debug/reference_chosen_logps": -246.19845581054688, |
|
"debug/reference_rejected_logps": -311.56048583984375, |
|
"epoch": 0.38636363636363635, |
|
"grad_norm": 24.901672749840092, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.440645456314087, |
|
"logits/rejected": -1.220913290977478, |
|
"logps/chosen": -246.9857940673828, |
|
"logps/rejected": -311.4046325683594, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.007873362861573696, |
|
"rewards/margins": -0.009431838989257812, |
|
"rewards/rejected": 0.001558477059006691, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.410476803779602, |
|
"debug/policy_chosen_logps": -272.2285461425781, |
|
"debug/policy_rejected_logits": -1.310629963874817, |
|
"debug/policy_rejected_logps": -228.26686096191406, |
|
"debug/reference_chosen_logps": -270.7967224121094, |
|
"debug/reference_rejected_logps": -229.7943878173828, |
|
"epoch": 0.4090909090909091, |
|
"grad_norm": 38.60148122412081, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.410476803779602, |
|
"logits/rejected": -1.310629963874817, |
|
"logps/chosen": -272.2285461425781, |
|
"logps/rejected": -228.26686096191406, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.014318370260298252, |
|
"rewards/margins": -0.029593737795948982, |
|
"rewards/rejected": 0.015275364741683006, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1632928848266602, |
|
"debug/policy_chosen_logps": -240.0203399658203, |
|
"debug/policy_rejected_logits": -1.5429524183273315, |
|
"debug/policy_rejected_logps": -275.91461181640625, |
|
"debug/reference_chosen_logps": -239.4716796875, |
|
"debug/reference_rejected_logps": -274.268310546875, |
|
"epoch": 0.4318181818181818, |
|
"grad_norm": 60.28746463237661, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1632928848266602, |
|
"logits/rejected": -1.5429524183273315, |
|
"logps/chosen": -240.0203399658203, |
|
"logps/rejected": -275.91461181640625, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.005486545152962208, |
|
"rewards/margins": 0.010976830497384071, |
|
"rewards/rejected": -0.016463376581668854, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.521999478340149, |
|
"debug/policy_chosen_logps": -278.55535888671875, |
|
"debug/policy_rejected_logits": -1.4620181322097778, |
|
"debug/policy_rejected_logps": -259.122802734375, |
|
"debug/reference_chosen_logps": -278.84912109375, |
|
"debug/reference_rejected_logps": -258.3992919921875, |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 46.60154697948134, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.521999478340149, |
|
"logits/rejected": -1.4620181322097778, |
|
"logps/chosen": -278.55535888671875, |
|
"logps/rejected": -259.122802734375, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0029377557802945375, |
|
"rewards/margins": 0.010172786191105843, |
|
"rewards/rejected": -0.007235030643641949, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1010836362838745, |
|
"debug/policy_chosen_logps": -272.86285400390625, |
|
"debug/policy_rejected_logits": -1.3311625719070435, |
|
"debug/policy_rejected_logps": -271.2930603027344, |
|
"debug/reference_chosen_logps": -269.4169921875, |
|
"debug/reference_rejected_logps": -273.9818420410156, |
|
"epoch": 0.4772727272727273, |
|
"grad_norm": 17.643110187730265, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1010836362838745, |
|
"logits/rejected": -1.3311625719070435, |
|
"logps/chosen": -272.86285400390625, |
|
"logps/rejected": -271.2930603027344, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.03445838764309883, |
|
"rewards/margins": -0.06134599447250366, |
|
"rewards/rejected": 0.02688760869204998, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5071077346801758, |
|
"debug/policy_chosen_logps": -241.43759155273438, |
|
"debug/policy_rejected_logits": -1.1153310537338257, |
|
"debug/policy_rejected_logps": -331.3087158203125, |
|
"debug/reference_chosen_logps": -245.50453186035156, |
|
"debug/reference_rejected_logps": -333.63140869140625, |
|
"epoch": 0.5, |
|
"grad_norm": 44.46366116213499, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5071077346801758, |
|
"logits/rejected": -1.1153310537338257, |
|
"logps/chosen": -241.43759155273438, |
|
"logps/rejected": -331.3087158203125, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.040669478476047516, |
|
"rewards/margins": 0.017442265525460243, |
|
"rewards/rejected": 0.023227212950587273, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.451465368270874, |
|
"debug/policy_chosen_logps": -212.37417602539062, |
|
"debug/policy_rejected_logits": -1.3166741132736206, |
|
"debug/policy_rejected_logps": -314.9155578613281, |
|
"debug/reference_chosen_logps": -212.66213989257812, |
|
"debug/reference_rejected_logps": -312.25823974609375, |
|
"epoch": 0.5227272727272727, |
|
"grad_norm": 20.606755035100644, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.451465368270874, |
|
"logits/rejected": -1.3166741132736206, |
|
"logps/chosen": -212.37417602539062, |
|
"logps/rejected": -314.9155578613281, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.002879485720768571, |
|
"rewards/margins": 0.029452495276927948, |
|
"rewards/rejected": -0.02657300978899002, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4294952154159546, |
|
"debug/policy_chosen_logps": -204.76612854003906, |
|
"debug/policy_rejected_logits": -1.4408621788024902, |
|
"debug/policy_rejected_logps": -281.35980224609375, |
|
"debug/reference_chosen_logps": -203.98245239257812, |
|
"debug/reference_rejected_logps": -276.5864562988281, |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 38.391670513493104, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4294952154159546, |
|
"logits/rejected": -1.4408621788024902, |
|
"logps/chosen": -204.76612854003906, |
|
"logps/rejected": -281.35980224609375, |
|
"loss": 0.4861, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.007836684584617615, |
|
"rewards/margins": 0.03989652544260025, |
|
"rewards/rejected": -0.047733210027217865, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.215004801750183, |
|
"debug/policy_chosen_logps": -239.42361450195312, |
|
"debug/policy_rejected_logits": -0.9678577780723572, |
|
"debug/policy_rejected_logps": -284.46954345703125, |
|
"debug/reference_chosen_logps": -234.15310668945312, |
|
"debug/reference_rejected_logps": -274.2942199707031, |
|
"epoch": 0.5681818181818182, |
|
"grad_norm": 72.9941160097735, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.215004801750183, |
|
"logits/rejected": -0.9678577780723572, |
|
"logps/chosen": -239.42361450195312, |
|
"logps/rejected": -284.46954345703125, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.052704982459545135, |
|
"rewards/margins": 0.04904847964644432, |
|
"rewards/rejected": -0.10175345838069916, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.391048550605774, |
|
"debug/policy_chosen_logps": -260.8648986816406, |
|
"debug/policy_rejected_logits": -1.4560633897781372, |
|
"debug/policy_rejected_logps": -346.93145751953125, |
|
"debug/reference_chosen_logps": -255.39593505859375, |
|
"debug/reference_rejected_logps": -336.768310546875, |
|
"epoch": 0.5909090909090909, |
|
"grad_norm": 36.44224120417283, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.391048550605774, |
|
"logits/rejected": -1.4560633897781372, |
|
"logps/chosen": -260.8648986816406, |
|
"logps/rejected": -346.93145751953125, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.05468965321779251, |
|
"rewards/margins": 0.04694166034460068, |
|
"rewards/rejected": -0.10163131356239319, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5338735580444336, |
|
"debug/policy_chosen_logps": -222.9837646484375, |
|
"debug/policy_rejected_logits": -1.471374273300171, |
|
"debug/policy_rejected_logps": -278.50897216796875, |
|
"debug/reference_chosen_logps": -223.839599609375, |
|
"debug/reference_rejected_logps": -273.5242919921875, |
|
"epoch": 0.6136363636363636, |
|
"grad_norm": 18.787474855932984, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5338735580444336, |
|
"logits/rejected": -1.471374273300171, |
|
"logps/chosen": -222.9837646484375, |
|
"logps/rejected": -278.50897216796875, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.008558426052331924, |
|
"rewards/margins": 0.05840524658560753, |
|
"rewards/rejected": -0.049846820533275604, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5032626390457153, |
|
"debug/policy_chosen_logps": -267.772705078125, |
|
"debug/policy_rejected_logits": -1.4030882120132446, |
|
"debug/policy_rejected_logps": -366.5946350097656, |
|
"debug/reference_chosen_logps": -263.9977722167969, |
|
"debug/reference_rejected_logps": -362.62274169921875, |
|
"epoch": 0.6363636363636364, |
|
"grad_norm": 89.25136180706029, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5032626390457153, |
|
"logits/rejected": -1.4030882120132446, |
|
"logps/chosen": -267.772705078125, |
|
"logps/rejected": -366.5946350097656, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.03774913772940636, |
|
"rewards/margins": 0.0019695255905389786, |
|
"rewards/rejected": -0.039718665182590485, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.3853955268859863, |
|
"debug/policy_chosen_logps": -291.3934631347656, |
|
"debug/policy_rejected_logits": -1.288482427597046, |
|
"debug/policy_rejected_logps": -264.8169250488281, |
|
"debug/reference_chosen_logps": -287.48876953125, |
|
"debug/reference_rejected_logps": -263.4806823730469, |
|
"epoch": 0.6590909090909091, |
|
"grad_norm": 18.946606370821538, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.3853955268859863, |
|
"logits/rejected": -1.288482427597046, |
|
"logps/chosen": -291.3934631347656, |
|
"logps/rejected": -264.8169250488281, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.03904670476913452, |
|
"rewards/margins": -0.025684315711259842, |
|
"rewards/rejected": -0.013362388126552105, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.3024711608886719, |
|
"debug/policy_chosen_logps": -327.91033935546875, |
|
"debug/policy_rejected_logits": -1.5670039653778076, |
|
"debug/policy_rejected_logps": -321.4190368652344, |
|
"debug/reference_chosen_logps": -323.4125061035156, |
|
"debug/reference_rejected_logps": -318.29742431640625, |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 67.52699012019576, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.3024711608886719, |
|
"logits/rejected": -1.5670039653778076, |
|
"logps/chosen": -327.91033935546875, |
|
"logps/rejected": -321.4190368652344, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.04497835040092468, |
|
"rewards/margins": -0.013762515038251877, |
|
"rewards/rejected": -0.031215837225317955, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5216386318206787, |
|
"debug/policy_chosen_logps": -257.83624267578125, |
|
"debug/policy_rejected_logits": -1.4058371782302856, |
|
"debug/policy_rejected_logps": -300.9150390625, |
|
"debug/reference_chosen_logps": -257.880126953125, |
|
"debug/reference_rejected_logps": -298.1737060546875, |
|
"epoch": 0.7045454545454546, |
|
"grad_norm": 20.060782775941146, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5216386318206787, |
|
"logits/rejected": -1.4058371782302856, |
|
"logps/chosen": -257.83624267578125, |
|
"logps/rejected": -300.9150390625, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.00043886248022317886, |
|
"rewards/margins": 0.027851875871419907, |
|
"rewards/rejected": -0.027413014322519302, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.2779229879379272, |
|
"debug/policy_chosen_logps": -201.38796997070312, |
|
"debug/policy_rejected_logits": -1.156309962272644, |
|
"debug/policy_rejected_logps": -257.59429931640625, |
|
"debug/reference_chosen_logps": -206.09375, |
|
"debug/reference_rejected_logps": -257.58074951171875, |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 75.52018869219997, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.2779229879379272, |
|
"logits/rejected": -1.156309962272644, |
|
"logps/chosen": -201.38796997070312, |
|
"logps/rejected": -257.59429931640625, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04705776274204254, |
|
"rewards/margins": 0.04719316214323044, |
|
"rewards/rejected": -0.00013540498912334442, |
|
"step": 32 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5799387693405151, |
|
"debug/policy_chosen_logps": -251.5661163330078, |
|
"debug/policy_rejected_logits": -1.2778719663619995, |
|
"debug/policy_rejected_logps": -302.98028564453125, |
|
"debug/reference_chosen_logps": -253.25216674804688, |
|
"debug/reference_rejected_logps": -301.58880615234375, |
|
"epoch": 0.75, |
|
"grad_norm": 41.55558759207921, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5799387693405151, |
|
"logits/rejected": -1.2778719663619995, |
|
"logps/chosen": -251.5661163330078, |
|
"logps/rejected": -302.98028564453125, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.016860580071806908, |
|
"rewards/margins": 0.030775029212236404, |
|
"rewards/rejected": -0.01391445193439722, |
|
"step": 33 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.3849577903747559, |
|
"debug/policy_chosen_logps": -243.8576202392578, |
|
"debug/policy_rejected_logits": -1.3409092426300049, |
|
"debug/policy_rejected_logps": -280.698486328125, |
|
"debug/reference_chosen_logps": -245.44943237304688, |
|
"debug/reference_rejected_logps": -283.48333740234375, |
|
"epoch": 0.7727272727272727, |
|
"grad_norm": 14.362369526795225, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.3849577903747559, |
|
"logits/rejected": -1.3409092426300049, |
|
"logps/chosen": -243.8576202392578, |
|
"logps/rejected": -280.698486328125, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.015918217599391937, |
|
"rewards/margins": -0.011930178850889206, |
|
"rewards/rejected": 0.027848394587635994, |
|
"step": 34 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.3460434675216675, |
|
"debug/policy_chosen_logps": -281.47344970703125, |
|
"debug/policy_rejected_logits": -1.1573445796966553, |
|
"debug/policy_rejected_logps": -344.3446044921875, |
|
"debug/reference_chosen_logps": -282.89141845703125, |
|
"debug/reference_rejected_logps": -341.33868408203125, |
|
"epoch": 0.7954545454545454, |
|
"grad_norm": 13.1284617391076, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.3460434675216675, |
|
"logits/rejected": -1.1573445796966553, |
|
"logps/chosen": -281.47344970703125, |
|
"logps/rejected": -344.3446044921875, |
|
"loss": 0.4686, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.01417986024171114, |
|
"rewards/margins": 0.04423864185810089, |
|
"rewards/rejected": -0.030058782547712326, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.406193733215332, |
|
"debug/policy_chosen_logps": -207.40655517578125, |
|
"debug/policy_rejected_logits": -1.238633394241333, |
|
"debug/policy_rejected_logps": -262.58428955078125, |
|
"debug/reference_chosen_logps": -213.7412567138672, |
|
"debug/reference_rejected_logps": -260.3867492675781, |
|
"epoch": 0.8181818181818182, |
|
"grad_norm": 32.43079232408406, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.406193733215332, |
|
"logits/rejected": -1.238633394241333, |
|
"logps/chosen": -207.40655517578125, |
|
"logps/rejected": -262.58428955078125, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0633469745516777, |
|
"rewards/margins": 0.08532249182462692, |
|
"rewards/rejected": -0.02197551727294922, |
|
"step": 36 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6442923545837402, |
|
"debug/policy_chosen_logps": -258.9522705078125, |
|
"debug/policy_rejected_logits": -1.4356112480163574, |
|
"debug/policy_rejected_logps": -293.63323974609375, |
|
"debug/reference_chosen_logps": -264.2718505859375, |
|
"debug/reference_rejected_logps": -291.06591796875, |
|
"epoch": 0.8409090909090909, |
|
"grad_norm": 41.971026053468705, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6442923545837402, |
|
"logits/rejected": -1.4356112480163574, |
|
"logps/chosen": -258.9522705078125, |
|
"logps/rejected": -293.63323974609375, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.05319575220346451, |
|
"rewards/margins": 0.07886872440576553, |
|
"rewards/rejected": -0.025672968477010727, |
|
"step": 37 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5740854740142822, |
|
"debug/policy_chosen_logps": -229.13720703125, |
|
"debug/policy_rejected_logits": -1.4220911264419556, |
|
"debug/policy_rejected_logps": -265.41192626953125, |
|
"debug/reference_chosen_logps": -232.80499267578125, |
|
"debug/reference_rejected_logps": -263.7989501953125, |
|
"epoch": 0.8636363636363636, |
|
"grad_norm": 13.989957061611456, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5740854740142822, |
|
"logits/rejected": -1.4220911264419556, |
|
"logps/chosen": -229.13720703125, |
|
"logps/rejected": -265.41192626953125, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03667771816253662, |
|
"rewards/margins": 0.05280757695436478, |
|
"rewards/rejected": -0.016129855066537857, |
|
"step": 38 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6223005056381226, |
|
"debug/policy_chosen_logps": -289.09027099609375, |
|
"debug/policy_rejected_logits": -1.5821017026901245, |
|
"debug/policy_rejected_logps": -274.63519287109375, |
|
"debug/reference_chosen_logps": -284.13433837890625, |
|
"debug/reference_rejected_logps": -273.46636962890625, |
|
"epoch": 0.8863636363636364, |
|
"grad_norm": 15.865701906281847, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6223005056381226, |
|
"logits/rejected": -1.5821017026901245, |
|
"logps/chosen": -289.09027099609375, |
|
"logps/rejected": -274.63519287109375, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.049559421837329865, |
|
"rewards/margins": -0.03787106275558472, |
|
"rewards/rejected": -0.011688357219099998, |
|
"step": 39 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.445629358291626, |
|
"debug/policy_chosen_logps": -197.75921630859375, |
|
"debug/policy_rejected_logits": -1.4035519361495972, |
|
"debug/policy_rejected_logps": -253.138916015625, |
|
"debug/reference_chosen_logps": -195.52206420898438, |
|
"debug/reference_rejected_logps": -253.06124877929688, |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 23.689590897900334, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.445629358291626, |
|
"logits/rejected": -1.4035519361495972, |
|
"logps/chosen": -197.75921630859375, |
|
"logps/rejected": -253.138916015625, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.022371472790837288, |
|
"rewards/margins": -0.021594811230897903, |
|
"rewards/rejected": -0.0007766615599393845, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5050469636917114, |
|
"debug/policy_chosen_logps": -240.60708618164062, |
|
"debug/policy_rejected_logits": -1.2861061096191406, |
|
"debug/policy_rejected_logps": -264.02239990234375, |
|
"debug/reference_chosen_logps": -240.14486694335938, |
|
"debug/reference_rejected_logps": -259.6829833984375, |
|
"epoch": 0.9318181818181818, |
|
"grad_norm": 19.441334795087283, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5050469636917114, |
|
"logits/rejected": -1.2861061096191406, |
|
"logps/chosen": -240.60708618164062, |
|
"logps/rejected": -264.02239990234375, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.004622154403477907, |
|
"rewards/margins": 0.038772162050008774, |
|
"rewards/rejected": -0.04339431971311569, |
|
"step": 41 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.679877519607544, |
|
"debug/policy_chosen_logps": -234.7102813720703, |
|
"debug/policy_rejected_logits": -1.480136752128601, |
|
"debug/policy_rejected_logps": -351.16754150390625, |
|
"debug/reference_chosen_logps": -227.78549194335938, |
|
"debug/reference_rejected_logps": -342.3416442871094, |
|
"epoch": 0.9545454545454546, |
|
"grad_norm": 50.79320866145651, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.679877519607544, |
|
"logits/rejected": -1.480136752128601, |
|
"logps/chosen": -234.7102813720703, |
|
"logps/rejected": -351.16754150390625, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0692477598786354, |
|
"rewards/margins": 0.019011324271559715, |
|
"rewards/rejected": -0.08825908601284027, |
|
"step": 42 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6371181011199951, |
|
"debug/policy_chosen_logps": -240.50299072265625, |
|
"debug/policy_rejected_logits": -1.487899899482727, |
|
"debug/policy_rejected_logps": -341.2508239746094, |
|
"debug/reference_chosen_logps": -238.32655334472656, |
|
"debug/reference_rejected_logps": -331.7200622558594, |
|
"epoch": 0.9772727272727273, |
|
"grad_norm": 20.71302449718998, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6371181011199951, |
|
"logits/rejected": -1.487899899482727, |
|
"logps/chosen": -240.50299072265625, |
|
"logps/rejected": -341.2508239746094, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.021764487028121948, |
|
"rewards/margins": 0.07354332506656647, |
|
"rewards/rejected": -0.09530781209468842, |
|
"step": 43 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.2511305809020996, |
|
"debug/policy_chosen_logps": -212.03353881835938, |
|
"debug/policy_rejected_logits": -1.1813651323318481, |
|
"debug/policy_rejected_logps": -274.3526611328125, |
|
"debug/reference_chosen_logps": -210.2974853515625, |
|
"debug/reference_rejected_logps": -270.35479736328125, |
|
"epoch": 1.0, |
|
"grad_norm": 65.73660428884652, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.2511305809020996, |
|
"logits/rejected": -1.1813651323318481, |
|
"logps/chosen": -212.03353881835938, |
|
"logps/rejected": -274.3526611328125, |
|
"loss": 0.4684, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.017360497266054153, |
|
"rewards/margins": 0.022618159651756287, |
|
"rewards/rejected": -0.03997865691781044, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 44, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4935350160707127, |
|
"train_runtime": 155.2111, |
|
"train_samples_per_second": 17.937, |
|
"train_steps_per_second": 0.283 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 44, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|