|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 288, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010416666666666666, |
|
"grad_norm": 9.275770073122036, |
|
"learning_rate": 9.788889404119949e-09, |
|
"logits/chosen": -2.590585231781006, |
|
"logits/rejected": -2.5664222240448, |
|
"logps/chosen": -80.29847717285156, |
|
"logps/rejected": -53.10200881958008, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 8.275367876853776, |
|
"learning_rate": 9.78888940411995e-08, |
|
"logits/chosen": -2.556431531906128, |
|
"logits/rejected": -2.538444995880127, |
|
"logps/chosen": -87.91434478759766, |
|
"logps/rejected": -81.0243148803711, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.2152777761220932, |
|
"rewards/chosen": 1.1403658390918281e-05, |
|
"rewards/margins": -0.00018849593470804393, |
|
"rewards/rejected": 0.00019989960128441453, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 7.402776854966623, |
|
"learning_rate": 1.95777788082399e-07, |
|
"logits/chosen": -2.6145739555358887, |
|
"logits/rejected": -2.565951347351074, |
|
"logps/chosen": -103.30118560791016, |
|
"logps/rejected": -89.87110137939453, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.26249998807907104, |
|
"rewards/chosen": 0.0007008779793977737, |
|
"rewards/margins": 0.0005141849396750331, |
|
"rewards/rejected": 0.00018669303972274065, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 9.949219453281035, |
|
"learning_rate": 2.8278173946573535e-07, |
|
"logits/chosen": -2.5197548866271973, |
|
"logits/rejected": -2.534052848815918, |
|
"logps/chosen": -67.16825103759766, |
|
"logps/rejected": -74.81047821044922, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": 0.003788034664466977, |
|
"rewards/margins": 0.002163059776648879, |
|
"rewards/rejected": 0.00162497500423342, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 6.4502947310265, |
|
"learning_rate": 2.7182120692830374e-07, |
|
"logits/chosen": -2.5752604007720947, |
|
"logits/rejected": -2.562830924987793, |
|
"logps/chosen": -71.4312973022461, |
|
"logps/rejected": -70.39434814453125, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": 0.010071559809148312, |
|
"rewards/margins": 0.005580292548984289, |
|
"rewards/rejected": 0.004491268657147884, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 9.621064053414667, |
|
"learning_rate": 2.6086067439087217e-07, |
|
"logits/chosen": -2.480384111404419, |
|
"logits/rejected": -2.492711067199707, |
|
"logps/chosen": -49.86461639404297, |
|
"logps/rejected": -58.42780685424805, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.23125000298023224, |
|
"rewards/chosen": 0.014595480635762215, |
|
"rewards/margins": 0.00781795009970665, |
|
"rewards/rejected": 0.006777531001716852, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 8.026421482041735, |
|
"learning_rate": 2.4990014185344056e-07, |
|
"logits/chosen": -2.5557591915130615, |
|
"logits/rejected": -2.5367629528045654, |
|
"logps/chosen": -77.70902252197266, |
|
"logps/rejected": -79.41876983642578, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.019152989611029625, |
|
"rewards/margins": 0.020297441631555557, |
|
"rewards/rejected": -0.0011444514384493232, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 9.050000945016462, |
|
"learning_rate": 2.3893960931600894e-07, |
|
"logits/chosen": -2.519299030303955, |
|
"logits/rejected": -2.5085341930389404, |
|
"logps/chosen": -94.42185974121094, |
|
"logps/rejected": -86.42499542236328, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.02707870863378048, |
|
"rewards/margins": 0.040806300938129425, |
|
"rewards/rejected": -0.013727596029639244, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 12.300153236365405, |
|
"learning_rate": 2.2797907677857735e-07, |
|
"logits/chosen": -2.5100162029266357, |
|
"logits/rejected": -2.4531941413879395, |
|
"logps/chosen": -82.90911865234375, |
|
"logps/rejected": -81.5802230834961, |
|
"loss": 0.6702, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.038527704775333405, |
|
"rewards/margins": 0.06947566568851471, |
|
"rewards/rejected": -0.030947959050536156, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 12.056245748843725, |
|
"learning_rate": 2.1701854424114576e-07, |
|
"logits/chosen": -2.449734687805176, |
|
"logits/rejected": -2.444124698638916, |
|
"logps/chosen": -56.5576057434082, |
|
"logps/rejected": -69.25230407714844, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.26249998807907104, |
|
"rewards/chosen": -0.03629917651414871, |
|
"rewards/margins": 0.04515077546238899, |
|
"rewards/rejected": -0.081449955701828, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"grad_norm": 10.09831393288397, |
|
"learning_rate": 2.0605801170371414e-07, |
|
"logits/chosen": -2.4516613483428955, |
|
"logits/rejected": -2.431792736053467, |
|
"logps/chosen": -72.71832275390625, |
|
"logps/rejected": -80.36412048339844, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": 0.011315432377159595, |
|
"rewards/margins": 0.12874236702919006, |
|
"rewards/rejected": -0.11742694675922394, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"eval_logits/chosen": -2.4856722354888916, |
|
"eval_logits/rejected": -2.4671130180358887, |
|
"eval_logps/chosen": -77.75892639160156, |
|
"eval_logps/rejected": -89.28395080566406, |
|
"eval_loss": 0.6659004092216492, |
|
"eval_rewards/accuracies": 0.32341268658638, |
|
"eval_rewards/chosen": -0.04601436108350754, |
|
"eval_rewards/margins": 0.06882937997579575, |
|
"eval_rewards/rejected": -0.11484373360872269, |
|
"eval_runtime": 113.747, |
|
"eval_samples_per_second": 17.583, |
|
"eval_steps_per_second": 0.554, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1458333333333333, |
|
"grad_norm": 12.658367804317662, |
|
"learning_rate": 1.9509747916628253e-07, |
|
"logits/chosen": -2.4512288570404053, |
|
"logits/rejected": -2.4872188568115234, |
|
"logps/chosen": -66.1578369140625, |
|
"logps/rejected": -93.39311218261719, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.011113283224403858, |
|
"rewards/margins": 0.15658384561538696, |
|
"rewards/rejected": -0.16769713163375854, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 16.005314541222447, |
|
"learning_rate": 1.8413694662885094e-07, |
|
"logits/chosen": -2.4917149543762207, |
|
"logits/rejected": -2.4566116333007812, |
|
"logps/chosen": -105.62614440917969, |
|
"logps/rejected": -108.29341125488281, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.033508118242025375, |
|
"rewards/margins": 0.24412801861763, |
|
"rewards/rejected": -0.21061992645263672, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3541666666666667, |
|
"grad_norm": 18.85056869781382, |
|
"learning_rate": 1.7317641409141935e-07, |
|
"logits/chosen": -2.3912250995635986, |
|
"logits/rejected": -2.3987691402435303, |
|
"logps/chosen": -87.25572204589844, |
|
"logps/rejected": -103.68363189697266, |
|
"loss": 0.6116, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.00971260853111744, |
|
"rewards/margins": 0.19231685996055603, |
|
"rewards/rejected": -0.20202946662902832, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4583333333333333, |
|
"grad_norm": 22.031793806569805, |
|
"learning_rate": 1.6221588155398773e-07, |
|
"logits/chosen": -2.375826358795166, |
|
"logits/rejected": -2.40364408493042, |
|
"logps/chosen": -50.64940643310547, |
|
"logps/rejected": -78.66454315185547, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.019174393266439438, |
|
"rewards/margins": 0.17153134942054749, |
|
"rewards/rejected": -0.19070573151111603, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 16.53568031633389, |
|
"learning_rate": 1.5125534901655614e-07, |
|
"logits/chosen": -2.4368748664855957, |
|
"logits/rejected": -2.4099836349487305, |
|
"logps/chosen": -78.88619232177734, |
|
"logps/rejected": -97.15766906738281, |
|
"loss": 0.6116, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.09488637745380402, |
|
"rewards/margins": 0.2371809184551239, |
|
"rewards/rejected": -0.3320673108100891, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 18.221103188672174, |
|
"learning_rate": 1.4029481647912452e-07, |
|
"logits/chosen": -2.391329050064087, |
|
"logits/rejected": -2.402101993560791, |
|
"logps/chosen": -87.55098724365234, |
|
"logps/rejected": -114.4619369506836, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0933021530508995, |
|
"rewards/margins": 0.2802050709724426, |
|
"rewards/rejected": -0.3735072910785675, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7708333333333335, |
|
"grad_norm": 17.1043150905885, |
|
"learning_rate": 1.293342839416929e-07, |
|
"logits/chosen": -2.3455810546875, |
|
"logits/rejected": -2.3396453857421875, |
|
"logps/chosen": -77.65857696533203, |
|
"logps/rejected": -104.78330993652344, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.034337688237428665, |
|
"rewards/margins": 0.27259936928749084, |
|
"rewards/rejected": -0.306937038898468, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 21.686788778311172, |
|
"learning_rate": 1.1837375140426132e-07, |
|
"logits/chosen": -2.368490219116211, |
|
"logits/rejected": -2.337435483932495, |
|
"logps/chosen": -78.26847076416016, |
|
"logps/rejected": -98.10568237304688, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.09030274301767349, |
|
"rewards/margins": 0.25460249185562134, |
|
"rewards/rejected": -0.3449052572250366, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9791666666666665, |
|
"grad_norm": 18.213789337934347, |
|
"learning_rate": 1.0741321886682972e-07, |
|
"logits/chosen": -2.3731675148010254, |
|
"logits/rejected": -2.3687660694122314, |
|
"logps/chosen": -88.2901611328125, |
|
"logps/rejected": -111.50384521484375, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.00241573597304523, |
|
"rewards/margins": 0.29647231101989746, |
|
"rewards/rejected": -0.29405659437179565, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 17.365297005169634, |
|
"learning_rate": 9.645268632939811e-08, |
|
"logits/chosen": -2.3061447143554688, |
|
"logits/rejected": -2.3294944763183594, |
|
"logps/chosen": -89.75189208984375, |
|
"logps/rejected": -133.4348907470703, |
|
"loss": 0.5655, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.01052729319781065, |
|
"rewards/margins": 0.43865451216697693, |
|
"rewards/rejected": -0.44918179512023926, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"eval_logits/chosen": -2.3414034843444824, |
|
"eval_logits/rejected": -2.319497585296631, |
|
"eval_logps/chosen": -84.86211395263672, |
|
"eval_logps/rejected": -102.20085906982422, |
|
"eval_loss": 0.6495013236999512, |
|
"eval_rewards/accuracies": 0.329365074634552, |
|
"eval_rewards/chosen": -0.14603844285011292, |
|
"eval_rewards/margins": 0.1506960541009903, |
|
"eval_rewards/rejected": -0.296734482049942, |
|
"eval_runtime": 113.581, |
|
"eval_samples_per_second": 17.609, |
|
"eval_steps_per_second": 0.555, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.1875, |
|
"grad_norm": 36.763502445523656, |
|
"learning_rate": 8.549215379196651e-08, |
|
"logits/chosen": -2.348222017288208, |
|
"logits/rejected": -2.3373069763183594, |
|
"logps/chosen": -64.14533996582031, |
|
"logps/rejected": -96.91500854492188, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.0077634840272367, |
|
"rewards/margins": 0.3896782100200653, |
|
"rewards/rejected": -0.39744165539741516, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.2916666666666665, |
|
"grad_norm": 20.09517432602137, |
|
"learning_rate": 7.453162125453489e-08, |
|
"logits/chosen": -2.359616756439209, |
|
"logits/rejected": -2.295492172241211, |
|
"logps/chosen": -106.1574478149414, |
|
"logps/rejected": -127.97090911865234, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.057844799011945724, |
|
"rewards/margins": 0.44004330039024353, |
|
"rewards/rejected": -0.4978880286216736, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.3958333333333335, |
|
"grad_norm": 14.900227896678382, |
|
"learning_rate": 6.35710887171033e-08, |
|
"logits/chosen": -2.3140273094177246, |
|
"logits/rejected": -2.2706756591796875, |
|
"logps/chosen": -60.497161865234375, |
|
"logps/rejected": -80.89750671386719, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.009366447106003761, |
|
"rewards/margins": 0.3419914245605469, |
|
"rewards/rejected": -0.3513578772544861, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 19.49103632925891, |
|
"learning_rate": 5.2610556179671694e-08, |
|
"logits/chosen": -2.2797024250030518, |
|
"logits/rejected": -2.210596799850464, |
|
"logps/chosen": -90.98292541503906, |
|
"logps/rejected": -107.18631744384766, |
|
"loss": 0.5714, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.04347435384988785, |
|
"rewards/margins": 0.3474506735801697, |
|
"rewards/rejected": -0.39092501997947693, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.6041666666666665, |
|
"grad_norm": 25.396423499917123, |
|
"learning_rate": 4.16500236422401e-08, |
|
"logits/chosen": -2.221781015396118, |
|
"logits/rejected": -2.205749034881592, |
|
"logps/chosen": -81.74434661865234, |
|
"logps/rejected": -102.39741516113281, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.1277952641248703, |
|
"rewards/margins": 0.32224446535110474, |
|
"rewards/rejected": -0.45003968477249146, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.7083333333333335, |
|
"grad_norm": 29.7738794213807, |
|
"learning_rate": 3.0689491104808494e-08, |
|
"logits/chosen": -2.2012267112731934, |
|
"logits/rejected": -2.2187840938568115, |
|
"logps/chosen": -59.17329025268555, |
|
"logps/rejected": -96.04655456542969, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.014679071493446827, |
|
"rewards/margins": 0.31036660075187683, |
|
"rewards/rejected": -0.325045645236969, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.8125, |
|
"grad_norm": 19.448135836458437, |
|
"learning_rate": 1.9728958567376888e-08, |
|
"logits/chosen": -2.205172061920166, |
|
"logits/rejected": -2.253237009048462, |
|
"logps/chosen": -76.06713104248047, |
|
"logps/rejected": -105.37796783447266, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 0.008548146113753319, |
|
"rewards/margins": 0.39001256227493286, |
|
"rewards/rejected": -0.381464421749115, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 27.69086027593312, |
|
"learning_rate": 8.768426029945283e-09, |
|
"logits/chosen": -2.263256788253784, |
|
"logits/rejected": -2.214367389678955, |
|
"logps/chosen": -84.65340423583984, |
|
"logps/rejected": -112.45893859863281, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.009914604015648365, |
|
"rewards/margins": 0.4335610270500183, |
|
"rewards/rejected": -0.4236464500427246, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 288, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6178496645556556, |
|
"train_runtime": 3593.4638, |
|
"train_samples_per_second": 5.103, |
|
"train_steps_per_second": 0.08 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 288, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|