|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988571428571429, |
|
"eval_steps": 50, |
|
"global_step": 437, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022857142857142857, |
|
"grad_norm": 6.795239341624469, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.700852632522583, |
|
"logits/rejected": -2.6250014305114746, |
|
"logps/chosen": -301.27313232421875, |
|
"logps/rejected": -281.78619384765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.0001308169448748231, |
|
"rewards/margins": 0.0004958957433700562, |
|
"rewards/rejected": -0.00036507885670289397, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045714285714285714, |
|
"grad_norm": 5.31428372226332, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.6415421962738037, |
|
"logits/rejected": -2.606222629547119, |
|
"logps/chosen": -278.8970642089844, |
|
"logps/rejected": -254.64749145507812, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0027411712799221277, |
|
"rewards/margins": 0.001525188097730279, |
|
"rewards/rejected": 0.001215982949361205, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06857142857142857, |
|
"grad_norm": 5.9664481153189435, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.638169765472412, |
|
"logits/rejected": -2.617159843444824, |
|
"logps/chosen": -263.23223876953125, |
|
"logps/rejected": -263.40374755859375, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.014508177526295185, |
|
"rewards/margins": 0.00861530750989914, |
|
"rewards/rejected": 0.0058928681537508965, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09142857142857143, |
|
"grad_norm": 6.667336557428276, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.648975372314453, |
|
"logits/rejected": -2.585244655609131, |
|
"logps/chosen": -290.2044372558594, |
|
"logps/rejected": -268.3276062011719, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.037928324192762375, |
|
"rewards/margins": 0.044891245663166046, |
|
"rewards/rejected": -0.006962914951145649, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 9.813117329804816, |
|
"learning_rate": 4.997124959943201e-07, |
|
"logits/chosen": -2.6792047023773193, |
|
"logits/rejected": -2.5978188514709473, |
|
"logps/chosen": -293.65264892578125, |
|
"logps/rejected": -254.2649688720703, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.026576850563287735, |
|
"rewards/margins": 0.10058300197124481, |
|
"rewards/rejected": -0.07400616258382797, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"eval_logits/chosen": -2.541201591491699, |
|
"eval_logits/rejected": -2.4377598762512207, |
|
"eval_logps/chosen": -276.20166015625, |
|
"eval_logps/rejected": -235.61155700683594, |
|
"eval_loss": 0.6532372832298279, |
|
"eval_rewards/accuracies": 0.6896551847457886, |
|
"eval_rewards/chosen": -0.005977254826575518, |
|
"eval_rewards/margins": 0.15937723219394684, |
|
"eval_rewards/rejected": -0.16535447537899017, |
|
"eval_runtime": 91.1786, |
|
"eval_samples_per_second": 20.081, |
|
"eval_steps_per_second": 0.318, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13714285714285715, |
|
"grad_norm": 8.50170881260791, |
|
"learning_rate": 4.979579212164186e-07, |
|
"logits/chosen": -2.5797510147094727, |
|
"logits/rejected": -2.472832202911377, |
|
"logps/chosen": -293.24212646484375, |
|
"logps/rejected": -275.13885498046875, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.12736138701438904, |
|
"rewards/margins": 0.1385059803724289, |
|
"rewards/rejected": -0.2658673822879791, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 9.027696167666651, |
|
"learning_rate": 4.946196886175515e-07, |
|
"logits/chosen": -2.5882785320281982, |
|
"logits/rejected": -2.539330005645752, |
|
"logps/chosen": -293.43145751953125, |
|
"logps/rejected": -300.1482849121094, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.17653189599514008, |
|
"rewards/margins": 0.22868318855762482, |
|
"rewards/rejected": -0.4052151143550873, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18285714285714286, |
|
"grad_norm": 10.603734895101157, |
|
"learning_rate": 4.897191188239667e-07, |
|
"logits/chosen": -2.623680591583252, |
|
"logits/rejected": -2.5742952823638916, |
|
"logps/chosen": -285.3603820800781, |
|
"logps/rejected": -306.60211181640625, |
|
"loss": 0.6123, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.18331322073936462, |
|
"rewards/margins": 0.3296189308166504, |
|
"rewards/rejected": -0.5129320621490479, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2057142857142857, |
|
"grad_norm": 15.25024463895093, |
|
"learning_rate": 4.832875107981763e-07, |
|
"logits/chosen": -2.6875650882720947, |
|
"logits/rejected": -2.6345021724700928, |
|
"logps/chosen": -295.8832092285156, |
|
"logps/rejected": -313.13983154296875, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.19597890973091125, |
|
"rewards/margins": 0.37993985414505005, |
|
"rewards/rejected": -0.5759187340736389, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 12.34704700331818, |
|
"learning_rate": 4.753659419387223e-07, |
|
"logits/chosen": -2.679297685623169, |
|
"logits/rejected": -2.5944952964782715, |
|
"logps/chosen": -330.1031799316406, |
|
"logps/rejected": -318.5290832519531, |
|
"loss": 0.6051, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.43716782331466675, |
|
"rewards/margins": 0.4136085510253906, |
|
"rewards/rejected": -0.8507764935493469, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"eval_logits/chosen": -2.493269920349121, |
|
"eval_logits/rejected": -2.3746213912963867, |
|
"eval_logps/chosen": -356.7496643066406, |
|
"eval_logps/rejected": -346.2107238769531, |
|
"eval_loss": 0.612766683101654, |
|
"eval_rewards/accuracies": 0.7112069129943848, |
|
"eval_rewards/chosen": -0.8114572167396545, |
|
"eval_rewards/margins": 0.4598887860774994, |
|
"eval_rewards/rejected": -1.2713459730148315, |
|
"eval_runtime": 90.1893, |
|
"eval_samples_per_second": 20.302, |
|
"eval_steps_per_second": 0.322, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25142857142857145, |
|
"grad_norm": 13.527645485553371, |
|
"learning_rate": 4.660050057270191e-07, |
|
"logits/chosen": -2.269178628921509, |
|
"logits/rejected": -2.168506622314453, |
|
"logps/chosen": -387.71820068359375, |
|
"logps/rejected": -411.88427734375, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7141460180282593, |
|
"rewards/margins": 0.4256005883216858, |
|
"rewards/rejected": -1.1397466659545898, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2742857142857143, |
|
"grad_norm": 17.046687026346753, |
|
"learning_rate": 4.5526448859687144e-07, |
|
"logits/chosen": -1.293348789215088, |
|
"logits/rejected": -0.927165687084198, |
|
"logps/chosen": -381.55316162109375, |
|
"logps/rejected": -354.0766906738281, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7064869403839111, |
|
"rewards/margins": 0.5171381831169128, |
|
"rewards/rejected": -1.2236251831054688, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29714285714285715, |
|
"grad_norm": 19.945418074044913, |
|
"learning_rate": 4.432129880904388e-07, |
|
"logits/chosen": -0.14520399272441864, |
|
"logits/rejected": 0.31017133593559265, |
|
"logps/chosen": -394.09820556640625, |
|
"logps/rejected": -395.47674560546875, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8894233703613281, |
|
"rewards/margins": 0.5606414675712585, |
|
"rewards/rejected": -1.4500648975372314, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 23.269894199893105, |
|
"learning_rate": 4.299274747394055e-07, |
|
"logits/chosen": 0.3922499716281891, |
|
"logits/rejected": 0.7626418471336365, |
|
"logps/chosen": -402.1969299316406, |
|
"logps/rejected": -436.99725341796875, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8674923777580261, |
|
"rewards/margins": 0.783365786075592, |
|
"rewards/rejected": -1.6508581638336182, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"grad_norm": 18.255646968547413, |
|
"learning_rate": 4.1549280046953653e-07, |
|
"logits/chosen": -0.056426752358675, |
|
"logits/rejected": 0.6437274813652039, |
|
"logps/chosen": -360.7496032714844, |
|
"logps/rejected": -432.40399169921875, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7376368641853333, |
|
"rewards/margins": 0.8234134912490845, |
|
"rewards/rejected": -1.5610501766204834, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"eval_logits/chosen": 0.20309801399707794, |
|
"eval_logits/rejected": 1.3727048635482788, |
|
"eval_logps/chosen": -358.7465515136719, |
|
"eval_logps/rejected": -413.1859436035156, |
|
"eval_loss": 0.5486596822738647, |
|
"eval_rewards/accuracies": 0.767241358757019, |
|
"eval_rewards/chosen": -0.8314265012741089, |
|
"eval_rewards/margins": 1.1096714735031128, |
|
"eval_rewards/rejected": -1.9410980939865112, |
|
"eval_runtime": 90.1892, |
|
"eval_samples_per_second": 20.302, |
|
"eval_steps_per_second": 0.322, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3657142857142857, |
|
"grad_norm": 20.04208599875873, |
|
"learning_rate": 4.000011566683401e-07, |
|
"logits/chosen": 0.4694085121154785, |
|
"logits/rejected": 1.3121615648269653, |
|
"logps/chosen": -412.69488525390625, |
|
"logps/rejected": -459.99188232421875, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1335456371307373, |
|
"rewards/margins": 0.9099456071853638, |
|
"rewards/rejected": -2.0434913635253906, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38857142857142857, |
|
"grad_norm": 22.516250506361718, |
|
"learning_rate": 3.8355148537705047e-07, |
|
"logits/chosen": 0.1457391083240509, |
|
"logits/rejected": 0.8692816495895386, |
|
"logps/chosen": -395.64947509765625, |
|
"logps/rejected": -417.6402282714844, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9154456257820129, |
|
"rewards/margins": 0.6036463379859924, |
|
"rewards/rejected": -1.5190918445587158, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4114285714285714, |
|
"grad_norm": 20.567923807377685, |
|
"learning_rate": 3.662488473675315e-07, |
|
"logits/chosen": 0.6181103587150574, |
|
"logits/rejected": 1.7128187417984009, |
|
"logps/chosen": -436.68780517578125, |
|
"logps/rejected": -469.717041015625, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.065079927444458, |
|
"rewards/margins": 1.0325844287872314, |
|
"rewards/rejected": -2.0976643562316895, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4342857142857143, |
|
"grad_norm": 20.909674986872478, |
|
"learning_rate": 3.48203751140067e-07, |
|
"logits/chosen": 1.2501403093338013, |
|
"logits/rejected": 2.2078864574432373, |
|
"logps/chosen": -380.656982421875, |
|
"logps/rejected": -409.70556640625, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0439938306808472, |
|
"rewards/margins": 0.7080799341201782, |
|
"rewards/rejected": -1.7520736455917358, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 21.67729434989596, |
|
"learning_rate": 3.2953144712759537e-07, |
|
"logits/chosen": 0.7689538598060608, |
|
"logits/rejected": 1.9063518047332764, |
|
"logps/chosen": -359.4909362792969, |
|
"logps/rejected": -411.184814453125, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9114822149276733, |
|
"rewards/margins": 0.91156005859375, |
|
"rewards/rejected": -1.8230421543121338, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"eval_logits/chosen": 1.3441277742385864, |
|
"eval_logits/rejected": 2.707573652267456, |
|
"eval_logps/chosen": -374.5489501953125, |
|
"eval_logps/rejected": -426.7857971191406, |
|
"eval_loss": 0.5358834266662598, |
|
"eval_rewards/accuracies": 0.7543103694915771, |
|
"eval_rewards/chosen": -0.9894503355026245, |
|
"eval_rewards/margins": 1.087646245956421, |
|
"eval_rewards/rejected": -2.077096462249756, |
|
"eval_runtime": 90.1648, |
|
"eval_samples_per_second": 20.307, |
|
"eval_steps_per_second": 0.322, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 19.28945802551147, |
|
"learning_rate": 3.103511916141658e-07, |
|
"logits/chosen": 1.5224826335906982, |
|
"logits/rejected": 2.394577741622925, |
|
"logps/chosen": -385.7353210449219, |
|
"logps/rejected": -451.604248046875, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2228174209594727, |
|
"rewards/margins": 0.8404253125190735, |
|
"rewards/rejected": -2.0632426738739014, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5028571428571429, |
|
"grad_norm": 24.47080032118637, |
|
"learning_rate": 2.9078548506882117e-07, |
|
"logits/chosen": 1.5350468158721924, |
|
"logits/rejected": 2.541968822479248, |
|
"logps/chosen": -425.51287841796875, |
|
"logps/rejected": -466.1084899902344, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4044690132141113, |
|
"rewards/margins": 0.794781506061554, |
|
"rewards/rejected": -2.1992506980895996, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5257142857142857, |
|
"grad_norm": 20.61426463626924, |
|
"learning_rate": 2.709592897595191e-07, |
|
"logits/chosen": 1.438730001449585, |
|
"logits/rejected": 2.638312816619873, |
|
"logps/chosen": -390.794189453125, |
|
"logps/rejected": -433.10406494140625, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0973504781723022, |
|
"rewards/margins": 0.8408235311508179, |
|
"rewards/rejected": -1.9381740093231201, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5485714285714286, |
|
"grad_norm": 31.905445593128672, |
|
"learning_rate": 2.509992316440332e-07, |
|
"logits/chosen": 1.2066385746002197, |
|
"logits/rejected": 2.3449177742004395, |
|
"logps/chosen": -413.14825439453125, |
|
"logps/rejected": -506.625, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.1443836688995361, |
|
"rewards/margins": 1.2076470851898193, |
|
"rewards/rejected": -2.3520307540893555, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 18.97837160736367, |
|
"learning_rate": 2.3103279163519918e-07, |
|
"logits/chosen": 0.9885716438293457, |
|
"logits/rejected": 1.7852414846420288, |
|
"logps/chosen": -384.52496337890625, |
|
"logps/rejected": -472.253662109375, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0389870405197144, |
|
"rewards/margins": 0.9815452694892883, |
|
"rewards/rejected": -2.0205321311950684, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"eval_logits/chosen": 0.858768880367279, |
|
"eval_logits/rejected": 2.412114381790161, |
|
"eval_logps/chosen": -365.6370544433594, |
|
"eval_logps/rejected": -425.7062683105469, |
|
"eval_loss": 0.528998613357544, |
|
"eval_rewards/accuracies": 0.7629310488700867, |
|
"eval_rewards/chosen": -0.9003310203552246, |
|
"eval_rewards/margins": 1.1659703254699707, |
|
"eval_rewards/rejected": -2.0663013458251953, |
|
"eval_runtime": 90.3653, |
|
"eval_samples_per_second": 20.262, |
|
"eval_steps_per_second": 0.321, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5942857142857143, |
|
"grad_norm": 21.94464499251825, |
|
"learning_rate": 2.1118749140573358e-07, |
|
"logits/chosen": 1.5066580772399902, |
|
"logits/rejected": 2.079137086868286, |
|
"logps/chosen": -411.3843688964844, |
|
"logps/rejected": -482.978515625, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3159770965576172, |
|
"rewards/margins": 0.7803784608840942, |
|
"rewards/rejected": -2.096355438232422, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6171428571428571, |
|
"grad_norm": 23.287724561115347, |
|
"learning_rate": 1.9159007893272703e-07, |
|
"logits/chosen": 1.869363784790039, |
|
"logits/rejected": 3.169628620147705, |
|
"logps/chosen": -400.696533203125, |
|
"logps/rejected": -456.28155517578125, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2817071676254272, |
|
"rewards/margins": 0.9760338664054871, |
|
"rewards/rejected": -2.2577412128448486, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 26.1145325639797, |
|
"learning_rate": 1.7236571898357766e-07, |
|
"logits/chosen": 2.085681438446045, |
|
"logits/rejected": 2.909884214401245, |
|
"logps/chosen": -402.3949890136719, |
|
"logps/rejected": -493.7689514160156, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2935690879821777, |
|
"rewards/margins": 1.0130523443222046, |
|
"rewards/rejected": -2.3066213130950928, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6628571428571428, |
|
"grad_norm": 28.3817297395316, |
|
"learning_rate": 1.5363719371356882e-07, |
|
"logits/chosen": 1.904044508934021, |
|
"logits/rejected": 2.7162575721740723, |
|
"logps/chosen": -424.409912109375, |
|
"logps/rejected": -482.04913330078125, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2350399494171143, |
|
"rewards/margins": 0.9299663305282593, |
|
"rewards/rejected": -2.165006399154663, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"grad_norm": 17.85129410356221, |
|
"learning_rate": 1.3552411848071565e-07, |
|
"logits/chosen": 1.697782278060913, |
|
"logits/rejected": 3.180041551589966, |
|
"logps/chosen": -419.85028076171875, |
|
"logps/rejected": -478.419677734375, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1794109344482422, |
|
"rewards/margins": 1.078364610671997, |
|
"rewards/rejected": -2.2577755451202393, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"eval_logits/chosen": 1.7084869146347046, |
|
"eval_logits/rejected": 3.311720132827759, |
|
"eval_logps/chosen": -371.6744689941406, |
|
"eval_logps/rejected": -439.6499938964844, |
|
"eval_loss": 0.5213173031806946, |
|
"eval_rewards/accuracies": 0.7715517282485962, |
|
"eval_rewards/chosen": -0.9607052206993103, |
|
"eval_rewards/margins": 1.2450333833694458, |
|
"eval_rewards/rejected": -2.2057385444641113, |
|
"eval_runtime": 89.9422, |
|
"eval_samples_per_second": 20.358, |
|
"eval_steps_per_second": 0.322, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7085714285714285, |
|
"grad_norm": 22.76802438882901, |
|
"learning_rate": 1.1814217788631473e-07, |
|
"logits/chosen": 1.900792121887207, |
|
"logits/rejected": 2.7918269634246826, |
|
"logps/chosen": -372.843994140625, |
|
"logps/rejected": -442.9312438964844, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1653985977172852, |
|
"rewards/margins": 0.8919604420661926, |
|
"rewards/rejected": -2.057358980178833, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7314285714285714, |
|
"grad_norm": 19.73975657149685, |
|
"learning_rate": 1.0160238692045331e-07, |
|
"logits/chosen": 2.1896469593048096, |
|
"logits/rejected": 2.8715972900390625, |
|
"logps/chosen": -380.424560546875, |
|
"logps/rejected": -454.2293395996094, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.3614219427108765, |
|
"rewards/margins": 0.7709897756576538, |
|
"rewards/rejected": -2.132411479949951, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7542857142857143, |
|
"grad_norm": 29.56922781200817, |
|
"learning_rate": 8.601038193139438e-08, |
|
"logits/chosen": 1.6053155660629272, |
|
"logits/rejected": 2.692516565322876, |
|
"logps/chosen": -416.57342529296875, |
|
"logps/rejected": -465.4991760253906, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1735525131225586, |
|
"rewards/margins": 1.003560185432434, |
|
"rewards/rejected": -2.177112579345703, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7771428571428571, |
|
"grad_norm": 18.098670935967576, |
|
"learning_rate": 7.146574594727572e-08, |
|
"logits/chosen": 2.0766067504882812, |
|
"logits/rejected": 2.8303616046905518, |
|
"logps/chosen": -387.4620361328125, |
|
"logps/rejected": -468.67718505859375, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.2151105403900146, |
|
"rewards/margins": 1.0514241456985474, |
|
"rewards/rejected": -2.2665345668792725, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 20.794164513921476, |
|
"learning_rate": 5.8061372659157306e-08, |
|
"logits/chosen": 1.6319509744644165, |
|
"logits/rejected": 2.7972917556762695, |
|
"logps/chosen": -412.102783203125, |
|
"logps/rejected": -458.27191162109375, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2239553928375244, |
|
"rewards/margins": 0.8157873153686523, |
|
"rewards/rejected": -2.0397427082061768, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": 2.0842368602752686, |
|
"eval_logits/rejected": 3.6707816123962402, |
|
"eval_logps/chosen": -389.46490478515625, |
|
"eval_logps/rejected": -456.7085266113281, |
|
"eval_loss": 0.5216463804244995, |
|
"eval_rewards/accuracies": 0.7629310488700867, |
|
"eval_rewards/chosen": -1.1386092901229858, |
|
"eval_rewards/margins": 1.237714409828186, |
|
"eval_rewards/rejected": -2.3763234615325928, |
|
"eval_runtime": 89.8616, |
|
"eval_samples_per_second": 20.376, |
|
"eval_steps_per_second": 0.323, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8228571428571428, |
|
"grad_norm": 18.184259604484346, |
|
"learning_rate": 4.5882873127531614e-08, |
|
"logits/chosen": 1.648209810256958, |
|
"logits/rejected": 2.9181623458862305, |
|
"logps/chosen": -407.1295166015625, |
|
"logps/rejected": -477.27447509765625, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.217245101928711, |
|
"rewards/margins": 1.044634222984314, |
|
"rewards/rejected": -2.2618794441223145, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8457142857142858, |
|
"grad_norm": 19.108285818305696, |
|
"learning_rate": 3.500802900154412e-08, |
|
"logits/chosen": 1.801898717880249, |
|
"logits/rejected": 3.196338176727295, |
|
"logps/chosen": -383.25311279296875, |
|
"logps/rejected": -463.01727294921875, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1219167709350586, |
|
"rewards/margins": 1.1241002082824707, |
|
"rewards/rejected": -2.2460172176361084, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8685714285714285, |
|
"grad_norm": 23.620382836684982, |
|
"learning_rate": 2.550629574310309e-08, |
|
"logits/chosen": 1.4818474054336548, |
|
"logits/rejected": 2.90739107131958, |
|
"logps/chosen": -453.0061950683594, |
|
"logps/rejected": -476.94830322265625, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.349498987197876, |
|
"rewards/margins": 0.8459898233413696, |
|
"rewards/rejected": -2.195488691329956, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8914285714285715, |
|
"grad_norm": 22.14239335519297, |
|
"learning_rate": 1.7438359028687983e-08, |
|
"logits/chosen": 1.8351167440414429, |
|
"logits/rejected": 2.6260292530059814, |
|
"logps/chosen": -425.75128173828125, |
|
"logps/rejected": -503.3841857910156, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1437828540802002, |
|
"rewards/margins": 0.9423319697380066, |
|
"rewards/rejected": -2.0861151218414307, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 32.206706951444914, |
|
"learning_rate": 1.0855747162029361e-08, |
|
"logits/chosen": 2.132110357284546, |
|
"logits/rejected": 2.6392226219177246, |
|
"logps/chosen": -411.29962158203125, |
|
"logps/rejected": -477.0232849121094, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3256638050079346, |
|
"rewards/margins": 0.7788330316543579, |
|
"rewards/rejected": -2.104496955871582, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"eval_logits/chosen": 2.117452621459961, |
|
"eval_logits/rejected": 3.7050397396087646, |
|
"eval_logps/chosen": -386.83795166015625, |
|
"eval_logps/rejected": -455.0307312011719, |
|
"eval_loss": 0.520908772945404, |
|
"eval_rewards/accuracies": 0.767241358757019, |
|
"eval_rewards/chosen": -1.112339973449707, |
|
"eval_rewards/margins": 1.2472059726715088, |
|
"eval_rewards/rejected": -2.3595457077026367, |
|
"eval_runtime": 90.8703, |
|
"eval_samples_per_second": 20.15, |
|
"eval_steps_per_second": 0.319, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9371428571428572, |
|
"grad_norm": 21.9038704574243, |
|
"learning_rate": 5.8005019731033615e-09, |
|
"logits/chosen": 1.9021247625350952, |
|
"logits/rejected": 2.9709084033966064, |
|
"logps/chosen": -423.39990234375, |
|
"logps/rejected": -478.46929931640625, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3521995544433594, |
|
"rewards/margins": 0.8351926803588867, |
|
"rewards/rejected": -2.187392234802246, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 21.54473906200769, |
|
"learning_rate": 2.3049103053431886e-09, |
|
"logits/chosen": 1.8090896606445312, |
|
"logits/rejected": 3.297045946121216, |
|
"logps/chosen": -384.42333984375, |
|
"logps/rejected": -458.969482421875, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.0167523622512817, |
|
"rewards/margins": 1.2330738306045532, |
|
"rewards/rejected": -2.249825954437256, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9828571428571429, |
|
"grad_norm": 22.41955699037185, |
|
"learning_rate": 3.9129780600541397e-10, |
|
"logits/chosen": 2.2351975440979004, |
|
"logits/rejected": 3.178173065185547, |
|
"logps/chosen": -401.39642333984375, |
|
"logps/rejected": -481.4127502441406, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.191645622253418, |
|
"rewards/margins": 0.9857856631278992, |
|
"rewards/rejected": -2.177431344985962, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9988571428571429, |
|
"step": 437, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5630035629534339, |
|
"train_runtime": 11387.5716, |
|
"train_samples_per_second": 4.918, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 437, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|