sfulay's picture
Model save
a773699 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9988571428571429,
"eval_steps": 50,
"global_step": 437,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.022857142857142857,
"grad_norm": 6.795239341624469,
"learning_rate": 1.1363636363636363e-07,
"logits/chosen": -2.700852632522583,
"logits/rejected": -2.6250014305114746,
"logps/chosen": -301.27313232421875,
"logps/rejected": -281.78619384765625,
"loss": 0.6931,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": 0.0001308169448748231,
"rewards/margins": 0.0004958957433700562,
"rewards/rejected": -0.00036507885670289397,
"step": 10
},
{
"epoch": 0.045714285714285714,
"grad_norm": 5.31428372226332,
"learning_rate": 2.2727272727272726e-07,
"logits/chosen": -2.6415421962738037,
"logits/rejected": -2.606222629547119,
"logps/chosen": -278.8970642089844,
"logps/rejected": -254.64749145507812,
"loss": 0.6924,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": 0.0027411712799221277,
"rewards/margins": 0.001525188097730279,
"rewards/rejected": 0.001215982949361205,
"step": 20
},
{
"epoch": 0.06857142857142857,
"grad_norm": 5.9664481153189435,
"learning_rate": 3.4090909090909085e-07,
"logits/chosen": -2.638169765472412,
"logits/rejected": -2.617159843444824,
"logps/chosen": -263.23223876953125,
"logps/rejected": -263.40374755859375,
"loss": 0.6883,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": 0.014508177526295185,
"rewards/margins": 0.00861530750989914,
"rewards/rejected": 0.0058928681537508965,
"step": 30
},
{
"epoch": 0.09142857142857143,
"grad_norm": 6.667336557428276,
"learning_rate": 4.545454545454545e-07,
"logits/chosen": -2.648975372314453,
"logits/rejected": -2.585244655609131,
"logps/chosen": -290.2044372558594,
"logps/rejected": -268.3276062011719,
"loss": 0.6785,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.037928324192762375,
"rewards/margins": 0.044891245663166046,
"rewards/rejected": -0.006962914951145649,
"step": 40
},
{
"epoch": 0.11428571428571428,
"grad_norm": 9.813117329804816,
"learning_rate": 4.997124959943201e-07,
"logits/chosen": -2.6792047023773193,
"logits/rejected": -2.5978188514709473,
"logps/chosen": -293.65264892578125,
"logps/rejected": -254.2649688720703,
"loss": 0.6663,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.026576850563287735,
"rewards/margins": 0.10058300197124481,
"rewards/rejected": -0.07400616258382797,
"step": 50
},
{
"epoch": 0.11428571428571428,
"eval_logits/chosen": -2.541201591491699,
"eval_logits/rejected": -2.4377598762512207,
"eval_logps/chosen": -276.20166015625,
"eval_logps/rejected": -235.61155700683594,
"eval_loss": 0.6532372832298279,
"eval_rewards/accuracies": 0.6896551847457886,
"eval_rewards/chosen": -0.005977254826575518,
"eval_rewards/margins": 0.15937723219394684,
"eval_rewards/rejected": -0.16535447537899017,
"eval_runtime": 91.1786,
"eval_samples_per_second": 20.081,
"eval_steps_per_second": 0.318,
"step": 50
},
{
"epoch": 0.13714285714285715,
"grad_norm": 8.50170881260791,
"learning_rate": 4.979579212164186e-07,
"logits/chosen": -2.5797510147094727,
"logits/rejected": -2.472832202911377,
"logps/chosen": -293.24212646484375,
"logps/rejected": -275.13885498046875,
"loss": 0.646,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.12736138701438904,
"rewards/margins": 0.1385059803724289,
"rewards/rejected": -0.2658673822879791,
"step": 60
},
{
"epoch": 0.16,
"grad_norm": 9.027696167666651,
"learning_rate": 4.946196886175515e-07,
"logits/chosen": -2.5882785320281982,
"logits/rejected": -2.539330005645752,
"logps/chosen": -293.43145751953125,
"logps/rejected": -300.1482849121094,
"loss": 0.6244,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.17653189599514008,
"rewards/margins": 0.22868318855762482,
"rewards/rejected": -0.4052151143550873,
"step": 70
},
{
"epoch": 0.18285714285714286,
"grad_norm": 10.603734895101157,
"learning_rate": 4.897191188239667e-07,
"logits/chosen": -2.623680591583252,
"logits/rejected": -2.5742952823638916,
"logps/chosen": -285.3603820800781,
"logps/rejected": -306.60211181640625,
"loss": 0.6123,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.18331322073936462,
"rewards/margins": 0.3296189308166504,
"rewards/rejected": -0.5129320621490479,
"step": 80
},
{
"epoch": 0.2057142857142857,
"grad_norm": 15.25024463895093,
"learning_rate": 4.832875107981763e-07,
"logits/chosen": -2.6875650882720947,
"logits/rejected": -2.6345021724700928,
"logps/chosen": -295.8832092285156,
"logps/rejected": -313.13983154296875,
"loss": 0.6191,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.19597890973091125,
"rewards/margins": 0.37993985414505005,
"rewards/rejected": -0.5759187340736389,
"step": 90
},
{
"epoch": 0.22857142857142856,
"grad_norm": 12.34704700331818,
"learning_rate": 4.753659419387223e-07,
"logits/chosen": -2.679297685623169,
"logits/rejected": -2.5944952964782715,
"logps/chosen": -330.1031799316406,
"logps/rejected": -318.5290832519531,
"loss": 0.6051,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.43716782331466675,
"rewards/margins": 0.4136085510253906,
"rewards/rejected": -0.8507764935493469,
"step": 100
},
{
"epoch": 0.22857142857142856,
"eval_logits/chosen": -2.493269920349121,
"eval_logits/rejected": -2.3746213912963867,
"eval_logps/chosen": -356.7496643066406,
"eval_logps/rejected": -346.2107238769531,
"eval_loss": 0.612766683101654,
"eval_rewards/accuracies": 0.7112069129943848,
"eval_rewards/chosen": -0.8114572167396545,
"eval_rewards/margins": 0.4598887860774994,
"eval_rewards/rejected": -1.2713459730148315,
"eval_runtime": 90.1893,
"eval_samples_per_second": 20.302,
"eval_steps_per_second": 0.322,
"step": 100
},
{
"epoch": 0.25142857142857145,
"grad_norm": 13.527645485553371,
"learning_rate": 4.660050057270191e-07,
"logits/chosen": -2.269178628921509,
"logits/rejected": -2.168506622314453,
"logps/chosen": -387.71820068359375,
"logps/rejected": -411.88427734375,
"loss": 0.5873,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.7141460180282593,
"rewards/margins": 0.4256005883216858,
"rewards/rejected": -1.1397466659545898,
"step": 110
},
{
"epoch": 0.2742857142857143,
"grad_norm": 17.046687026346753,
"learning_rate": 4.5526448859687144e-07,
"logits/chosen": -1.293348789215088,
"logits/rejected": -0.927165687084198,
"logps/chosen": -381.55316162109375,
"logps/rejected": -354.0766906738281,
"loss": 0.5755,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.7064869403839111,
"rewards/margins": 0.5171381831169128,
"rewards/rejected": -1.2236251831054688,
"step": 120
},
{
"epoch": 0.29714285714285715,
"grad_norm": 19.945418074044913,
"learning_rate": 4.432129880904388e-07,
"logits/chosen": -0.14520399272441864,
"logits/rejected": 0.31017133593559265,
"logps/chosen": -394.09820556640625,
"logps/rejected": -395.47674560546875,
"loss": 0.5448,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.8894233703613281,
"rewards/margins": 0.5606414675712585,
"rewards/rejected": -1.4500648975372314,
"step": 130
},
{
"epoch": 0.32,
"grad_norm": 23.269894199893105,
"learning_rate": 4.299274747394055e-07,
"logits/chosen": 0.3922499716281891,
"logits/rejected": 0.7626418471336365,
"logps/chosen": -402.1969299316406,
"logps/rejected": -436.99725341796875,
"loss": 0.5611,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.8674923777580261,
"rewards/margins": 0.783365786075592,
"rewards/rejected": -1.6508581638336182,
"step": 140
},
{
"epoch": 0.34285714285714286,
"grad_norm": 18.255646968547413,
"learning_rate": 4.1549280046953653e-07,
"logits/chosen": -0.056426752358675,
"logits/rejected": 0.6437274813652039,
"logps/chosen": -360.7496032714844,
"logps/rejected": -432.40399169921875,
"loss": 0.5375,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.7376368641853333,
"rewards/margins": 0.8234134912490845,
"rewards/rejected": -1.5610501766204834,
"step": 150
},
{
"epoch": 0.34285714285714286,
"eval_logits/chosen": 0.20309801399707794,
"eval_logits/rejected": 1.3727048635482788,
"eval_logps/chosen": -358.7465515136719,
"eval_logps/rejected": -413.1859436035156,
"eval_loss": 0.5486596822738647,
"eval_rewards/accuracies": 0.767241358757019,
"eval_rewards/chosen": -0.8314265012741089,
"eval_rewards/margins": 1.1096714735031128,
"eval_rewards/rejected": -1.9410980939865112,
"eval_runtime": 90.1892,
"eval_samples_per_second": 20.302,
"eval_steps_per_second": 0.322,
"step": 150
},
{
"epoch": 0.3657142857142857,
"grad_norm": 20.04208599875873,
"learning_rate": 4.000011566683401e-07,
"logits/chosen": 0.4694085121154785,
"logits/rejected": 1.3121615648269653,
"logps/chosen": -412.69488525390625,
"logps/rejected": -459.99188232421875,
"loss": 0.548,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.1335456371307373,
"rewards/margins": 0.9099456071853638,
"rewards/rejected": -2.0434913635253906,
"step": 160
},
{
"epoch": 0.38857142857142857,
"grad_norm": 22.516250506361718,
"learning_rate": 3.8355148537705047e-07,
"logits/chosen": 0.1457391083240509,
"logits/rejected": 0.8692816495895386,
"logps/chosen": -395.64947509765625,
"logps/rejected": -417.6402282714844,
"loss": 0.5469,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.9154456257820129,
"rewards/margins": 0.6036463379859924,
"rewards/rejected": -1.5190918445587158,
"step": 170
},
{
"epoch": 0.4114285714285714,
"grad_norm": 20.567923807377685,
"learning_rate": 3.662488473675315e-07,
"logits/chosen": 0.6181103587150574,
"logits/rejected": 1.7128187417984009,
"logps/chosen": -436.68780517578125,
"logps/rejected": -469.717041015625,
"loss": 0.5551,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.065079927444458,
"rewards/margins": 1.0325844287872314,
"rewards/rejected": -2.0976643562316895,
"step": 180
},
{
"epoch": 0.4342857142857143,
"grad_norm": 20.909674986872478,
"learning_rate": 3.48203751140067e-07,
"logits/chosen": 1.2501403093338013,
"logits/rejected": 2.2078864574432373,
"logps/chosen": -380.656982421875,
"logps/rejected": -409.70556640625,
"loss": 0.5412,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -1.0439938306808472,
"rewards/margins": 0.7080799341201782,
"rewards/rejected": -1.7520736455917358,
"step": 190
},
{
"epoch": 0.45714285714285713,
"grad_norm": 21.67729434989596,
"learning_rate": 3.2953144712759537e-07,
"logits/chosen": 0.7689538598060608,
"logits/rejected": 1.9063518047332764,
"logps/chosen": -359.4909362792969,
"logps/rejected": -411.184814453125,
"loss": 0.5435,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.9114822149276733,
"rewards/margins": 0.91156005859375,
"rewards/rejected": -1.8230421543121338,
"step": 200
},
{
"epoch": 0.45714285714285713,
"eval_logits/chosen": 1.3441277742385864,
"eval_logits/rejected": 2.707573652267456,
"eval_logps/chosen": -374.5489501953125,
"eval_logps/rejected": -426.7857971191406,
"eval_loss": 0.5358834266662598,
"eval_rewards/accuracies": 0.7543103694915771,
"eval_rewards/chosen": -0.9894503355026245,
"eval_rewards/margins": 1.087646245956421,
"eval_rewards/rejected": -2.077096462249756,
"eval_runtime": 90.1648,
"eval_samples_per_second": 20.307,
"eval_steps_per_second": 0.322,
"step": 200
},
{
"epoch": 0.48,
"grad_norm": 19.28945802551147,
"learning_rate": 3.103511916141658e-07,
"logits/chosen": 1.5224826335906982,
"logits/rejected": 2.394577741622925,
"logps/chosen": -385.7353210449219,
"logps/rejected": -451.604248046875,
"loss": 0.5372,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -1.2228174209594727,
"rewards/margins": 0.8404253125190735,
"rewards/rejected": -2.0632426738739014,
"step": 210
},
{
"epoch": 0.5028571428571429,
"grad_norm": 24.47080032118637,
"learning_rate": 2.9078548506882117e-07,
"logits/chosen": 1.5350468158721924,
"logits/rejected": 2.541968822479248,
"logps/chosen": -425.51287841796875,
"logps/rejected": -466.1084899902344,
"loss": 0.5604,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.4044690132141113,
"rewards/margins": 0.794781506061554,
"rewards/rejected": -2.1992506980895996,
"step": 220
},
{
"epoch": 0.5257142857142857,
"grad_norm": 20.61426463626924,
"learning_rate": 2.709592897595191e-07,
"logits/chosen": 1.438730001449585,
"logits/rejected": 2.638312816619873,
"logps/chosen": -390.794189453125,
"logps/rejected": -433.10406494140625,
"loss": 0.5311,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.0973504781723022,
"rewards/margins": 0.8408235311508179,
"rewards/rejected": -1.9381740093231201,
"step": 230
},
{
"epoch": 0.5485714285714286,
"grad_norm": 31.905445593128672,
"learning_rate": 2.509992316440332e-07,
"logits/chosen": 1.2066385746002197,
"logits/rejected": 2.3449177742004395,
"logps/chosen": -413.14825439453125,
"logps/rejected": -506.625,
"loss": 0.5256,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -1.1443836688995361,
"rewards/margins": 1.2076470851898193,
"rewards/rejected": -2.3520307540893555,
"step": 240
},
{
"epoch": 0.5714285714285714,
"grad_norm": 18.97837160736367,
"learning_rate": 2.3103279163519918e-07,
"logits/chosen": 0.9885716438293457,
"logits/rejected": 1.7852414846420288,
"logps/chosen": -384.52496337890625,
"logps/rejected": -472.253662109375,
"loss": 0.5433,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.0389870405197144,
"rewards/margins": 0.9815452694892883,
"rewards/rejected": -2.0205321311950684,
"step": 250
},
{
"epoch": 0.5714285714285714,
"eval_logits/chosen": 0.858768880367279,
"eval_logits/rejected": 2.412114381790161,
"eval_logps/chosen": -365.6370544433594,
"eval_logps/rejected": -425.7062683105469,
"eval_loss": 0.528998613357544,
"eval_rewards/accuracies": 0.7629310488700867,
"eval_rewards/chosen": -0.9003310203552246,
"eval_rewards/margins": 1.1659703254699707,
"eval_rewards/rejected": -2.0663013458251953,
"eval_runtime": 90.3653,
"eval_samples_per_second": 20.262,
"eval_steps_per_second": 0.321,
"step": 250
},
{
"epoch": 0.5942857142857143,
"grad_norm": 21.94464499251825,
"learning_rate": 2.1118749140573358e-07,
"logits/chosen": 1.5066580772399902,
"logits/rejected": 2.079137086868286,
"logps/chosen": -411.3843688964844,
"logps/rejected": -482.978515625,
"loss": 0.5408,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -1.3159770965576172,
"rewards/margins": 0.7803784608840942,
"rewards/rejected": -2.096355438232422,
"step": 260
},
{
"epoch": 0.6171428571428571,
"grad_norm": 23.287724561115347,
"learning_rate": 1.9159007893272703e-07,
"logits/chosen": 1.869363784790039,
"logits/rejected": 3.169628620147705,
"logps/chosen": -400.696533203125,
"logps/rejected": -456.28155517578125,
"loss": 0.517,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.2817071676254272,
"rewards/margins": 0.9760338664054871,
"rewards/rejected": -2.2577412128448486,
"step": 270
},
{
"epoch": 0.64,
"grad_norm": 26.1145325639797,
"learning_rate": 1.7236571898357766e-07,
"logits/chosen": 2.085681438446045,
"logits/rejected": 2.909884214401245,
"logps/chosen": -402.3949890136719,
"logps/rejected": -493.7689514160156,
"loss": 0.5287,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.2935690879821777,
"rewards/margins": 1.0130523443222046,
"rewards/rejected": -2.3066213130950928,
"step": 280
},
{
"epoch": 0.6628571428571428,
"grad_norm": 28.3817297395316,
"learning_rate": 1.5363719371356882e-07,
"logits/chosen": 1.904044508934021,
"logits/rejected": 2.7162575721740723,
"logps/chosen": -424.409912109375,
"logps/rejected": -482.04913330078125,
"loss": 0.5285,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -1.2350399494171143,
"rewards/margins": 0.9299663305282593,
"rewards/rejected": -2.165006399154663,
"step": 290
},
{
"epoch": 0.6857142857142857,
"grad_norm": 17.85129410356221,
"learning_rate": 1.3552411848071565e-07,
"logits/chosen": 1.697782278060913,
"logits/rejected": 3.180041551589966,
"logps/chosen": -419.85028076171875,
"logps/rejected": -478.419677734375,
"loss": 0.5194,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -1.1794109344482422,
"rewards/margins": 1.078364610671997,
"rewards/rejected": -2.2577755451202393,
"step": 300
},
{
"epoch": 0.6857142857142857,
"eval_logits/chosen": 1.7084869146347046,
"eval_logits/rejected": 3.311720132827759,
"eval_logps/chosen": -371.6744689941406,
"eval_logps/rejected": -439.6499938964844,
"eval_loss": 0.5213173031806946,
"eval_rewards/accuracies": 0.7715517282485962,
"eval_rewards/chosen": -0.9607052206993103,
"eval_rewards/margins": 1.2450333833694458,
"eval_rewards/rejected": -2.2057385444641113,
"eval_runtime": 89.9422,
"eval_samples_per_second": 20.358,
"eval_steps_per_second": 0.322,
"step": 300
},
{
"epoch": 0.7085714285714285,
"grad_norm": 22.76802438882901,
"learning_rate": 1.1814217788631473e-07,
"logits/chosen": 1.900792121887207,
"logits/rejected": 2.7918269634246826,
"logps/chosen": -372.843994140625,
"logps/rejected": -442.9312438964844,
"loss": 0.5285,
"rewards/accuracies": 0.65625,
"rewards/chosen": -1.1653985977172852,
"rewards/margins": 0.8919604420661926,
"rewards/rejected": -2.057358980178833,
"step": 310
},
{
"epoch": 0.7314285714285714,
"grad_norm": 19.73975657149685,
"learning_rate": 1.0160238692045331e-07,
"logits/chosen": 2.1896469593048096,
"logits/rejected": 2.8715972900390625,
"logps/chosen": -380.424560546875,
"logps/rejected": -454.2293395996094,
"loss": 0.536,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -1.3614219427108765,
"rewards/margins": 0.7709897756576538,
"rewards/rejected": -2.132411479949951,
"step": 320
},
{
"epoch": 0.7542857142857143,
"grad_norm": 29.56922781200817,
"learning_rate": 8.601038193139438e-08,
"logits/chosen": 1.6053155660629272,
"logits/rejected": 2.692516565322876,
"logps/chosen": -416.57342529296875,
"logps/rejected": -465.4991760253906,
"loss": 0.5313,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -1.1735525131225586,
"rewards/margins": 1.003560185432434,
"rewards/rejected": -2.177112579345703,
"step": 330
},
{
"epoch": 0.7771428571428571,
"grad_norm": 18.098670935967576,
"learning_rate": 7.146574594727572e-08,
"logits/chosen": 2.0766067504882812,
"logits/rejected": 2.8303616046905518,
"logps/chosen": -387.4620361328125,
"logps/rejected": -468.67718505859375,
"loss": 0.5193,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -1.2151105403900146,
"rewards/margins": 1.0514241456985474,
"rewards/rejected": -2.2665345668792725,
"step": 340
},
{
"epoch": 0.8,
"grad_norm": 20.794164513921476,
"learning_rate": 5.8061372659157306e-08,
"logits/chosen": 1.6319509744644165,
"logits/rejected": 2.7972917556762695,
"logps/chosen": -412.102783203125,
"logps/rejected": -458.27191162109375,
"loss": 0.5325,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -1.2239553928375244,
"rewards/margins": 0.8157873153686523,
"rewards/rejected": -2.0397427082061768,
"step": 350
},
{
"epoch": 0.8,
"eval_logits/chosen": 2.0842368602752686,
"eval_logits/rejected": 3.6707816123962402,
"eval_logps/chosen": -389.46490478515625,
"eval_logps/rejected": -456.7085266113281,
"eval_loss": 0.5216463804244995,
"eval_rewards/accuracies": 0.7629310488700867,
"eval_rewards/chosen": -1.1386092901229858,
"eval_rewards/margins": 1.237714409828186,
"eval_rewards/rejected": -2.3763234615325928,
"eval_runtime": 89.8616,
"eval_samples_per_second": 20.376,
"eval_steps_per_second": 0.323,
"step": 350
},
{
"epoch": 0.8228571428571428,
"grad_norm": 18.184259604484346,
"learning_rate": 4.5882873127531614e-08,
"logits/chosen": 1.648209810256958,
"logits/rejected": 2.9181623458862305,
"logps/chosen": -407.1295166015625,
"logps/rejected": -477.27447509765625,
"loss": 0.5146,
"rewards/accuracies": 0.78125,
"rewards/chosen": -1.217245101928711,
"rewards/margins": 1.044634222984314,
"rewards/rejected": -2.2618794441223145,
"step": 360
},
{
"epoch": 0.8457142857142858,
"grad_norm": 19.108285818305696,
"learning_rate": 3.500802900154412e-08,
"logits/chosen": 1.801898717880249,
"logits/rejected": 3.196338176727295,
"logps/chosen": -383.25311279296875,
"logps/rejected": -463.01727294921875,
"loss": 0.5188,
"rewards/accuracies": 0.78125,
"rewards/chosen": -1.1219167709350586,
"rewards/margins": 1.1241002082824707,
"rewards/rejected": -2.2460172176361084,
"step": 370
},
{
"epoch": 0.8685714285714285,
"grad_norm": 23.620382836684982,
"learning_rate": 2.550629574310309e-08,
"logits/chosen": 1.4818474054336548,
"logits/rejected": 2.90739107131958,
"logps/chosen": -453.0061950683594,
"logps/rejected": -476.94830322265625,
"loss": 0.5263,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.349498987197876,
"rewards/margins": 0.8459898233413696,
"rewards/rejected": -2.195488691329956,
"step": 380
},
{
"epoch": 0.8914285714285715,
"grad_norm": 22.14239335519297,
"learning_rate": 1.7438359028687983e-08,
"logits/chosen": 1.8351167440414429,
"logits/rejected": 2.6260292530059814,
"logps/chosen": -425.75128173828125,
"logps/rejected": -503.3841857910156,
"loss": 0.5275,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.1437828540802002,
"rewards/margins": 0.9423319697380066,
"rewards/rejected": -2.0861151218414307,
"step": 390
},
{
"epoch": 0.9142857142857143,
"grad_norm": 32.206706951444914,
"learning_rate": 1.0855747162029361e-08,
"logits/chosen": 2.132110357284546,
"logits/rejected": 2.6392226219177246,
"logps/chosen": -411.29962158203125,
"logps/rejected": -477.0232849121094,
"loss": 0.5483,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.3256638050079346,
"rewards/margins": 0.7788330316543579,
"rewards/rejected": -2.104496955871582,
"step": 400
},
{
"epoch": 0.9142857142857143,
"eval_logits/chosen": 2.117452621459961,
"eval_logits/rejected": 3.7050397396087646,
"eval_logps/chosen": -386.83795166015625,
"eval_logps/rejected": -455.0307312011719,
"eval_loss": 0.520908772945404,
"eval_rewards/accuracies": 0.767241358757019,
"eval_rewards/chosen": -1.112339973449707,
"eval_rewards/margins": 1.2472059726715088,
"eval_rewards/rejected": -2.3595457077026367,
"eval_runtime": 90.8703,
"eval_samples_per_second": 20.15,
"eval_steps_per_second": 0.319,
"step": 400
},
{
"epoch": 0.9371428571428572,
"grad_norm": 21.9038704574243,
"learning_rate": 5.8005019731033615e-09,
"logits/chosen": 1.9021247625350952,
"logits/rejected": 2.9709084033966064,
"logps/chosen": -423.39990234375,
"logps/rejected": -478.46929931640625,
"loss": 0.5184,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.3521995544433594,
"rewards/margins": 0.8351926803588867,
"rewards/rejected": -2.187392234802246,
"step": 410
},
{
"epoch": 0.96,
"grad_norm": 21.54473906200769,
"learning_rate": 2.3049103053431886e-09,
"logits/chosen": 1.8090896606445312,
"logits/rejected": 3.297045946121216,
"logps/chosen": -384.42333984375,
"logps/rejected": -458.969482421875,
"loss": 0.522,
"rewards/accuracies": 0.78125,
"rewards/chosen": -1.0167523622512817,
"rewards/margins": 1.2330738306045532,
"rewards/rejected": -2.249825954437256,
"step": 420
},
{
"epoch": 0.9828571428571429,
"grad_norm": 22.41955699037185,
"learning_rate": 3.9129780600541397e-10,
"logits/chosen": 2.2351975440979004,
"logits/rejected": 3.178173065185547,
"logps/chosen": -401.39642333984375,
"logps/rejected": -481.4127502441406,
"loss": 0.5214,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.191645622253418,
"rewards/margins": 0.9857856631278992,
"rewards/rejected": -2.177431344985962,
"step": 430
},
{
"epoch": 0.9988571428571429,
"step": 437,
"total_flos": 0.0,
"train_loss": 0.5630035629534339,
"train_runtime": 11387.5716,
"train_samples_per_second": 4.918,
"train_steps_per_second": 0.038
}
],
"logging_steps": 10,
"max_steps": 437,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}