zephyr-7b-sft-full-orpo / trainer_state.json
statking's picture
Model save
5dc2393 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994756161510225,
"eval_steps": 100,
"global_step": 953,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01048767697954903,
"grad_norm": 281.5632535171625,
"learning_rate": 7.000000000000001e-07,
"log_odds_chosen": 0.14837229251861572,
"log_odds_ratio": -0.7063122987747192,
"logits/chosen": -2.4233744144439697,
"logits/rejected": -2.3922557830810547,
"logps/chosen": -1.0665283203125,
"logps/rejected": -1.164435625076294,
"loss": 3.7384,
"nll_loss": 3.6487019062042236,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.05332641676068306,
"rewards/margins": 0.004895367659628391,
"rewards/rejected": -0.058221787214279175,
"step": 10
},
{
"epoch": 0.02097535395909806,
"grad_norm": 3.6095114671977337,
"learning_rate": 1.4000000000000001e-06,
"log_odds_chosen": 0.18771903216838837,
"log_odds_ratio": -0.6616674661636353,
"logits/chosen": -2.669743061065674,
"logits/rejected": -2.6637511253356934,
"logps/chosen": -0.8115625381469727,
"logps/rejected": -0.9194537401199341,
"loss": 0.598,
"nll_loss": 0.5553613901138306,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.04057813063263893,
"rewards/margins": 0.005394552834331989,
"rewards/rejected": -0.045972686260938644,
"step": 20
},
{
"epoch": 0.03146303093864709,
"grad_norm": 2.6104338509446743,
"learning_rate": 2.1e-06,
"log_odds_chosen": 0.24361269176006317,
"log_odds_ratio": -0.6484603881835938,
"logits/chosen": -2.8152480125427246,
"logits/rejected": -2.770486831665039,
"logps/chosen": -0.7975724339485168,
"logps/rejected": -0.9327106475830078,
"loss": 0.539,
"nll_loss": 0.4975182116031647,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.03987862169742584,
"rewards/margins": 0.006756913848221302,
"rewards/rejected": -0.04663553088903427,
"step": 30
},
{
"epoch": 0.04195070791819612,
"grad_norm": 2.6082713320666966,
"learning_rate": 2.8000000000000003e-06,
"log_odds_chosen": 0.18453697860240936,
"log_odds_ratio": -0.6863341331481934,
"logits/chosen": -2.7431702613830566,
"logits/rejected": -2.721076488494873,
"logps/chosen": -0.7775384783744812,
"logps/rejected": -0.8990561366081238,
"loss": 0.5182,
"nll_loss": 0.4802665710449219,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.03887692838907242,
"rewards/margins": 0.006075879093259573,
"rewards/rejected": -0.04495280981063843,
"step": 40
},
{
"epoch": 0.05243838489774515,
"grad_norm": 2.8319159240383356,
"learning_rate": 3.5e-06,
"log_odds_chosen": 0.2895735204219818,
"log_odds_ratio": -0.6829751133918762,
"logits/chosen": -2.6645712852478027,
"logits/rejected": -2.6532058715820312,
"logps/chosen": -0.7420316934585571,
"logps/rejected": -0.92218017578125,
"loss": 0.5346,
"nll_loss": 0.4737791419029236,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.03710158169269562,
"rewards/margins": 0.009007426910102367,
"rewards/rejected": -0.04610900953412056,
"step": 50
},
{
"epoch": 0.06292606187729417,
"grad_norm": 2.702391106634465,
"learning_rate": 4.2e-06,
"log_odds_chosen": 0.23618292808532715,
"log_odds_ratio": -0.6679760217666626,
"logits/chosen": -2.7234179973602295,
"logits/rejected": -2.701585292816162,
"logps/chosen": -0.7408851385116577,
"logps/rejected": -0.8674576878547668,
"loss": 0.5296,
"nll_loss": 0.5001371502876282,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.03704426437616348,
"rewards/margins": 0.0063286214135587215,
"rewards/rejected": -0.04337288811802864,
"step": 60
},
{
"epoch": 0.07341373885684321,
"grad_norm": 2.7579557747488237,
"learning_rate": 4.9e-06,
"log_odds_chosen": 0.1982727348804474,
"log_odds_ratio": -0.7039018869400024,
"logits/chosen": -2.716829776763916,
"logits/rejected": -2.7165746688842773,
"logps/chosen": -0.7602167129516602,
"logps/rejected": -0.8683260679244995,
"loss": 0.5179,
"nll_loss": 0.5095189213752747,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.03801083564758301,
"rewards/margins": 0.005405469331890345,
"rewards/rejected": -0.043416302651166916,
"step": 70
},
{
"epoch": 0.08390141583639224,
"grad_norm": 2.7333788754363826,
"learning_rate": 5.600000000000001e-06,
"log_odds_chosen": 0.19610878825187683,
"log_odds_ratio": -0.6825613379478455,
"logits/chosen": -2.6934926509857178,
"logits/rejected": -2.6538023948669434,
"logps/chosen": -0.8004279136657715,
"logps/rejected": -0.9359849095344543,
"loss": 0.5198,
"nll_loss": 0.44797396659851074,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.040021397173404694,
"rewards/margins": 0.006777846720069647,
"rewards/rejected": -0.04679924249649048,
"step": 80
},
{
"epoch": 0.09438909281594127,
"grad_norm": 2.643892428655997,
"learning_rate": 6.3e-06,
"log_odds_chosen": 0.32694971561431885,
"log_odds_ratio": -0.6449785828590393,
"logits/chosen": -2.6064088344573975,
"logits/rejected": -2.600590229034424,
"logps/chosen": -0.7779799699783325,
"logps/rejected": -0.970491886138916,
"loss": 0.5108,
"nll_loss": 0.4519652724266052,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.03889899700880051,
"rewards/margins": 0.009625596925616264,
"rewards/rejected": -0.04852459207177162,
"step": 90
},
{
"epoch": 0.1048767697954903,
"grad_norm": 2.7386435335682178,
"learning_rate": 7e-06,
"log_odds_chosen": 0.24293240904808044,
"log_odds_ratio": -0.65534907579422,
"logits/chosen": -2.800649881362915,
"logits/rejected": -2.783020257949829,
"logps/chosen": -0.7912999391555786,
"logps/rejected": -0.931311309337616,
"loss": 0.5226,
"nll_loss": 0.4863203167915344,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.03956499695777893,
"rewards/margins": 0.007000570185482502,
"rewards/rejected": -0.04656556248664856,
"step": 100
},
{
"epoch": 0.1048767697954903,
"eval_log_odds_chosen": 0.2873421609401703,
"eval_log_odds_ratio": -0.632556140422821,
"eval_logits/chosen": -2.7859702110290527,
"eval_logits/rejected": -2.758275270462036,
"eval_logps/chosen": -0.7728292942047119,
"eval_logps/rejected": -0.9448140263557434,
"eval_loss": 0.5279971957206726,
"eval_nll_loss": 0.49532046914100647,
"eval_rewards/accuracies": 0.6329365372657776,
"eval_rewards/chosen": -0.03864146023988724,
"eval_rewards/margins": 0.008599241264164448,
"eval_rewards/rejected": -0.04724070429801941,
"eval_runtime": 137.6903,
"eval_samples_per_second": 14.482,
"eval_steps_per_second": 0.458,
"step": 100
},
{
"epoch": 0.11536444677503933,
"grad_norm": 3.1992530570673416,
"learning_rate": 6.674238124719146e-06,
"log_odds_chosen": 0.34574735164642334,
"log_odds_ratio": -0.612960934638977,
"logits/chosen": -2.770359516143799,
"logits/rejected": -2.785818099975586,
"logps/chosen": -0.7360346913337708,
"logps/rejected": -0.9339498281478882,
"loss": 0.516,
"nll_loss": 0.46663737297058105,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.03680173680186272,
"rewards/margins": 0.009895754046738148,
"rewards/rejected": -0.04669748991727829,
"step": 110
},
{
"epoch": 0.12585212375458835,
"grad_norm": 2.389888529611206,
"learning_rate": 6.390096504226938e-06,
"log_odds_chosen": 0.3332720696926117,
"log_odds_ratio": -0.629552960395813,
"logits/chosen": -2.765531063079834,
"logits/rejected": -2.7438697814941406,
"logps/chosen": -0.7498644590377808,
"logps/rejected": -0.9586297273635864,
"loss": 0.5424,
"nll_loss": 0.5031455159187317,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.03749322146177292,
"rewards/margins": 0.010438265278935432,
"rewards/rejected": -0.0479314923286438,
"step": 120
},
{
"epoch": 0.1363398007341374,
"grad_norm": 2.352563456984363,
"learning_rate": 6.139406135149204e-06,
"log_odds_chosen": 0.22595734894275665,
"log_odds_ratio": -0.6784238219261169,
"logits/chosen": -2.7593860626220703,
"logits/rejected": -2.743048667907715,
"logps/chosen": -0.7811408042907715,
"logps/rejected": -0.9164878726005554,
"loss": 0.5343,
"nll_loss": 0.49365147948265076,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.039057038724422455,
"rewards/margins": 0.006767353508621454,
"rewards/rejected": -0.04582439363002777,
"step": 130
},
{
"epoch": 0.14682747771368643,
"grad_norm": 2.436711404156596,
"learning_rate": 5.916079783099616e-06,
"log_odds_chosen": 0.2472628802061081,
"log_odds_ratio": -0.6597720384597778,
"logits/chosen": -2.6898269653320312,
"logits/rejected": -2.669379711151123,
"logps/chosen": -0.8302755355834961,
"logps/rejected": -0.9775524139404297,
"loss": 0.5262,
"nll_loss": 0.49079251289367676,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.041513778269290924,
"rewards/margins": 0.007363851182162762,
"rewards/rejected": -0.04887763410806656,
"step": 140
},
{
"epoch": 0.15731515469323545,
"grad_norm": 2.622232308829729,
"learning_rate": 5.715476066494083e-06,
"log_odds_chosen": 0.23396515846252441,
"log_odds_ratio": -0.7018890976905823,
"logits/chosen": -2.6906025409698486,
"logits/rejected": -2.685272455215454,
"logps/chosen": -0.8395276069641113,
"logps/rejected": -0.9926843643188477,
"loss": 0.4873,
"nll_loss": 0.4751507639884949,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.041976384818553925,
"rewards/margins": 0.007657832466065884,
"rewards/rejected": -0.04963421821594238,
"step": 150
},
{
"epoch": 0.16780283167278448,
"grad_norm": 2.5349291816098587,
"learning_rate": 5.533985905294663e-06,
"log_odds_chosen": 0.23518291115760803,
"log_odds_ratio": -0.64958655834198,
"logits/chosen": -2.7026143074035645,
"logits/rejected": -2.690053701400757,
"logps/chosen": -0.7785183191299438,
"logps/rejected": -0.9093867540359497,
"loss": 0.5435,
"nll_loss": 0.4887324869632721,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.038925912231206894,
"rewards/margins": 0.006543423049151897,
"rewards/rejected": -0.045469339936971664,
"step": 160
},
{
"epoch": 0.1782905086523335,
"grad_norm": 2.421225073724309,
"learning_rate": 5.368754921931593e-06,
"log_odds_chosen": 0.3210265636444092,
"log_odds_ratio": -0.6400843262672424,
"logits/chosen": -2.7624573707580566,
"logits/rejected": -2.7493152618408203,
"logps/chosen": -0.7663661241531372,
"logps/rejected": -0.9589449763298035,
"loss": 0.5263,
"nll_loss": 0.4972688555717468,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.03831830993294716,
"rewards/margins": 0.009628941304981709,
"rewards/rejected": -0.047947246581315994,
"step": 170
},
{
"epoch": 0.18877818563188253,
"grad_norm": 2.413880479048562,
"learning_rate": 5.217491947499509e-06,
"log_odds_chosen": 0.29789280891418457,
"log_odds_ratio": -0.6485607028007507,
"logits/chosen": -2.750358819961548,
"logits/rejected": -2.7341530323028564,
"logps/chosen": -0.8058354258537292,
"logps/rejected": -0.9941579699516296,
"loss": 0.5125,
"nll_loss": 0.4958602488040924,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.04029177129268646,
"rewards/margins": 0.009416128508746624,
"rewards/rejected": -0.04970790073275566,
"step": 180
},
{
"epoch": 0.19926586261143156,
"grad_norm": 2.6903547627560362,
"learning_rate": 5.078333750770082e-06,
"log_odds_chosen": 0.3165002167224884,
"log_odds_ratio": -0.6190484762191772,
"logits/chosen": -2.766507387161255,
"logits/rejected": -2.747089385986328,
"logps/chosen": -0.8013149499893188,
"logps/rejected": -0.9806981086730957,
"loss": 0.5316,
"nll_loss": 0.5532199740409851,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.04006574675440788,
"rewards/margins": 0.008969161659479141,
"rewards/rejected": -0.04903491213917732,
"step": 190
},
{
"epoch": 0.2097535395909806,
"grad_norm": 2.1991852076726754,
"learning_rate": 4.949747468305832e-06,
"log_odds_chosen": 0.33575549721717834,
"log_odds_ratio": -0.651211678981781,
"logits/chosen": -2.7371087074279785,
"logits/rejected": -2.7220566272735596,
"logps/chosen": -0.7840306162834167,
"logps/rejected": -1.0072247982025146,
"loss": 0.5074,
"nll_loss": 0.5064893960952759,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.039201535284519196,
"rewards/margins": 0.011159711517393589,
"rewards/rejected": -0.05036124587059021,
"step": 200
},
{
"epoch": 0.2097535395909806,
"eval_log_odds_chosen": 0.31895044445991516,
"eval_log_odds_ratio": -0.6356511116027832,
"eval_logits/chosen": -2.700209140777588,
"eval_logits/rejected": -2.673612594604492,
"eval_logps/chosen": -0.7611523866653442,
"eval_logps/rejected": -0.9565821290016174,
"eval_loss": 0.5133659839630127,
"eval_nll_loss": 0.47739487886428833,
"eval_rewards/accuracies": 0.6408730149269104,
"eval_rewards/chosen": -0.03805762156844139,
"eval_rewards/margins": 0.009771487675607204,
"eval_rewards/rejected": -0.04782910645008087,
"eval_runtime": 136.4881,
"eval_samples_per_second": 14.609,
"eval_steps_per_second": 0.462,
"step": 200
},
{
"epoch": 0.22024121657052964,
"grad_norm": 2.2979124053363367,
"learning_rate": 4.830458915396479e-06,
"log_odds_chosen": 0.14570581912994385,
"log_odds_ratio": -0.7079066038131714,
"logits/chosen": -2.6945998668670654,
"logits/rejected": -2.693587064743042,
"logps/chosen": -0.7664598226547241,
"logps/rejected": -0.8435371518135071,
"loss": 0.5092,
"nll_loss": 0.47726479172706604,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.03832298889756203,
"rewards/margins": 0.003853868693113327,
"rewards/rejected": -0.04217685014009476,
"step": 210
},
{
"epoch": 0.23072889355007867,
"grad_norm": 2.7379211509120998,
"learning_rate": 4.719399037242694e-06,
"log_odds_chosen": 0.2301570177078247,
"log_odds_ratio": -0.6864482164382935,
"logits/chosen": -2.7330780029296875,
"logits/rejected": -2.738948106765747,
"logps/chosen": -0.7607365250587463,
"logps/rejected": -0.902021050453186,
"loss": 0.5025,
"nll_loss": 0.4629960060119629,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.038036830723285675,
"rewards/margins": 0.007064227946102619,
"rewards/rejected": -0.04510105401277542,
"step": 220
},
{
"epoch": 0.2412165705296277,
"grad_norm": 2.3286309701071986,
"learning_rate": 4.615663313770509e-06,
"log_odds_chosen": 0.30348774790763855,
"log_odds_ratio": -0.6618221402168274,
"logits/chosen": -2.681114673614502,
"logits/rejected": -2.680468797683716,
"logps/chosen": -0.8015350103378296,
"logps/rejected": -0.9835436940193176,
"loss": 0.5126,
"nll_loss": 0.47201746702194214,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.04007675126194954,
"rewards/margins": 0.009100432507693768,
"rewards/rejected": -0.04917718470096588,
"step": 230
},
{
"epoch": 0.2517042475091767,
"grad_norm": 2.498755216094707,
"learning_rate": 4.51848057057532e-06,
"log_odds_chosen": 0.28177785873413086,
"log_odds_ratio": -0.6470693945884705,
"logits/chosen": -2.7920804023742676,
"logits/rejected": -2.7859511375427246,
"logps/chosen": -0.7856557965278625,
"logps/rejected": -0.9694973826408386,
"loss": 0.5227,
"nll_loss": 0.49716347455978394,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.03928279131650925,
"rewards/margins": 0.009192083030939102,
"rewards/rejected": -0.04847487062215805,
"step": 240
},
{
"epoch": 0.26219192448872575,
"grad_norm": 2.5700569103186335,
"learning_rate": 4.427188724235731e-06,
"log_odds_chosen": 0.2942022681236267,
"log_odds_ratio": -0.6677531003952026,
"logits/chosen": -2.761166572570801,
"logits/rejected": -2.763213634490967,
"logps/chosen": -0.77226322889328,
"logps/rejected": -0.9335973858833313,
"loss": 0.4963,
"nll_loss": 0.4665839672088623,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.03861316293478012,
"rewards/margins": 0.008066706359386444,
"rewards/rejected": -0.04667987301945686,
"step": 250
},
{
"epoch": 0.2726796014682748,
"grad_norm": 2.5460185754878415,
"learning_rate": 4.341215710622295e-06,
"log_odds_chosen": 0.31073135137557983,
"log_odds_ratio": -0.6524397134780884,
"logits/chosen": -2.721327304840088,
"logits/rejected": -2.711200475692749,
"logps/chosen": -0.7779613137245178,
"logps/rejected": -0.9653064608573914,
"loss": 0.478,
"nll_loss": 0.40727710723876953,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.038898058235645294,
"rewards/margins": 0.009367265738546848,
"rewards/rejected": -0.048265330493450165,
"step": 260
},
{
"epoch": 0.2831672784478238,
"grad_norm": 2.63045792619979,
"learning_rate": 4.260064336151291e-06,
"log_odds_chosen": 0.2511529326438904,
"log_odds_ratio": -0.6676173806190491,
"logits/chosen": -2.757246255874634,
"logits/rejected": -2.7497289180755615,
"logps/chosen": -0.8231350779533386,
"logps/rejected": -0.9868103265762329,
"loss": 0.5115,
"nll_loss": 0.48606061935424805,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.04115675389766693,
"rewards/margins": 0.008183758705854416,
"rewards/rejected": -0.04934050887823105,
"step": 270
},
{
"epoch": 0.29365495542737285,
"grad_norm": 2.074128745122309,
"learning_rate": 4.183300132670378e-06,
"log_odds_chosen": 0.27424556016921997,
"log_odds_ratio": -0.6629655361175537,
"logits/chosen": -2.694702625274658,
"logits/rejected": -2.695335626602173,
"logps/chosen": -0.8050632476806641,
"logps/rejected": -0.9577094912528992,
"loss": 0.4891,
"nll_loss": 0.4250563681125641,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.0402531661093235,
"rewards/margins": 0.007632312830537558,
"rewards/rejected": -0.0478854700922966,
"step": 280
},
{
"epoch": 0.30414263240692185,
"grad_norm": 2.818316169672816,
"learning_rate": 4.110541536602925e-06,
"log_odds_chosen": 0.40846139192581177,
"log_odds_ratio": -0.6159543991088867,
"logits/chosen": -2.689415216445923,
"logits/rejected": -2.6885359287261963,
"logps/chosen": -0.729388952255249,
"logps/rejected": -0.9667993783950806,
"loss": 0.5032,
"nll_loss": 0.43972086906433105,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.03646944463253021,
"rewards/margins": 0.011870523914694786,
"rewards/rejected": -0.04833997040987015,
"step": 290
},
{
"epoch": 0.3146303093864709,
"grad_norm": 2.6319487345124495,
"learning_rate": 4.0414518843273805e-06,
"log_odds_chosen": 0.2938074767589569,
"log_odds_ratio": -0.675439178943634,
"logits/chosen": -2.746011257171631,
"logits/rejected": -2.719851016998291,
"logps/chosen": -0.7730266451835632,
"logps/rejected": -0.9800483584403992,
"loss": 0.5265,
"nll_loss": 0.45733898878097534,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.03865132853388786,
"rewards/margins": 0.01035108882933855,
"rewards/rejected": -0.04900241643190384,
"step": 300
},
{
"epoch": 0.3146303093864709,
"eval_log_odds_chosen": 0.32782861590385437,
"eval_log_odds_ratio": -0.6374222040176392,
"eval_logits/chosen": -2.75937819480896,
"eval_logits/rejected": -2.731720209121704,
"eval_logps/chosen": -0.7587753534317017,
"eval_logps/rejected": -0.9572128653526306,
"eval_loss": 0.5012248754501343,
"eval_nll_loss": 0.4652516841888428,
"eval_rewards/accuracies": 0.6329365372657776,
"eval_rewards/chosen": -0.037938766181468964,
"eval_rewards/margins": 0.009921879507601261,
"eval_rewards/rejected": -0.04786064475774765,
"eval_runtime": 143.3287,
"eval_samples_per_second": 13.912,
"eval_steps_per_second": 0.44,
"step": 300
},
{
"epoch": 0.3251179863660199,
"grad_norm": 2.303425231373124,
"learning_rate": 3.975732839729454e-06,
"log_odds_chosen": 0.23192088305950165,
"log_odds_ratio": -0.6818796396255493,
"logits/chosen": -2.7074503898620605,
"logits/rejected": -2.673837661743164,
"logps/chosen": -0.7971353530883789,
"logps/rejected": -0.9301053285598755,
"loss": 0.5302,
"nll_loss": 0.48708105087280273,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.039856769144535065,
"rewards/margins": 0.006648494862020016,
"rewards/rejected": -0.046505264937877655,
"step": 310
},
{
"epoch": 0.33560566334556896,
"grad_norm": 2.5118343787899735,
"learning_rate": 3.913118960624632e-06,
"log_odds_chosen": 0.3314226567745209,
"log_odds_ratio": -0.6417438387870789,
"logits/chosen": -2.7188448905944824,
"logits/rejected": -2.7005674839019775,
"logps/chosen": -0.7902022004127502,
"logps/rejected": -0.9723421335220337,
"loss": 0.4738,
"nll_loss": 0.44032588601112366,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.03951011225581169,
"rewards/margins": 0.009106996469199657,
"rewards/rejected": -0.048617102205753326,
"step": 320
},
{
"epoch": 0.34609334032511796,
"grad_norm": 2.490550595224948,
"learning_rate": 3.853373177942262e-06,
"log_odds_chosen": 0.29606467485427856,
"log_odds_ratio": -0.6935312151908875,
"logits/chosen": -2.6737678050994873,
"logits/rejected": -2.6778550148010254,
"logps/chosen": -0.7957532405853271,
"logps/rejected": -0.9609133005142212,
"loss": 0.5015,
"nll_loss": 0.48406466841697693,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.039787657558918,
"rewards/margins": 0.008258005604147911,
"rewards/rejected": -0.04804566502571106,
"step": 330
},
{
"epoch": 0.356581017304667,
"grad_norm": 2.455512863241718,
"learning_rate": 3.796283011826483e-06,
"log_odds_chosen": 0.2068498581647873,
"log_odds_ratio": -0.6988531947135925,
"logits/chosen": -2.656428575515747,
"logits/rejected": -2.67673659324646,
"logps/chosen": -0.7645977139472961,
"logps/rejected": -0.9020528793334961,
"loss": 0.5161,
"nll_loss": 0.46574801206588745,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.038229890167713165,
"rewards/margins": 0.006872760597616434,
"rewards/rejected": -0.04510264843702316,
"step": 340
},
{
"epoch": 0.36706869428421607,
"grad_norm": 2.3906859020418243,
"learning_rate": 3.7416573867739415e-06,
"log_odds_chosen": 0.32536062598228455,
"log_odds_ratio": -0.6628221273422241,
"logits/chosen": -2.7076945304870605,
"logits/rejected": -2.6763672828674316,
"logps/chosen": -0.7698060274124146,
"logps/rejected": -0.9597750902175903,
"loss": 0.4925,
"nll_loss": 0.468719482421875,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.03849030286073685,
"rewards/margins": 0.009498453699052334,
"rewards/rejected": -0.047988757491111755,
"step": 350
},
{
"epoch": 0.37755637126376507,
"grad_norm": 2.1635991647413824,
"learning_rate": 3.689323936863109e-06,
"log_odds_chosen": 0.4051761031150818,
"log_odds_ratio": -0.6067623496055603,
"logits/chosen": -2.6350862979888916,
"logits/rejected": -2.635108232498169,
"logps/chosen": -0.768888533115387,
"logps/rejected": -1.0009427070617676,
"loss": 0.5009,
"nll_loss": 0.45801717042922974,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.03844442963600159,
"rewards/margins": 0.011602705344557762,
"rewards/rejected": -0.0500471368432045,
"step": 360
},
{
"epoch": 0.3880440482433141,
"grad_norm": 2.3887899088845037,
"learning_rate": 3.6391267143702543e-06,
"log_odds_chosen": 0.4100113809108734,
"log_odds_ratio": -0.6096552014350891,
"logits/chosen": -2.707559108734131,
"logits/rejected": -2.6750998497009277,
"logps/chosen": -0.7636415362358093,
"logps/rejected": -1.0189807415008545,
"loss": 0.4701,
"nll_loss": 0.45124197006225586,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.038182083517313004,
"rewards/margins": 0.012766959145665169,
"rewards/rejected": -0.050949037075042725,
"step": 370
},
{
"epoch": 0.3985317252228631,
"grad_norm": 2.5794228625801225,
"learning_rate": 3.5909242322980396e-06,
"log_odds_chosen": 0.4701065421104431,
"log_odds_ratio": -0.5877975821495056,
"logits/chosen": -2.7147293090820312,
"logits/rejected": -2.700373888015747,
"logps/chosen": -0.7640558481216431,
"logps/rejected": -1.0210450887680054,
"loss": 0.4866,
"nll_loss": 0.4662235379219055,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.03820279613137245,
"rewards/margins": 0.012849463149905205,
"rewards/rejected": -0.05105225369334221,
"step": 380
},
{
"epoch": 0.4090194022024122,
"grad_norm": 2.2524505662506007,
"learning_rate": 3.544587784792833e-06,
"log_odds_chosen": 0.15358106791973114,
"log_odds_ratio": -0.6960343718528748,
"logits/chosen": -2.6469695568084717,
"logits/rejected": -2.6523191928863525,
"logps/chosen": -0.8073819875717163,
"logps/rejected": -0.9069193005561829,
"loss": 0.5052,
"nll_loss": 0.48589834570884705,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.04036910459399223,
"rewards/margins": 0.004976863972842693,
"rewards/rejected": -0.0453459694981575,
"step": 390
},
{
"epoch": 0.4195070791819612,
"grad_norm": 2.151733711875547,
"learning_rate": 3.5e-06,
"log_odds_chosen": 0.3257240355014801,
"log_odds_ratio": -0.6618676781654358,
"logits/chosen": -2.5556883811950684,
"logits/rejected": -2.5709598064422607,
"logps/chosen": -0.8370679616928101,
"logps/rejected": -1.0387462377548218,
"loss": 0.5194,
"nll_loss": 0.471977561712265,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.0418534018099308,
"rewards/margins": 0.010083912871778011,
"rewards/rejected": -0.05193731188774109,
"step": 400
},
{
"epoch": 0.4195070791819612,
"eval_log_odds_chosen": 0.3606604039669037,
"eval_log_odds_ratio": -0.6283872127532959,
"eval_logits/chosen": -2.6973965167999268,
"eval_logits/rejected": -2.664045572280884,
"eval_logps/chosen": -0.7416918277740479,
"eval_logps/rejected": -0.9558579921722412,
"eval_loss": 0.4911641776561737,
"eval_nll_loss": 0.455983966588974,
"eval_rewards/accuracies": 0.6428571343421936,
"eval_rewards/chosen": -0.03708459436893463,
"eval_rewards/margins": 0.010708308778703213,
"eval_rewards/rejected": -0.04779290035367012,
"eval_runtime": 137.3177,
"eval_samples_per_second": 14.521,
"eval_steps_per_second": 0.459,
"step": 400
},
{
"epoch": 0.4299947561615102,
"grad_norm": 2.234889439349526,
"learning_rate": 3.457053588273564e-06,
"log_odds_chosen": 0.22749297320842743,
"log_odds_ratio": -0.6977051496505737,
"logits/chosen": -2.6853058338165283,
"logits/rejected": -2.646806001663208,
"logps/chosen": -0.7714927792549133,
"logps/rejected": -0.9221086502075195,
"loss": 0.4951,
"nll_loss": 0.43608254194259644,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.038574643433094025,
"rewards/margins": 0.00753078842535615,
"rewards/rejected": -0.04610542953014374,
"step": 410
},
{
"epoch": 0.4404824331410593,
"grad_norm": 2.0285171917411766,
"learning_rate": 3.4156502553198657e-06,
"log_odds_chosen": 0.3810080885887146,
"log_odds_ratio": -0.6389856338500977,
"logits/chosen": -2.6045069694519043,
"logits/rejected": -2.621366024017334,
"logps/chosen": -0.7517096996307373,
"logps/rejected": -0.9603899121284485,
"loss": 0.4852,
"nll_loss": 0.42949992418289185,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.037585485726594925,
"rewards/margins": 0.01043400727212429,
"rewards/rejected": -0.048019491136074066,
"step": 420
},
{
"epoch": 0.4509701101206083,
"grad_norm": 2.508500818711511,
"learning_rate": 3.375699755192885e-06,
"log_odds_chosen": 0.3060067594051361,
"log_odds_ratio": -0.6428481936454773,
"logits/chosen": -2.6315762996673584,
"logits/rejected": -2.614450216293335,
"logps/chosen": -0.7450464367866516,
"logps/rejected": -0.9214862585067749,
"loss": 0.5054,
"nll_loss": 0.4888521730899811,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.03725232556462288,
"rewards/margins": 0.008821990340948105,
"rewards/rejected": -0.046074315905570984,
"step": 430
},
{
"epoch": 0.46145778710015734,
"grad_norm": 2.209049048242546,
"learning_rate": 3.337119062359573e-06,
"log_odds_chosen": 0.2785058617591858,
"log_odds_ratio": -0.6411095857620239,
"logits/chosen": -2.6460564136505127,
"logits/rejected": -2.6254661083221436,
"logps/chosen": -0.7616952061653137,
"logps/rejected": -0.9235254526138306,
"loss": 0.5024,
"nll_loss": 0.46845754981040955,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.038084764033555984,
"rewards/margins": 0.008091514930129051,
"rewards/rejected": -0.04617627337574959,
"step": 440
},
{
"epoch": 0.47194546407970633,
"grad_norm": 2.0098987626040574,
"learning_rate": 3.2998316455372222e-06,
"log_odds_chosen": 0.37491756677627563,
"log_odds_ratio": -0.648253321647644,
"logits/chosen": -2.6618144512176514,
"logits/rejected": -2.643500566482544,
"logps/chosen": -0.7266156673431396,
"logps/rejected": -0.9600238800048828,
"loss": 0.4828,
"nll_loss": 0.4462718069553375,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.03633078932762146,
"rewards/margins": 0.01167040504515171,
"rewards/rejected": -0.04800119251012802,
"step": 450
},
{
"epoch": 0.4824331410592554,
"grad_norm": 2.3085421987869785,
"learning_rate": 3.263766828841098e-06,
"log_odds_chosen": 0.2140667885541916,
"log_odds_ratio": -0.6971082091331482,
"logits/chosen": -2.6545071601867676,
"logits/rejected": -2.6458332538604736,
"logps/chosen": -0.8354724049568176,
"logps/rejected": -0.9942563772201538,
"loss": 0.4871,
"nll_loss": 0.48358869552612305,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.04177362099289894,
"rewards/margins": 0.007939198985695839,
"rewards/rejected": -0.04971281811594963,
"step": 460
},
{
"epoch": 0.4929208180388044,
"grad_norm": 2.58413257051123,
"learning_rate": 3.2288592281010976e-06,
"log_odds_chosen": 0.30273735523223877,
"log_odds_ratio": -0.6744717359542847,
"logits/chosen": -2.6462035179138184,
"logits/rejected": -2.6307010650634766,
"logps/chosen": -0.7793454527854919,
"logps/rejected": -0.9655405879020691,
"loss": 0.4932,
"nll_loss": 0.4597246050834656,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.038967277854681015,
"rewards/margins": 0.009309760294854641,
"rewards/rejected": -0.048277031630277634,
"step": 470
},
{
"epoch": 0.5034084950183534,
"grad_norm": 2.275276830168767,
"learning_rate": 3.195048252113469e-06,
"log_odds_chosen": 0.25159093737602234,
"log_odds_ratio": -0.6775428056716919,
"logits/chosen": -2.6590356826782227,
"logits/rejected": -2.649465560913086,
"logps/chosen": -0.7499970197677612,
"logps/rejected": -0.8869997262954712,
"loss": 0.4713,
"nll_loss": 0.4634857177734375,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.03749985247850418,
"rewards/margins": 0.006850133184343576,
"rewards/rejected": -0.04434997960925102,
"step": 480
},
{
"epoch": 0.5138961719979025,
"grad_norm": 2.134835184101472,
"learning_rate": 3.1622776601683796e-06,
"log_odds_chosen": 0.2592507004737854,
"log_odds_ratio": -0.6677337884902954,
"logits/chosen": -2.638939619064331,
"logits/rejected": -2.5990116596221924,
"logps/chosen": -0.8319272994995117,
"logps/rejected": -0.9564205408096313,
"loss": 0.4941,
"nll_loss": 0.4587552547454834,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.041596364229917526,
"rewards/margins": 0.006224661134183407,
"rewards/rejected": -0.04782102257013321,
"step": 490
},
{
"epoch": 0.5243838489774515,
"grad_norm": 2.3707837495895494,
"learning_rate": 3.1304951684997056e-06,
"log_odds_chosen": 0.25932976603507996,
"log_odds_ratio": -0.6785644292831421,
"logits/chosen": -2.690480947494507,
"logits/rejected": -2.6417829990386963,
"logps/chosen": -0.7875474095344543,
"logps/rejected": -0.9345542788505554,
"loss": 0.5008,
"nll_loss": 0.47637850046157837,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.0393773689866066,
"rewards/margins": 0.007350355386734009,
"rewards/rejected": -0.04672772437334061,
"step": 500
},
{
"epoch": 0.5243838489774515,
"eval_log_odds_chosen": 0.3873175382614136,
"eval_log_odds_ratio": -0.6208989024162292,
"eval_logits/chosen": -2.62943434715271,
"eval_logits/rejected": -2.5956878662109375,
"eval_logps/chosen": -0.7454984188079834,
"eval_logps/rejected": -0.9786220192909241,
"eval_loss": 0.4847143888473511,
"eval_nll_loss": 0.44987979531288147,
"eval_rewards/accuracies": 0.6507936716079712,
"eval_rewards/chosen": -0.03727491945028305,
"eval_rewards/margins": 0.011656176298856735,
"eval_rewards/rejected": -0.04893109202384949,
"eval_runtime": 138.4279,
"eval_samples_per_second": 14.405,
"eval_steps_per_second": 0.455,
"step": 500
},
{
"epoch": 0.5348715259570005,
"grad_norm": 1.9535668554599182,
"learning_rate": 3.0996520993903337e-06,
"log_odds_chosen": 0.32442158460617065,
"log_odds_ratio": -0.6475775837898254,
"logits/chosen": -2.6708967685699463,
"logits/rejected": -2.649402141571045,
"logps/chosen": -0.7484665513038635,
"logps/rejected": -0.9413715600967407,
"loss": 0.4786,
"nll_loss": 0.48495978116989136,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.03742332383990288,
"rewards/margins": 0.00964525155723095,
"rewards/rejected": -0.047068577259778976,
"step": 510
},
{
"epoch": 0.5453592029365496,
"grad_norm": 1.9645096615425393,
"learning_rate": 3.069703067574602e-06,
"log_odds_chosen": 0.2872227430343628,
"log_odds_ratio": -0.6613379716873169,
"logits/chosen": -2.6058475971221924,
"logits/rejected": -2.577051877975464,
"logps/chosen": -0.8017369508743286,
"logps/rejected": -0.9904945492744446,
"loss": 0.4897,
"nll_loss": 0.4331512451171875,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.04008684307336807,
"rewards/margins": 0.009437882341444492,
"rewards/rejected": -0.04952472820878029,
"step": 520
},
{
"epoch": 0.5558468799160986,
"grad_norm": 1.9526548988230616,
"learning_rate": 3.0406056993414858e-06,
"log_odds_chosen": 0.42971426248550415,
"log_odds_ratio": -0.641510009765625,
"logits/chosen": -2.6119577884674072,
"logits/rejected": -2.5998666286468506,
"logps/chosen": -0.7399083375930786,
"logps/rejected": -1.0167956352233887,
"loss": 0.4914,
"nll_loss": 0.41224998235702515,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.03699541836977005,
"rewards/margins": 0.013844366185367107,
"rewards/rejected": -0.050839781761169434,
"step": 530
},
{
"epoch": 0.5663345568956476,
"grad_norm": 1.9884035673972174,
"learning_rate": 3.012320380383546e-06,
"log_odds_chosen": 0.21374063193798065,
"log_odds_ratio": -0.6833196878433228,
"logits/chosen": -2.6167845726013184,
"logits/rejected": -2.599025011062622,
"logps/chosen": -0.7700163125991821,
"logps/rejected": -0.890272319316864,
"loss": 0.5043,
"nll_loss": 0.47903138399124146,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.03850081190466881,
"rewards/margins": 0.006012803874909878,
"rewards/rejected": -0.04451362043619156,
"step": 540
},
{
"epoch": 0.5768222338751966,
"grad_norm": 2.186607185927277,
"learning_rate": 2.9848100289785456e-06,
"log_odds_chosen": 0.45103105902671814,
"log_odds_ratio": -0.6082615852355957,
"logits/chosen": -2.6567091941833496,
"logits/rejected": -2.609574794769287,
"logps/chosen": -0.7585142850875854,
"logps/rejected": -1.0295699834823608,
"loss": 0.4918,
"nll_loss": 0.48958802223205566,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.03792571276426315,
"rewards/margins": 0.01355278305709362,
"rewards/rejected": -0.05147849768400192,
"step": 550
},
{
"epoch": 0.5873099108547457,
"grad_norm": 2.1145358879634872,
"learning_rate": 2.958039891549808e-06,
"log_odds_chosen": 0.2827582359313965,
"log_odds_ratio": -0.6594165563583374,
"logits/chosen": -2.6023669242858887,
"logits/rejected": -2.574957847595215,
"logps/chosen": -0.7867820858955383,
"logps/rejected": -0.9555041193962097,
"loss": 0.4774,
"nll_loss": 0.45714274048805237,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.0393391028046608,
"rewards/margins": 0.008436103351414204,
"rewards/rejected": -0.047775208950042725,
"step": 560
},
{
"epoch": 0.5977975878342947,
"grad_norm": 2.3757421806444343,
"learning_rate": 2.9319773580418683e-06,
"log_odds_chosen": 0.2533697485923767,
"log_odds_ratio": -0.6926103830337524,
"logits/chosen": -2.662379264831543,
"logits/rejected": -2.6397509574890137,
"logps/chosen": -0.7862294316291809,
"logps/rejected": -0.9584717750549316,
"loss": 0.463,
"nll_loss": 0.4819509983062744,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.039311472326517105,
"rewards/margins": 0.00861212145537138,
"rewards/rejected": -0.04792358726263046,
"step": 570
},
{
"epoch": 0.6082852648138437,
"grad_norm": 2.172213103107974,
"learning_rate": 2.906591794880899e-06,
"log_odds_chosen": 0.3392280340194702,
"log_odds_ratio": -0.6386864185333252,
"logits/chosen": -2.6814630031585693,
"logits/rejected": -2.6795036792755127,
"logps/chosen": -0.7794855833053589,
"logps/rejected": -1.0036094188690186,
"loss": 0.4996,
"nll_loss": 0.4401033818721771,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.038974277675151825,
"rewards/margins": 0.011206192895770073,
"rewards/rejected": -0.05018047243356705,
"step": 580
},
{
"epoch": 0.6187729417933928,
"grad_norm": 2.0671922387658377,
"learning_rate": 2.8818543935741638e-06,
"log_odds_chosen": 0.3985132575035095,
"log_odds_ratio": -0.6514524221420288,
"logits/chosen": -2.6682472229003906,
"logits/rejected": -2.679994821548462,
"logps/chosen": -0.7318185567855835,
"logps/rejected": -0.9744182825088501,
"loss": 0.4678,
"nll_loss": 0.49909916520118713,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.03659093379974365,
"rewards/margins": 0.012129982002079487,
"rewards/rejected": -0.048720914870500565,
"step": 590
},
{
"epoch": 0.6292606187729418,
"grad_norm": 2.1967713493078604,
"learning_rate": 2.8577380332470414e-06,
"log_odds_chosen": 0.35757365822792053,
"log_odds_ratio": -0.6395149230957031,
"logits/chosen": -2.663159132003784,
"logits/rejected": -2.649722099304199,
"logps/chosen": -0.7385202646255493,
"logps/rejected": -0.9542753100395203,
"loss": 0.4725,
"nll_loss": 0.4449065625667572,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.03692600876092911,
"rewards/margins": 0.010787753388285637,
"rewards/rejected": -0.04771377146244049,
"step": 600
},
{
"epoch": 0.6292606187729418,
"eval_log_odds_chosen": 0.35674363374710083,
"eval_log_odds_ratio": -0.631996214389801,
"eval_logits/chosen": -2.647721767425537,
"eval_logits/rejected": -2.6147334575653076,
"eval_logps/chosen": -0.7248181104660034,
"eval_logps/rejected": -0.9394434690475464,
"eval_loss": 0.4794267416000366,
"eval_nll_loss": 0.44346076250076294,
"eval_rewards/accuracies": 0.6349206566810608,
"eval_rewards/chosen": -0.03624090179800987,
"eval_rewards/margins": 0.01073127705603838,
"eval_rewards/rejected": -0.046972181648015976,
"eval_runtime": 137.9534,
"eval_samples_per_second": 14.454,
"eval_steps_per_second": 0.457,
"step": 600
},
{
"epoch": 0.6397482957524908,
"grad_norm": 2.2292431160793216,
"learning_rate": 2.834217155626206e-06,
"log_odds_chosen": 0.23770160973072052,
"log_odds_ratio": -0.6840949654579163,
"logits/chosen": -2.5699760913848877,
"logits/rejected": -2.5653116703033447,
"logps/chosen": -0.7841805219650269,
"logps/rejected": -0.9241795539855957,
"loss": 0.4832,
"nll_loss": 0.4458464980125427,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.039209023118019104,
"rewards/margins": 0.006999955512583256,
"rewards/rejected": -0.046208981424570084,
"step": 610
},
{
"epoch": 0.6502359727320398,
"grad_norm": 2.2910730765164247,
"learning_rate": 2.811267651158746e-06,
"log_odds_chosen": 0.21747846901416779,
"log_odds_ratio": -0.6945130825042725,
"logits/chosen": -2.724179744720459,
"logits/rejected": -2.691539764404297,
"logps/chosen": -0.7931413054466248,
"logps/rejected": -0.943394660949707,
"loss": 0.487,
"nll_loss": 0.4727168679237366,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.03965706750750542,
"rewards/margins": 0.007512666285037994,
"rewards/rejected": -0.04716973379254341,
"step": 620
},
{
"epoch": 0.6607236497115889,
"grad_norm": 2.2609308397995616,
"learning_rate": 2.788866755113585e-06,
"log_odds_chosen": 0.29844212532043457,
"log_odds_ratio": -0.690433919429779,
"logits/chosen": -2.718883991241455,
"logits/rejected": -2.7198710441589355,
"logps/chosen": -0.7700183391571045,
"logps/rejected": -0.9475862383842468,
"loss": 0.4893,
"nll_loss": 0.48064035177230835,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.038500916212797165,
"rewards/margins": 0.00887839961796999,
"rewards/rejected": -0.04737931489944458,
"step": 630
},
{
"epoch": 0.6712113266911379,
"grad_norm": 2.6649009571693107,
"learning_rate": 2.7669929526473316e-06,
"log_odds_chosen": 0.4156903326511383,
"log_odds_ratio": -0.6158550977706909,
"logits/chosen": -2.7182445526123047,
"logits/rejected": -2.6942853927612305,
"logps/chosen": -0.7768423557281494,
"logps/rejected": -1.0251133441925049,
"loss": 0.4711,
"nll_loss": 0.41822823882102966,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.03884211927652359,
"rewards/margins": 0.012413550168275833,
"rewards/rejected": -0.051255665719509125,
"step": 640
},
{
"epoch": 0.6816990036706869,
"grad_norm": 2.0343884705834268,
"learning_rate": 2.745625891934577e-06,
"log_odds_chosen": 0.23737592995166779,
"log_odds_ratio": -0.6948662996292114,
"logits/chosen": -2.74450421333313,
"logits/rejected": -2.7467565536499023,
"logps/chosen": -0.7428392767906189,
"logps/rejected": -0.8866605758666992,
"loss": 0.4898,
"nll_loss": 0.4688393175601959,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.03714196756482124,
"rewards/margins": 0.00719106663018465,
"rewards/rejected": -0.04433303326368332,
"step": 650
},
{
"epoch": 0.6921866806502359,
"grad_norm": 2.0637062426142556,
"learning_rate": 2.7247463045653303e-06,
"log_odds_chosen": 0.36518558859825134,
"log_odds_ratio": -0.6426655650138855,
"logits/chosen": -2.7563986778259277,
"logits/rejected": -2.74312424659729,
"logps/chosen": -0.7905346751213074,
"logps/rejected": -1.0196200609207153,
"loss": 0.4859,
"nll_loss": 0.4443667531013489,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.03952673822641373,
"rewards/margins": 0.011454259976744652,
"rewards/rejected": -0.05098099634051323,
"step": 660
},
{
"epoch": 0.702674357629785,
"grad_norm": 1.992995386941069,
"learning_rate": 2.704335932501895e-06,
"log_odds_chosen": 0.490286260843277,
"log_odds_ratio": -0.6087489724159241,
"logits/chosen": -2.72459077835083,
"logits/rejected": -2.7280569076538086,
"logps/chosen": -0.7373065948486328,
"logps/rejected": -1.0489108562469482,
"loss": 0.4831,
"nll_loss": 0.42895203828811646,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.03686532750725746,
"rewards/margins": 0.01558021642267704,
"rewards/rejected": -0.05244554951786995,
"step": 670
},
{
"epoch": 0.713162034609334,
"grad_norm": 2.8251895935339886,
"learning_rate": 2.6843774609657963e-06,
"log_odds_chosen": 0.3856969177722931,
"log_odds_ratio": -0.6318041086196899,
"logits/chosen": -2.7299182415008545,
"logits/rejected": -2.699131488800049,
"logps/chosen": -0.7913435697555542,
"logps/rejected": -1.0201423168182373,
"loss": 0.4669,
"nll_loss": 0.45303601026535034,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.03956717997789383,
"rewards/margins": 0.011439927853643894,
"rewards/rejected": -0.05100711062550545,
"step": 680
},
{
"epoch": 0.723649711588883,
"grad_norm": 2.3126283290431457,
"learning_rate": 2.6648544566940834e-06,
"log_odds_chosen": 0.21687667071819305,
"log_odds_ratio": -0.7159269452095032,
"logits/chosen": -2.7354016304016113,
"logits/rejected": -2.722414493560791,
"logps/chosen": -0.7863477468490601,
"logps/rejected": -0.9429599046707153,
"loss": 0.4903,
"nll_loss": 0.5047397613525391,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.03931739181280136,
"rewards/margins": 0.007830603048205376,
"rewards/rejected": -0.04714799299836159,
"step": 690
},
{
"epoch": 0.7341373885684321,
"grad_norm": 2.323029961728673,
"learning_rate": 2.6457513110645903e-06,
"log_odds_chosen": 0.342260479927063,
"log_odds_ratio": -0.6298097968101501,
"logits/chosen": -2.679320812225342,
"logits/rejected": -2.6582911014556885,
"logps/chosen": -0.7469282746315002,
"logps/rejected": -0.9541714787483215,
"loss": 0.4875,
"nll_loss": 0.4991229474544525,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.03734641522169113,
"rewards/margins": 0.010362156666815281,
"rewards/rejected": -0.04770857095718384,
"step": 700
},
{
"epoch": 0.7341373885684321,
"eval_log_odds_chosen": 0.417955607175827,
"eval_log_odds_ratio": -0.6158252358436584,
"eval_logits/chosen": -2.7213134765625,
"eval_logits/rejected": -2.691012144088745,
"eval_logps/chosen": -0.7365118861198425,
"eval_logps/rejected": -0.9954525232315063,
"eval_loss": 0.47666841745376587,
"eval_nll_loss": 0.441643089056015,
"eval_rewards/accuracies": 0.6408730149269104,
"eval_rewards/chosen": -0.036825601011514664,
"eval_rewards/margins": 0.01294703409075737,
"eval_rewards/rejected": -0.049772635102272034,
"eval_runtime": 140.8809,
"eval_samples_per_second": 14.154,
"eval_steps_per_second": 0.447,
"step": 700
},
{
"epoch": 0.7446250655479811,
"grad_norm": 2.2253143227977055,
"learning_rate": 2.627053187642805e-06,
"log_odds_chosen": 0.31003057956695557,
"log_odds_ratio": -0.6495457887649536,
"logits/chosen": -2.7463955879211426,
"logits/rejected": -2.7364678382873535,
"logps/chosen": -0.7539780139923096,
"logps/rejected": -0.9565252065658569,
"loss": 0.4819,
"nll_loss": 0.4394974708557129,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.0376988984644413,
"rewards/margins": 0.010127360001206398,
"rewards/rejected": -0.047826264053583145,
"step": 710
},
{
"epoch": 0.7551127425275301,
"grad_norm": 1.9919741933282713,
"learning_rate": 2.6087459737497545e-06,
"log_odds_chosen": 0.40133896470069885,
"log_odds_ratio": -0.6439169645309448,
"logits/chosen": -2.7264726161956787,
"logits/rejected": -2.7285008430480957,
"logps/chosen": -0.7132266759872437,
"logps/rejected": -0.9523170590400696,
"loss": 0.4904,
"nll_loss": 0.42442673444747925,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.03566133230924606,
"rewards/margins": 0.011954517103731632,
"rewards/rejected": -0.04761584475636482,
"step": 720
},
{
"epoch": 0.7656004195070791,
"grad_norm": 2.5524316814232657,
"learning_rate": 2.5908162356916185e-06,
"log_odds_chosen": 0.1571163833141327,
"log_odds_ratio": -0.7166911363601685,
"logits/chosen": -2.805894613265991,
"logits/rejected": -2.7996468544006348,
"logps/chosen": -0.7540133595466614,
"logps/rejected": -0.8382581472396851,
"loss": 0.4937,
"nll_loss": 0.4598192572593689,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.03770066425204277,
"rewards/margins": 0.004212243482470512,
"rewards/rejected": -0.041912905871868134,
"step": 730
},
{
"epoch": 0.7760880964866282,
"grad_norm": 2.1353118528501684,
"learning_rate": 2.5732511773283276e-06,
"log_odds_chosen": 0.35292255878448486,
"log_odds_ratio": -0.625573992729187,
"logits/chosen": -2.8535656929016113,
"logits/rejected": -2.8482494354248047,
"logps/chosen": -0.7254922389984131,
"logps/rejected": -0.9415895342826843,
"loss": 0.4903,
"nll_loss": 0.4391508996486664,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.036274611949920654,
"rewards/margins": 0.010804859921336174,
"rewards/rejected": -0.04707947373390198,
"step": 740
},
{
"epoch": 0.7865757734661772,
"grad_norm": 2.076299852744321,
"learning_rate": 2.556038601690775e-06,
"log_odds_chosen": 0.27716293931007385,
"log_odds_ratio": -0.6662799119949341,
"logits/chosen": -2.8263370990753174,
"logits/rejected": -2.8200631141662598,
"logps/chosen": -0.7884274125099182,
"logps/rejected": -0.9425498843193054,
"loss": 0.5033,
"nll_loss": 0.460857093334198,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.03942137211561203,
"rewards/margins": 0.00770611921325326,
"rewards/rejected": -0.04712748900055885,
"step": 750
},
{
"epoch": 0.7970634504457262,
"grad_norm": 2.144911846283459,
"learning_rate": 2.539166875385041e-06,
"log_odds_chosen": 0.28878992795944214,
"log_odds_ratio": -0.6523956060409546,
"logits/chosen": -2.827876567840576,
"logits/rejected": -2.818580389022827,
"logps/chosen": -0.7346550226211548,
"logps/rejected": -0.9111967086791992,
"loss": 0.4719,
"nll_loss": 0.3698672354221344,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.0367327556014061,
"rewards/margins": 0.008827080950140953,
"rewards/rejected": -0.0455598309636116,
"step": 760
},
{
"epoch": 0.8075511274252754,
"grad_norm": 2.457074288822972,
"learning_rate": 2.522624895547565e-06,
"log_odds_chosen": 0.2632114589214325,
"log_odds_ratio": -0.6844597458839417,
"logits/chosen": -2.785381317138672,
"logits/rejected": -2.7871222496032715,
"logps/chosen": -0.796169102191925,
"logps/rejected": -0.9764283895492554,
"loss": 0.4935,
"nll_loss": 0.4608798921108246,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.039808452129364014,
"rewards/margins": 0.009012967348098755,
"rewards/rejected": -0.048821426928043365,
"step": 770
},
{
"epoch": 0.8180388044048243,
"grad_norm": 2.1250851855347417,
"learning_rate": 2.506402059138015e-06,
"log_odds_chosen": 0.2769099771976471,
"log_odds_ratio": -0.6522020101547241,
"logits/chosen": -2.8049657344818115,
"logits/rejected": -2.8198862075805664,
"logps/chosen": -0.7881239056587219,
"logps/rejected": -0.9357802271842957,
"loss": 0.5049,
"nll_loss": 0.5033601522445679,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.039406199008226395,
"rewards/margins": 0.0073828138411045074,
"rewards/rejected": -0.0467890128493309,
"step": 780
},
{
"epoch": 0.8285264813843733,
"grad_norm": 2.1157883450641966,
"learning_rate": 2.49048823437687e-06,
"log_odds_chosen": 0.4010138511657715,
"log_odds_ratio": -0.6229840517044067,
"logits/chosen": -2.8338706493377686,
"logits/rejected": -2.8394291400909424,
"logps/chosen": -0.7245864272117615,
"logps/rejected": -0.9661226272583008,
"loss": 0.4661,
"nll_loss": 0.4065842032432556,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.03622932359576225,
"rewards/margins": 0.01207680907100439,
"rewards/rejected": -0.04830613359808922,
"step": 790
},
{
"epoch": 0.8390141583639223,
"grad_norm": 2.3895076758034515,
"learning_rate": 2.474873734152916e-06,
"log_odds_chosen": 0.48685508966445923,
"log_odds_ratio": -0.5867618918418884,
"logits/chosen": -2.813389301300049,
"logits/rejected": -2.7975525856018066,
"logps/chosen": -0.6979315876960754,
"logps/rejected": -1.0023411512374878,
"loss": 0.4796,
"nll_loss": 0.3860110640525818,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.03489658236503601,
"rewards/margins": 0.015220480971038342,
"rewards/rejected": -0.050117067992687225,
"step": 800
},
{
"epoch": 0.8390141583639223,
"eval_log_odds_chosen": 0.4362943768501282,
"eval_log_odds_ratio": -0.6168639063835144,
"eval_logits/chosen": -2.8114309310913086,
"eval_logits/rejected": -2.791295289993286,
"eval_logps/chosen": -0.7415919303894043,
"eval_logps/rejected": -1.016213297843933,
"eval_loss": 0.4739992916584015,
"eval_nll_loss": 0.4396199584007263,
"eval_rewards/accuracies": 0.6507936716079712,
"eval_rewards/chosen": -0.037079595029354095,
"eval_rewards/margins": 0.013731070794165134,
"eval_rewards/rejected": -0.050810668617486954,
"eval_runtime": 137.8725,
"eval_samples_per_second": 14.463,
"eval_steps_per_second": 0.457,
"step": 800
},
{
"epoch": 0.8495018353434715,
"grad_norm": 2.2171962411607398,
"learning_rate": 2.459549291242073e-06,
"log_odds_chosen": 0.4064277708530426,
"log_odds_ratio": -0.6227105259895325,
"logits/chosen": -2.8798890113830566,
"logits/rejected": -2.8490796089172363,
"logps/chosen": -0.729169487953186,
"logps/rejected": -0.9680086970329285,
"loss": 0.4744,
"nll_loss": 0.4338308870792389,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.03645847737789154,
"rewards/margins": 0.011941960081458092,
"rewards/rejected": -0.04840043932199478,
"step": 810
},
{
"epoch": 0.8599895123230205,
"grad_norm": 2.607409368726623,
"learning_rate": 2.4445060351935238e-06,
"log_odds_chosen": 0.3091586232185364,
"log_odds_ratio": -0.6474903225898743,
"logits/chosen": -2.820725679397583,
"logits/rejected": -2.804964303970337,
"logps/chosen": -0.7581018805503845,
"logps/rejected": -0.9343080520629883,
"loss": 0.4661,
"nll_loss": 0.3911210894584656,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.03790510073304176,
"rewards/margins": 0.00881030224263668,
"rewards/rejected": -0.046715401113033295,
"step": 820
},
{
"epoch": 0.8704771893025695,
"grad_norm": 2.6267861444652034,
"learning_rate": 2.4297354707521817e-06,
"log_odds_chosen": 0.21734324097633362,
"log_odds_ratio": -0.7081775069236755,
"logits/chosen": -2.805722236633301,
"logits/rejected": -2.8377511501312256,
"logps/chosen": -0.777400553226471,
"logps/rejected": -0.915818989276886,
"loss": 0.4873,
"nll_loss": 0.4305228292942047,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.03887002915143967,
"rewards/margins": 0.0069209253415465355,
"rewards/rejected": -0.04579095169901848,
"step": 830
},
{
"epoch": 0.8809648662821186,
"grad_norm": 2.1614161917289363,
"learning_rate": 2.4152294576982395e-06,
"log_odds_chosen": 0.21988508105278015,
"log_odds_ratio": -0.6872502565383911,
"logits/chosen": -2.8258466720581055,
"logits/rejected": -2.8268680572509766,
"logps/chosen": -0.7874829769134521,
"logps/rejected": -0.9251054525375366,
"loss": 0.4733,
"nll_loss": 0.4440709054470062,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.03937415033578873,
"rewards/margins": 0.006881123874336481,
"rewards/rejected": -0.04625527560710907,
"step": 840
},
{
"epoch": 0.8914525432616676,
"grad_norm": 2.2102319814571074,
"learning_rate": 2.4009801919951233e-06,
"log_odds_chosen": 0.3129335641860962,
"log_odds_ratio": -0.6348214149475098,
"logits/chosen": -2.8568568229675293,
"logits/rejected": -2.865201473236084,
"logps/chosen": -0.749543309211731,
"logps/rejected": -0.9329560399055481,
"loss": 0.466,
"nll_loss": 0.4490523934364319,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.03747716546058655,
"rewards/margins": 0.009170634672045708,
"rewards/rejected": -0.046647801995277405,
"step": 850
},
{
"epoch": 0.9019402202412166,
"grad_norm": 2.082847476776939,
"learning_rate": 2.3869801881466573e-06,
"log_odds_chosen": 0.2860751152038574,
"log_odds_ratio": -0.6700129508972168,
"logits/chosen": -2.825407028198242,
"logits/rejected": -2.8392233848571777,
"logps/chosen": -0.7431017756462097,
"logps/rejected": -0.9103603363037109,
"loss": 0.4884,
"nll_loss": 0.4357692301273346,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.037155088037252426,
"rewards/margins": 0.008362922817468643,
"rewards/rejected": -0.045518018305301666,
"step": 860
},
{
"epoch": 0.9124278972207656,
"grad_norm": 2.188429034443825,
"learning_rate": 2.3732222626728365e-06,
"log_odds_chosen": 0.3270949423313141,
"log_odds_ratio": -0.6543049812316895,
"logits/chosen": -2.8709769248962402,
"logits/rejected": -2.888324022293091,
"logps/chosen": -0.7763268947601318,
"logps/rejected": -0.9964207410812378,
"loss": 0.454,
"nll_loss": 0.4407920837402344,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.03881634771823883,
"rewards/margins": 0.011004697531461716,
"rewards/rejected": -0.04982104152441025,
"step": 870
},
{
"epoch": 0.9229155742003147,
"grad_norm": 1.8451620085670009,
"learning_rate": 2.359699518621347e-06,
"log_odds_chosen": 0.3485734164714813,
"log_odds_ratio": -0.6351412534713745,
"logits/chosen": -2.9025185108184814,
"logits/rejected": -2.8809902667999268,
"logps/chosen": -0.7233132719993591,
"logps/rejected": -0.9310896992683411,
"loss": 0.4524,
"nll_loss": 0.4024543762207031,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.03616566210985184,
"rewards/margins": 0.010388821363449097,
"rewards/rejected": -0.046554479748010635,
"step": 880
},
{
"epoch": 0.9334032511798637,
"grad_norm": 1.9306573871485972,
"learning_rate": 2.3464053310389682e-06,
"log_odds_chosen": 0.3904303014278412,
"log_odds_ratio": -0.623832106590271,
"logits/chosen": -2.84079909324646,
"logits/rejected": -2.8426525592803955,
"logps/chosen": -0.7186557650566101,
"logps/rejected": -0.9262601137161255,
"loss": 0.4565,
"nll_loss": 0.42616167664527893,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.035932786762714386,
"rewards/margins": 0.010380217805504799,
"rewards/rejected": -0.046313002705574036,
"step": 890
},
{
"epoch": 0.9438909281594127,
"grad_norm": 2.157911532280212,
"learning_rate": 2.333333333333333e-06,
"log_odds_chosen": 0.3039458692073822,
"log_odds_ratio": -0.6423442959785461,
"logits/chosen": -2.896359920501709,
"logits/rejected": -2.9049692153930664,
"logps/chosen": -0.6981052756309509,
"logps/rejected": -0.8672422170639038,
"loss": 0.4851,
"nll_loss": 0.428159236907959,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.03490526229143143,
"rewards/margins": 0.008456850424408913,
"rewards/rejected": -0.04336211457848549,
"step": 900
},
{
"epoch": 0.9438909281594127,
"eval_log_odds_chosen": 0.36685651540756226,
"eval_log_odds_ratio": -0.6244728565216064,
"eval_logits/chosen": -2.969223976135254,
"eval_logits/rejected": -2.9542508125305176,
"eval_logps/chosen": -0.7142534852027893,
"eval_logps/rejected": -0.9323597550392151,
"eval_loss": 0.47141149640083313,
"eval_nll_loss": 0.4360823631286621,
"eval_rewards/accuracies": 0.6527777910232544,
"eval_rewards/chosen": -0.035712677985429764,
"eval_rewards/margins": 0.01090531051158905,
"eval_rewards/rejected": -0.046617984771728516,
"eval_runtime": 138.0948,
"eval_samples_per_second": 14.439,
"eval_steps_per_second": 0.456,
"step": 900
},
{
"epoch": 0.9543786051389617,
"grad_norm": 2.4004822961845957,
"learning_rate": 2.3204774044612855e-06,
"log_odds_chosen": 0.4948676526546478,
"log_odds_ratio": -0.626745343208313,
"logits/chosen": -2.963355302810669,
"logits/rejected": -2.9515814781188965,
"logps/chosen": -0.7483548521995544,
"logps/rejected": -1.0602718591690063,
"loss": 0.4776,
"nll_loss": 0.42798590660095215,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.03741774708032608,
"rewards/margins": 0.015595847740769386,
"rewards/rejected": -0.05301359295845032,
"step": 910
},
{
"epoch": 0.9648662821185108,
"grad_norm": 2.154391749062073,
"learning_rate": 2.3078316568852547e-06,
"log_odds_chosen": 0.3418871760368347,
"log_odds_ratio": -0.6459903717041016,
"logits/chosen": -2.8877079486846924,
"logits/rejected": -2.9023048877716064,
"logps/chosen": -0.7208271622657776,
"logps/rejected": -0.9329261779785156,
"loss": 0.4496,
"nll_loss": 0.39838844537734985,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.03604135662317276,
"rewards/margins": 0.010604949668049812,
"rewards/rejected": -0.04664631187915802,
"step": 920
},
{
"epoch": 0.9753539590980598,
"grad_norm": 2.4150467379552776,
"learning_rate": 2.2953904252438353e-06,
"log_odds_chosen": 0.31212860345840454,
"log_odds_ratio": -0.6628017425537109,
"logits/chosen": -2.9404473304748535,
"logits/rejected": -2.935260772705078,
"logps/chosen": -0.7885305285453796,
"logps/rejected": -1.0043061971664429,
"loss": 0.4752,
"nll_loss": 0.48344022035598755,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.0394265279173851,
"rewards/margins": 0.010788780637085438,
"rewards/rejected": -0.05021531134843826,
"step": 930
},
{
"epoch": 0.9858416360776088,
"grad_norm": 2.2491855597526786,
"learning_rate": 2.2831482556870475e-06,
"log_odds_chosen": 0.2697109580039978,
"log_odds_ratio": -0.6924097537994385,
"logits/chosen": -2.9477505683898926,
"logits/rejected": -2.9367494583129883,
"logps/chosen": -0.7188832759857178,
"logps/rejected": -0.8695234060287476,
"loss": 0.4739,
"nll_loss": 0.44516521692276,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.03594416007399559,
"rewards/margins": 0.007532012648880482,
"rewards/rejected": -0.0434761717915535,
"step": 940
},
{
"epoch": 0.9963293130571579,
"grad_norm": 2.438616188075854,
"learning_rate": 2.2710998958306758e-06,
"log_odds_chosen": 0.26511335372924805,
"log_odds_ratio": -0.6899660229682922,
"logits/chosen": -2.9427490234375,
"logits/rejected": -2.945517063140869,
"logps/chosen": -0.7803043127059937,
"logps/rejected": -0.9409860372543335,
"loss": 0.4993,
"nll_loss": 0.4652082026004791,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.03901521861553192,
"rewards/margins": 0.008034082129597664,
"rewards/rejected": -0.047049302607774734,
"step": 950
},
{
"epoch": 0.9994756161510225,
"step": 953,
"total_flos": 0.0,
"train_loss": 0.5301580581685054,
"train_runtime": 20737.8205,
"train_samples_per_second": 2.942,
"train_steps_per_second": 0.046
}
],
"logging_steps": 10,
"max_steps": 953,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}