STAIR-Llama-3.1-8B-DPO-3 / trainer_state.json
skyai798's picture
Upload folder using huggingface_hub
3101072 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999649982499125,
"eval_steps": 500,
"global_step": 357,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0280014000700035,
"grad_norm": 61.10678368436559,
"learning_rate": 1.3888888888888888e-07,
"logits/chosen": -0.6513304710388184,
"logits/rejected": -0.6610185503959656,
"logps/chosen": -396.359619140625,
"logps/rejected": -397.0393371582031,
"loss": 0.8679,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": 0.006401772145181894,
"rewards/margins": -0.00633437093347311,
"rewards/rejected": 0.012736144475638866,
"step": 10
},
{
"epoch": 0.056002800140007,
"grad_norm": 58.411802607555295,
"learning_rate": 2.7777777777777776e-07,
"logits/chosen": -0.6498872637748718,
"logits/rejected": -0.6464060544967651,
"logps/chosen": -357.53594970703125,
"logps/rejected": -362.6917419433594,
"loss": 0.8677,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.01474761962890625,
"rewards/margins": -0.0023844907991588116,
"rewards/rejected": -0.012363128364086151,
"step": 20
},
{
"epoch": 0.0840042002100105,
"grad_norm": 58.885273724206364,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -0.6749114394187927,
"logits/rejected": -0.6687039136886597,
"logps/chosen": -364.6935729980469,
"logps/rejected": -363.3437805175781,
"loss": 0.8652,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.002331914845854044,
"rewards/margins": 0.0017761134076863527,
"rewards/rejected": -0.004108029417693615,
"step": 30
},
{
"epoch": 0.112005600280014,
"grad_norm": 58.81350398572691,
"learning_rate": 4.998084579146532e-07,
"logits/chosen": -0.6095571517944336,
"logits/rejected": -0.6133966445922852,
"logps/chosen": -391.51312255859375,
"logps/rejected": -383.5360412597656,
"loss": 0.857,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": 0.04013464227318764,
"rewards/margins": 0.03998289257287979,
"rewards/rejected": 0.00015174821601249278,
"step": 40
},
{
"epoch": 0.1400070003500175,
"grad_norm": 53.82936706874698,
"learning_rate": 4.976569787782584e-07,
"logits/chosen": -0.6407713294029236,
"logits/rejected": -0.6338817477226257,
"logps/chosen": -394.0837707519531,
"logps/rejected": -388.13946533203125,
"loss": 0.8571,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.0042350986041128635,
"rewards/margins": 0.02888796292245388,
"rewards/rejected": -0.03312305733561516,
"step": 50
},
{
"epoch": 0.168008400420021,
"grad_norm": 94.59632664663164,
"learning_rate": 4.931352528237397e-07,
"logits/chosen": -0.6345051527023315,
"logits/rejected": -0.6252551674842834,
"logps/chosen": -362.01007080078125,
"logps/rejected": -361.896484375,
"loss": 0.8467,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.026126855984330177,
"rewards/margins": 0.04506516456604004,
"rewards/rejected": -0.07119203358888626,
"step": 60
},
{
"epoch": 0.1960098004900245,
"grad_norm": 53.596742254187554,
"learning_rate": 4.862865560637862e-07,
"logits/chosen": -0.6581880450248718,
"logits/rejected": -0.6622239351272583,
"logps/chosen": -360.1644287109375,
"logps/rejected": -370.22357177734375,
"loss": 0.8469,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.03359580785036087,
"rewards/margins": 0.0706966444849968,
"rewards/rejected": -0.10429245233535767,
"step": 70
},
{
"epoch": 0.224011200560028,
"grad_norm": 58.433831169578426,
"learning_rate": 4.771764352146005e-07,
"logits/chosen": -0.6605185270309448,
"logits/rejected": -0.6589399576187134,
"logps/chosen": -385.06634521484375,
"logps/rejected": -387.2419738769531,
"loss": 0.8296,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.0589970238506794,
"rewards/margins": 0.06647703796625137,
"rewards/rejected": -0.12547405064105988,
"step": 80
},
{
"epoch": 0.2520126006300315,
"grad_norm": 57.687812779743524,
"learning_rate": 4.658920803689553e-07,
"logits/chosen": -0.6234251260757446,
"logits/rejected": -0.63193279504776,
"logps/chosen": -380.99102783203125,
"logps/rejected": -384.75,
"loss": 0.8274,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.09747395664453506,
"rewards/margins": 0.13489681482315063,
"rewards/rejected": -0.2323707789182663,
"step": 90
},
{
"epoch": 0.280014000700035,
"grad_norm": 51.987093732637376,
"learning_rate": 4.5254149052732074e-07,
"logits/chosen": -0.5935919880867004,
"logits/rejected": -0.5995978116989136,
"logps/chosen": -365.17034912109375,
"logps/rejected": -366.9688720703125,
"loss": 0.823,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.1324506253004074,
"rewards/margins": 0.11821047961711884,
"rewards/rejected": -0.25066110491752625,
"step": 100
},
{
"epoch": 0.3080154007700385,
"grad_norm": 56.36533929974317,
"learning_rate": 4.372524399734997e-07,
"logits/chosen": -0.6224404573440552,
"logits/rejected": -0.6308005452156067,
"logps/chosen": -366.1192626953125,
"logps/rejected": -365.95452880859375,
"loss": 0.8183,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.23297986388206482,
"rewards/margins": 0.2393256425857544,
"rewards/rejected": -0.4723054766654968,
"step": 110
},
{
"epoch": 0.336016800840042,
"grad_norm": 58.24416326380675,
"learning_rate": 4.201712553872657e-07,
"logits/chosen": -0.6303149461746216,
"logits/rejected": -0.6240934729576111,
"logps/chosen": -400.3144226074219,
"logps/rejected": -398.35565185546875,
"loss": 0.8163,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.15998268127441406,
"rewards/margins": 0.1608564555644989,
"rewards/rejected": -0.3208391070365906,
"step": 120
},
{
"epoch": 0.3640182009100455,
"grad_norm": 55.406602765547156,
"learning_rate": 4.014614153978704e-07,
"logits/chosen": -0.664055585861206,
"logits/rejected": -0.6637083888053894,
"logps/chosen": -348.1646423339844,
"logps/rejected": -348.63470458984375,
"loss": 0.8041,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.1901615858078003,
"rewards/margins": 0.11691661179065704,
"rewards/rejected": -0.30707818269729614,
"step": 130
},
{
"epoch": 0.392019600980049,
"grad_norm": 68.77886528211879,
"learning_rate": 3.8130198598165444e-07,
"logits/chosen": -0.6314767599105835,
"logits/rejected": -0.6304478645324707,
"logps/chosen": -422.37127685546875,
"logps/rejected": -416.79571533203125,
"loss": 0.8088,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.2302822768688202,
"rewards/margins": 0.11769070476293564,
"rewards/rejected": -0.3479730188846588,
"step": 140
},
{
"epoch": 0.4200210010500525,
"grad_norm": 59.72315250139388,
"learning_rate": 3.598859066780754e-07,
"logits/chosen": -0.6638253331184387,
"logits/rejected": -0.6717976331710815,
"logps/chosen": -417.35394287109375,
"logps/rejected": -419.6104431152344,
"loss": 0.7981,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.1914275586605072,
"rewards/margins": 0.2206917554140091,
"rewards/rejected": -0.4121193289756775,
"step": 150
},
{
"epoch": 0.448022401120056,
"grad_norm": 55.46653773859189,
"learning_rate": 3.374181440262409e-07,
"logits/chosen": -0.660588264465332,
"logits/rejected": -0.6529449224472046,
"logps/chosen": -362.0516052246094,
"logps/rejected": -363.2063293457031,
"loss": 0.7991,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.22989945113658905,
"rewards/margins": 0.2941688001155853,
"rewards/rejected": -0.5240682363510132,
"step": 160
},
{
"epoch": 0.4760238011900595,
"grad_norm": 52.29591301111268,
"learning_rate": 3.14113729894821e-07,
"logits/chosen": -0.6663147807121277,
"logits/rejected": -0.6645540595054626,
"logps/chosen": -349.26556396484375,
"logps/rejected": -350.56536865234375,
"loss": 0.8001,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.28285256028175354,
"rewards/margins": 0.2937370836734772,
"rewards/rejected": -0.5765896439552307,
"step": 170
},
{
"epoch": 0.504025201260063,
"grad_norm": 52.652777282426925,
"learning_rate": 2.9019570347986706e-07,
"logits/chosen": -0.6935344338417053,
"logits/rejected": -0.6966893076896667,
"logps/chosen": -408.3907165527344,
"logps/rejected": -406.89556884765625,
"loss": 0.7937,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.14246144890785217,
"rewards/margins": 0.26209157705307007,
"rewards/rejected": -0.40455299615859985,
"step": 180
},
{
"epoch": 0.5320266013300665,
"grad_norm": 53.707911382607165,
"learning_rate": 2.6589297666702654e-07,
"logits/chosen": -0.6629470586776733,
"logits/rejected": -0.6508482694625854,
"logps/chosen": -381.4786376953125,
"logps/rejected": -381.14105224609375,
"loss": 0.7828,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.22650186717510223,
"rewards/margins": 0.49067601561546326,
"rewards/rejected": -0.7171779274940491,
"step": 190
},
{
"epoch": 0.56002800140007,
"grad_norm": 58.031636496032235,
"learning_rate": 2.414381431880974e-07,
"logits/chosen": -0.7053166627883911,
"logits/rejected": -0.7067330479621887,
"logps/chosen": -338.40826416015625,
"logps/rejected": -340.7874755859375,
"loss": 0.7841,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.13104045391082764,
"rewards/margins": 0.4193459451198578,
"rewards/rejected": -0.550386369228363,
"step": 200
},
{
"epoch": 0.5880294014700735,
"grad_norm": 52.285898129134694,
"learning_rate": 2.1706525253979534e-07,
"logits/chosen": -0.6645469665527344,
"logits/rejected": -0.682064414024353,
"logps/chosen": -353.6531677246094,
"logps/rejected": -361.7914123535156,
"loss": 0.7812,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.2817208766937256,
"rewards/margins": 0.40158504247665405,
"rewards/rejected": -0.6833059191703796,
"step": 210
},
{
"epoch": 0.616030801540077,
"grad_norm": 51.20855890287249,
"learning_rate": 1.9300756996985379e-07,
"logits/chosen": -0.6860191822052002,
"logits/rejected": -0.6938604116439819,
"logps/chosen": -383.1531982421875,
"logps/rejected": -388.2940979003906,
"loss": 0.7743,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.1995268315076828,
"rewards/margins": 0.29305171966552734,
"rewards/rejected": -0.49257856607437134,
"step": 220
},
{
"epoch": 0.6440322016100805,
"grad_norm": 59.00218646947897,
"learning_rate": 1.6949534396892355e-07,
"logits/chosen": -0.63894122838974,
"logits/rejected": -0.6412523984909058,
"logps/chosen": -371.1944580078125,
"logps/rejected": -369.9986877441406,
"loss": 0.7781,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.2539726793766022,
"rewards/margins": 0.2992478013038635,
"rewards/rejected": -0.5532204508781433,
"step": 230
},
{
"epoch": 0.672033601680084,
"grad_norm": 54.18024076081892,
"learning_rate": 1.4675360263490295e-07,
"logits/chosen": -0.6566568613052368,
"logits/rejected": -0.6547525525093079,
"logps/chosen": -374.5245056152344,
"logps/rejected": -372.85205078125,
"loss": 0.7789,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.27056020498275757,
"rewards/margins": 0.2600334584712982,
"rewards/rejected": -0.5305936932563782,
"step": 240
},
{
"epoch": 0.7000350017500875,
"grad_norm": 51.99890637389821,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": -0.6490362882614136,
"logits/rejected": -0.6515687108039856,
"logps/chosen": -339.3143005371094,
"logps/rejected": -345.4483642578125,
"loss": 0.7694,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.21335434913635254,
"rewards/margins": 0.34682440757751465,
"rewards/rejected": -0.5601787567138672,
"step": 250
},
{
"epoch": 0.728036401820091,
"grad_norm": 51.948937918535066,
"learning_rate": 1.0444273293265149e-07,
"logits/chosen": -0.6950569748878479,
"logits/rejected": -0.6903547644615173,
"logps/chosen": -366.1841735839844,
"logps/rejected": -369.20501708984375,
"loss": 0.7654,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.26671379804611206,
"rewards/margins": 0.5104727149009705,
"rewards/rejected": -0.7771865129470825,
"step": 260
},
{
"epoch": 0.7560378018900945,
"grad_norm": 50.70399011598055,
"learning_rate": 8.527854855097224e-08,
"logits/chosen": -0.6942373514175415,
"logits/rejected": -0.6850725412368774,
"logps/chosen": -373.9687194824219,
"logps/rejected": -375.5534973144531,
"loss": 0.777,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.2907332181930542,
"rewards/margins": 0.28084948658943176,
"rewards/rejected": -0.5715826749801636,
"step": 270
},
{
"epoch": 0.784039201960098,
"grad_norm": 57.415336554214456,
"learning_rate": 6.769086121815423e-08,
"logits/chosen": -0.680923342704773,
"logits/rejected": -0.6828472018241882,
"logps/chosen": -355.0218505859375,
"logps/rejected": -355.49542236328125,
"loss": 0.7707,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.30371373891830444,
"rewards/margins": 0.31885650753974915,
"rewards/rejected": -0.622570276260376,
"step": 280
},
{
"epoch": 0.8120406020301015,
"grad_norm": 57.63174427513397,
"learning_rate": 5.184799714145557e-08,
"logits/chosen": -0.695022702217102,
"logits/rejected": -0.6762406826019287,
"logps/chosen": -365.63861083984375,
"logps/rejected": -363.7517395019531,
"loss": 0.7698,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.2936111092567444,
"rewards/margins": 0.30915942788124084,
"rewards/rejected": -0.6027705073356628,
"step": 290
},
{
"epoch": 0.840042002100105,
"grad_norm": 56.79055475776468,
"learning_rate": 3.790158337517127e-08,
"logits/chosen": -0.6702035069465637,
"logits/rejected": -0.6661104559898376,
"logps/chosen": -377.29071044921875,
"logps/rejected": -382.6650695800781,
"loss": 0.7656,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.349121630191803,
"rewards/margins": 0.44733327627182007,
"rewards/rejected": -0.7964549660682678,
"step": 300
},
{
"epoch": 0.8680434021701086,
"grad_norm": 55.63568503616908,
"learning_rate": 2.5985096645928934e-08,
"logits/chosen": -0.6938387155532837,
"logits/rejected": -0.6860832571983337,
"logps/chosen": -408.07073974609375,
"logps/rejected": -408.2176208496094,
"loss": 0.7705,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.4276718199253082,
"rewards/margins": 0.31639137864112854,
"rewards/rejected": -0.7440632581710815,
"step": 310
},
{
"epoch": 0.896044802240112,
"grad_norm": 51.80246792386731,
"learning_rate": 1.6212585889044366e-08,
"logits/chosen": -0.6435590386390686,
"logits/rejected": -0.6448679566383362,
"logps/chosen": -388.0117492675781,
"logps/rejected": -395.35443115234375,
"loss": 0.7644,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.27469509840011597,
"rewards/margins": 0.4029006063938141,
"rewards/rejected": -0.6775957345962524,
"step": 320
},
{
"epoch": 0.9240462023101155,
"grad_norm": 54.61689196980515,
"learning_rate": 8.677580722139671e-09,
"logits/chosen": -0.6612351536750793,
"logits/rejected": -0.6655117273330688,
"logps/chosen": -411.57568359375,
"logps/rejected": -413.49468994140625,
"loss": 0.7677,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.2688027024269104,
"rewards/margins": 0.27715611457824707,
"rewards/rejected": -0.5459588766098022,
"step": 330
},
{
"epoch": 0.952047602380119,
"grad_norm": 47.07624026939366,
"learning_rate": 3.452196302677901e-09,
"logits/chosen": -0.6746488809585571,
"logits/rejected": -0.6755790710449219,
"logps/chosen": -368.6392517089844,
"logps/rejected": -369.9941101074219,
"loss": 0.7629,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.2345232516527176,
"rewards/margins": 0.3673866391181946,
"rewards/rejected": -0.601909875869751,
"step": 340
},
{
"epoch": 0.9800490024501225,
"grad_norm": 51.32121740454206,
"learning_rate": 5.864431365401879e-10,
"logits/chosen": -0.6399149894714355,
"logits/rejected": -0.6450085639953613,
"logps/chosen": -406.9984436035156,
"logps/rejected": -398.86395263671875,
"loss": 0.7734,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.28699636459350586,
"rewards/margins": 0.3435482978820801,
"rewards/rejected": -0.6305446624755859,
"step": 350
},
{
"epoch": 0.999649982499125,
"step": 357,
"total_flos": 115812661985280.0,
"train_loss": 0.8014469694356624,
"train_runtime": 7763.3592,
"train_samples_per_second": 5.887,
"train_steps_per_second": 0.046
}
],
"logging_steps": 10,
"max_steps": 357,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 115812661985280.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}