Llama3 / trainer_state.json
sabersaleh's picture
Upload folder using huggingface_hub
7245445 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6281078251766553,
"eval_steps": 400,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010468463752944255,
"grad_norm": 424.8911502777972,
"learning_rate": 3.125e-08,
"loss": 713.6646,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -3.0610547065734863,
"rewards/margins": -0.43895024061203003,
"rewards/rejected": -2.6221041679382324,
"step": 5
},
{
"epoch": 0.02093692750588851,
"grad_norm": 403.8683081084132,
"learning_rate": 6.25e-08,
"loss": 717.3508,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -2.993378162384033,
"rewards/margins": -0.3109555244445801,
"rewards/rejected": -2.682422637939453,
"step": 10
},
{
"epoch": 0.031405391258832765,
"grad_norm": 487.9238814591701,
"learning_rate": 9.375e-08,
"loss": 713.6135,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -2.5931520462036133,
"rewards/margins": -0.21937386691570282,
"rewards/rejected": -2.3737778663635254,
"step": 15
},
{
"epoch": 0.04187385501177702,
"grad_norm": 540.6870493028796,
"learning_rate": 1.25e-07,
"loss": 712.8184,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -3.164547920227051,
"rewards/margins": -0.7127091288566589,
"rewards/rejected": -2.451838970184326,
"step": 20
},
{
"epoch": 0.05234231876472128,
"grad_norm": 427.3180170525652,
"learning_rate": 1.5625e-07,
"loss": 707.0853,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -2.7615244388580322,
"rewards/margins": -0.16162791848182678,
"rewards/rejected": -2.599896192550659,
"step": 25
},
{
"epoch": 0.06281078251766553,
"grad_norm": 383763.7480098094,
"learning_rate": 1.875e-07,
"loss": 715.4415,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -2.58443546295166,
"rewards/margins": -0.2884238660335541,
"rewards/rejected": -2.2960116863250732,
"step": 30
},
{
"epoch": 0.07327924627060979,
"grad_norm": 439.43685355063843,
"learning_rate": 2.1874999999999997e-07,
"loss": 717.8594,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -2.9699530601501465,
"rewards/margins": -0.2793353796005249,
"rewards/rejected": -2.690617799758911,
"step": 35
},
{
"epoch": 0.08374771002355404,
"grad_norm": 503.22488533065024,
"learning_rate": 2.5e-07,
"loss": 710.3533,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -2.8341801166534424,
"rewards/margins": -0.11880241334438324,
"rewards/rejected": -2.7153773307800293,
"step": 40
},
{
"epoch": 0.0942161737764983,
"grad_norm": 1337.4413216082382,
"learning_rate": 2.8125e-07,
"loss": 711.881,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -2.8177173137664795,
"rewards/margins": -0.19809791445732117,
"rewards/rejected": -2.619619607925415,
"step": 45
},
{
"epoch": 0.10468463752944256,
"grad_norm": 385.7756641011158,
"learning_rate": 3.125e-07,
"loss": 705.6052,
"rewards/accuracies": 0.5625,
"rewards/chosen": -2.5368785858154297,
"rewards/margins": 0.30438369512557983,
"rewards/rejected": -2.8412623405456543,
"step": 50
},
{
"epoch": 0.11515310128238682,
"grad_norm": 422.3765258964566,
"learning_rate": 3.4374999999999994e-07,
"loss": 706.4469,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -3.026949882507324,
"rewards/margins": -0.3166283369064331,
"rewards/rejected": -2.7103216648101807,
"step": 55
},
{
"epoch": 0.12562156503533106,
"grad_norm": 394.9174924028097,
"learning_rate": 3.75e-07,
"loss": 710.1363,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -2.814709424972534,
"rewards/margins": -0.3439286947250366,
"rewards/rejected": -2.470780849456787,
"step": 60
},
{
"epoch": 0.1360900287882753,
"grad_norm": 590.9528736566529,
"learning_rate": 4.0625000000000003e-07,
"loss": 704.8263,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -3.221498489379883,
"rewards/margins": -0.13808628916740417,
"rewards/rejected": -3.083411931991577,
"step": 65
},
{
"epoch": 0.14655849254121958,
"grad_norm": 666.685124573273,
"learning_rate": 4.3749999999999994e-07,
"loss": 709.7217,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -2.896270751953125,
"rewards/margins": -0.23611800372600555,
"rewards/rejected": -2.6601529121398926,
"step": 70
},
{
"epoch": 0.15702695629416383,
"grad_norm": 422.81236685781573,
"learning_rate": 4.6874999999999996e-07,
"loss": 701.5896,
"rewards/accuracies": 0.4375,
"rewards/chosen": -3.1475062370300293,
"rewards/margins": -0.23899349570274353,
"rewards/rejected": -2.908513069152832,
"step": 75
},
{
"epoch": 0.16749542004710807,
"grad_norm": 1161.6272059916828,
"learning_rate": 5e-07,
"loss": 712.8695,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -2.549727439880371,
"rewards/margins": 0.05642218515276909,
"rewards/rejected": -2.606149673461914,
"step": 80
},
{
"epoch": 0.17796388380005235,
"grad_norm": 691.6756141822095,
"learning_rate": 5.3125e-07,
"loss": 715.2848,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -2.962017774581909,
"rewards/margins": -0.18228396773338318,
"rewards/rejected": -2.779733896255493,
"step": 85
},
{
"epoch": 0.1884323475529966,
"grad_norm": 557.7156194405,
"learning_rate": 5.625e-07,
"loss": 710.1722,
"rewards/accuracies": 0.4375,
"rewards/chosen": -3.1114089488983154,
"rewards/margins": -0.5688842535018921,
"rewards/rejected": -2.542525053024292,
"step": 90
},
{
"epoch": 0.19890081130594087,
"grad_norm": 643.081848366494,
"learning_rate": 5.9375e-07,
"loss": 708.6848,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -2.728463888168335,
"rewards/margins": 0.1268891543149948,
"rewards/rejected": -2.8553528785705566,
"step": 95
},
{
"epoch": 0.2093692750588851,
"grad_norm": 1749.0480774010928,
"learning_rate": 5.999678242522831e-07,
"loss": 712.9789,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -3.0832152366638184,
"rewards/margins": -0.22872868180274963,
"rewards/rejected": -2.8544864654541016,
"step": 100
},
{
"epoch": 0.21983773881182936,
"grad_norm": 10514.346037549345,
"learning_rate": 5.998371221059621e-07,
"loss": 697.8367,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -3.205540895462036,
"rewards/margins": -0.16175726056098938,
"rewards/rejected": -3.043783187866211,
"step": 105
},
{
"epoch": 0.23030620256477363,
"grad_norm": 2830.1739417475483,
"learning_rate": 5.996059263493219e-07,
"loss": 714.0083,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -3.1492106914520264,
"rewards/margins": -0.41294175386428833,
"rewards/rejected": -2.736268997192383,
"step": 110
},
{
"epoch": 0.24077466631771788,
"grad_norm": 3697.295888208749,
"learning_rate": 5.992743144700869e-07,
"loss": 703.9895,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -3.2083535194396973,
"rewards/margins": -0.05745415762066841,
"rewards/rejected": -3.1508989334106445,
"step": 115
},
{
"epoch": 0.2512431300706621,
"grad_norm": 1170.631987747208,
"learning_rate": 5.988423976115163e-07,
"loss": 921.5164,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -3.09690523147583,
"rewards/margins": -0.07702343910932541,
"rewards/rejected": -3.0198817253112793,
"step": 120
},
{
"epoch": 0.26171159382360637,
"grad_norm": 1391.9151015605798,
"learning_rate": 5.983103205351532e-07,
"loss": 704.9495,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -3.0641629695892334,
"rewards/margins": 0.2125791758298874,
"rewards/rejected": -3.2767422199249268,
"step": 125
},
{
"epoch": 0.2721800575765506,
"grad_norm": 39002.795574769065,
"learning_rate": 5.976782615723061e-07,
"loss": 728.894,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -3.5640883445739746,
"rewards/margins": -0.004037248902022839,
"rewards/rejected": -3.5600509643554688,
"step": 130
},
{
"epoch": 0.2826485213294949,
"grad_norm": 1164.3315552399881,
"learning_rate": 5.969464325642798e-07,
"loss": 700.7844,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -3.116656541824341,
"rewards/margins": 0.2637160122394562,
"rewards/rejected": -3.3803725242614746,
"step": 135
},
{
"epoch": 0.29311698508243916,
"grad_norm": 2549527.6361433878,
"learning_rate": 5.961150787913738e-07,
"loss": 1252.7453,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -3.451526641845703,
"rewards/margins": -0.17801007628440857,
"rewards/rejected": -3.2735161781311035,
"step": 140
},
{
"epoch": 0.3035854488353834,
"grad_norm": 2301.1157939792593,
"learning_rate": 5.951844788906746e-07,
"loss": 749.2581,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -3.379659652709961,
"rewards/margins": -0.495597779750824,
"rewards/rejected": -2.884061813354492,
"step": 145
},
{
"epoch": 0.31405391258832765,
"grad_norm": 1288.9212020876917,
"learning_rate": 5.941549447626671e-07,
"loss": 22400296550.4,
"rewards/accuracies": 0.53125,
"rewards/chosen": -3.371587038040161,
"rewards/margins": 0.21983376145362854,
"rewards/rejected": -3.5914206504821777,
"step": 150
},
{
"epoch": 0.3245223763412719,
"grad_norm": 4169.091186018576,
"learning_rate": 5.930268214666979e-07,
"loss": 689.9577,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -3.393592119216919,
"rewards/margins": 0.27183833718299866,
"rewards/rejected": -3.6654305458068848,
"step": 155
},
{
"epoch": 0.33499084009421615,
"grad_norm": 40987.876210824266,
"learning_rate": 5.918004871053251e-07,
"loss": 699.906,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -3.6145877838134766,
"rewards/margins": 0.1472555547952652,
"rewards/rejected": -3.76184344291687,
"step": 160
},
{
"epoch": 0.34545930384716045,
"grad_norm": 3446.7363874580406,
"learning_rate": 5.904763526975934e-07,
"loss": 700.4801,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -3.6712746620178223,
"rewards/margins": 0.03212170675396919,
"rewards/rejected": -3.7033963203430176,
"step": 165
},
{
"epoch": 0.3559277676001047,
"grad_norm": 4383.523843958487,
"learning_rate": 5.890548620412763e-07,
"loss": 696.9372,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -4.037501335144043,
"rewards/margins": -0.23374083638191223,
"rewards/rejected": -3.803760528564453,
"step": 170
},
{
"epoch": 0.36639623135304894,
"grad_norm": 1968.8221017002966,
"learning_rate": 5.875364915641322e-07,
"loss": 693.6001,
"rewards/accuracies": 0.5625,
"rewards/chosen": -4.061675071716309,
"rewards/margins": 0.16252286732196808,
"rewards/rejected": -4.224198818206787,
"step": 175
},
{
"epoch": 0.3768646951059932,
"grad_norm": 4165.273001929711,
"learning_rate": 5.859217501642258e-07,
"loss": 689.4774,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -3.6556944847106934,
"rewards/margins": 0.33984482288360596,
"rewards/rejected": -3.995539903640747,
"step": 180
},
{
"epoch": 0.38733315885893743,
"grad_norm": 7680.7561691485025,
"learning_rate": 5.842111790393642e-07,
"loss": 690.4501,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -4.226962566375732,
"rewards/margins": 0.14984741806983948,
"rewards/rejected": -4.376810073852539,
"step": 185
},
{
"epoch": 0.39780162261188173,
"grad_norm": 3514.664070908699,
"learning_rate": 5.824053515057091e-07,
"loss": 693.3683,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -3.8957512378692627,
"rewards/margins": 0.18127045035362244,
"rewards/rejected": -4.077021598815918,
"step": 190
},
{
"epoch": 0.408270086364826,
"grad_norm": 6684.674851679545,
"learning_rate": 5.805048728056245e-07,
"loss": 685.6387,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -4.011441707611084,
"rewards/margins": 0.16933482885360718,
"rewards/rejected": -4.180776596069336,
"step": 195
},
{
"epoch": 0.4187385501177702,
"grad_norm": 2817.010612327531,
"learning_rate": 5.785103799048218e-07,
"loss": 691.3805,
"rewards/accuracies": 0.53125,
"rewards/chosen": -4.5704665184021,
"rewards/margins": 0.05775844305753708,
"rewards/rejected": -4.628224849700928,
"step": 200
},
{
"epoch": 0.42920701387071447,
"grad_norm": 12460.132704854665,
"learning_rate": 5.764225412788754e-07,
"loss": 690.0626,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -4.749141693115234,
"rewards/margins": 0.10000785440206528,
"rewards/rejected": -4.849149703979492,
"step": 205
},
{
"epoch": 0.4396754776236587,
"grad_norm": 3156.835029013167,
"learning_rate": 5.742420566891749e-07,
"loss": 679.2428,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -3.9751620292663574,
"rewards/margins": 0.5410782098770142,
"rewards/rejected": -4.516240119934082,
"step": 210
},
{
"epoch": 0.45014394137660296,
"grad_norm": 4165.789445089526,
"learning_rate": 5.719696569483936e-07,
"loss": 679.8576,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -4.488650321960449,
"rewards/margins": 0.339005708694458,
"rewards/rejected": -4.827655792236328,
"step": 215
},
{
"epoch": 0.46061240512954726,
"grad_norm": 7010.571587146665,
"learning_rate": 5.696061036755478e-07,
"loss": 685.5709,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -4.9668354988098145,
"rewards/margins": 0.14022143185138702,
"rewards/rejected": -5.107056617736816,
"step": 220
},
{
"epoch": 0.4710808688824915,
"grad_norm": 4950.519059974548,
"learning_rate": 5.671521890407327e-07,
"loss": 680.7437,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -4.726534843444824,
"rewards/margins": 0.5676447749137878,
"rewards/rejected": -5.294179916381836,
"step": 225
},
{
"epoch": 0.48154933263543576,
"grad_norm": 7887.8682965510425,
"learning_rate": 5.64608735499618e-07,
"loss": 674.3191,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -4.650925159454346,
"rewards/margins": 0.5207107067108154,
"rewards/rejected": -5.17163610458374,
"step": 230
},
{
"epoch": 0.49201779638838,
"grad_norm": 5967.489228784308,
"learning_rate": 5.619765955177932e-07,
"loss": 680.9146,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -4.723302841186523,
"rewards/margins": 0.5448298454284668,
"rewards/rejected": -5.268132209777832,
"step": 235
},
{
"epoch": 0.5024862601413242,
"grad_norm": 4067.749182919556,
"learning_rate": 5.592566512850545e-07,
"loss": 677.9534,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -5.117281436920166,
"rewards/margins": 0.6115970611572266,
"rewards/rejected": -5.728878974914551,
"step": 240
},
{
"epoch": 0.5129547238942685,
"grad_norm": 3495.5145356721982,
"learning_rate": 5.564498144197293e-07,
"loss": 681.9477,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -5.144923210144043,
"rewards/margins": 0.386813759803772,
"rewards/rejected": -5.531736850738525,
"step": 245
},
{
"epoch": 0.5234231876472127,
"grad_norm": 4370.4657370961,
"learning_rate": 5.535570256631384e-07,
"loss": 679.4021,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -5.269853591918945,
"rewards/margins": 0.39420580863952637,
"rewards/rejected": -5.664059162139893,
"step": 250
},
{
"epoch": 0.533891651400157,
"grad_norm": 5546.329529459924,
"learning_rate": 5.505792545642954e-07,
"loss": 680.8774,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -5.6696882247924805,
"rewards/margins": 0.1662217080593109,
"rewards/rejected": -5.835909843444824,
"step": 255
},
{
"epoch": 0.5443601151531012,
"grad_norm": 3396.1113411173433,
"learning_rate": 5.475174991549528e-07,
"loss": 680.5286,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -5.477304935455322,
"rewards/margins": 0.6626185178756714,
"rewards/rejected": -6.139924049377441,
"step": 260
},
{
"epoch": 0.5548285789060455,
"grad_norm": 7509.706842299371,
"learning_rate": 5.443727856151007e-07,
"loss": 667.1712,
"rewards/accuracies": 0.59375,
"rewards/chosen": -5.695134162902832,
"rewards/margins": 0.6776683330535889,
"rewards/rejected": -6.372802734375,
"step": 265
},
{
"epoch": 0.5652970426589898,
"grad_norm": 5167.959854781231,
"learning_rate": 5.411461679290317e-07,
"loss": 678.3353,
"rewards/accuracies": 0.5625,
"rewards/chosen": -5.676094055175781,
"rewards/margins": 0.755618691444397,
"rewards/rejected": -6.431711673736572,
"step": 270
},
{
"epoch": 0.575765506411934,
"grad_norm": 3674.961462097515,
"learning_rate": 5.378387275320869e-07,
"loss": 666.944,
"rewards/accuracies": 0.625,
"rewards/chosen": -5.402568817138672,
"rewards/margins": 0.7821658253669739,
"rewards/rejected": -6.18473482131958,
"step": 275
},
{
"epoch": 0.5862339701648783,
"grad_norm": 5634.831880478573,
"learning_rate": 5.34451572948201e-07,
"loss": 670.9914,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -6.517806053161621,
"rewards/margins": 0.6073935627937317,
"rewards/rejected": -7.125199794769287,
"step": 280
},
{
"epoch": 0.5967024339178225,
"grad_norm": 10174.679987145297,
"learning_rate": 5.309858394183691e-07,
"loss": 674.4187,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -6.75530481338501,
"rewards/margins": 0.6639969944953918,
"rewards/rejected": -7.419301509857178,
"step": 285
},
{
"epoch": 0.6071708976707668,
"grad_norm": 6705.843344302837,
"learning_rate": 5.274426885201582e-07,
"loss": 680.643,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -6.8337082862854,
"rewards/margins": 0.32110291719436646,
"rewards/rejected": -7.154810905456543,
"step": 290
},
{
"epoch": 0.6176393614237111,
"grad_norm": 29305.105895087316,
"learning_rate": 5.238233077783925e-07,
"loss": 663.5017,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -5.573851585388184,
"rewards/margins": 0.6479231715202332,
"rewards/rejected": -6.221774578094482,
"step": 295
},
{
"epoch": 0.6281078251766553,
"grad_norm": 4360.840909716472,
"learning_rate": 5.201289102671411e-07,
"loss": 673.6718,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -6.299983024597168,
"rewards/margins": 0.7464480400085449,
"rewards/rejected": -7.046431064605713,
"step": 300
}
],
"logging_steps": 5,
"max_steps": 954,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}