Qwen2.5-7B-EN-Zero / trainer_state.json
watermelonhjg's picture
Upload folder using huggingface_hub
a266f29 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 10,
"global_step": 1125,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 440.27813415527345,
"epoch": 0.02666666666666667,
"grad_norm": 0.35731378197669983,
"kl": 7.561445236206054e-05,
"learning_rate": 2.654867256637168e-07,
"loss": 0.0,
"reward": 0.15875000283122062,
"reward_std": 0.25816794726997616,
"rewards/accuracy_reward": 0.12875000196509062,
"rewards/format_reward": 0.030000000586733223,
"step": 10
},
{
"completion_length": 467.41250762939455,
"epoch": 0.05333333333333334,
"grad_norm": 0.4308978319168091,
"kl": 0.00021507740020751954,
"learning_rate": 5.309734513274336e-07,
"loss": 0.0,
"reward": 0.15562500259839,
"reward_std": 0.27249111477285626,
"rewards/accuracy_reward": 0.13000000221654773,
"rewards/format_reward": 0.025625000568106772,
"step": 20
},
{
"completion_length": 396.5225093841553,
"epoch": 0.08,
"grad_norm": 52.731353759765625,
"kl": 0.004652214050292969,
"learning_rate": 7.964601769911505e-07,
"loss": 0.0002,
"reward": 0.31500000339001416,
"reward_std": 0.3808905828744173,
"rewards/accuracy_reward": 0.12812500270083546,
"rewards/format_reward": 0.18687500241212546,
"step": 30
},
{
"completion_length": 185.5206272125244,
"epoch": 0.10666666666666667,
"grad_norm": 0.5472067594528198,
"kl": 0.0395751953125,
"learning_rate": 1.0619469026548673e-06,
"loss": 0.0016,
"reward": 0.8362500123679638,
"reward_std": 0.41473600510507824,
"rewards/accuracy_reward": 0.10625000237487256,
"rewards/format_reward": 0.7300000071525574,
"step": 40
},
{
"completion_length": 92.74375104904175,
"epoch": 0.13333333333333333,
"grad_norm": 0.49235209822654724,
"kl": 0.11463623046875,
"learning_rate": 1.3274336283185841e-06,
"loss": 0.0046,
"reward": 1.1543750211596489,
"reward_std": 0.2653807267546654,
"rewards/accuracy_reward": 0.190625003259629,
"rewards/format_reward": 0.9637500062584877,
"step": 50
},
{
"completion_length": 182.87937908172609,
"epoch": 0.16,
"grad_norm": 0.7026166915893555,
"kl": 0.10775146484375,
"learning_rate": 1.592920353982301e-06,
"loss": 0.0043,
"reward": 1.330625008046627,
"reward_std": 0.3806515397503972,
"rewards/accuracy_reward": 0.4137500094715506,
"rewards/format_reward": 0.9168750137090683,
"step": 60
},
{
"completion_length": 286.4493797302246,
"epoch": 0.18666666666666668,
"grad_norm": 0.19512760639190674,
"kl": 0.071099853515625,
"learning_rate": 1.8584070796460177e-06,
"loss": 0.0028,
"reward": 1.4037500321865082,
"reward_std": 0.4500323969870806,
"rewards/accuracy_reward": 0.5518750101327896,
"rewards/format_reward": 0.8518750131130218,
"step": 70
},
{
"completion_length": 300.55125617980957,
"epoch": 0.21333333333333335,
"grad_norm": 0.19568626582622528,
"kl": 0.066259765625,
"learning_rate": 2.1238938053097345e-06,
"loss": 0.0027,
"reward": 1.5556250244379044,
"reward_std": 0.3508193654939532,
"rewards/accuracy_reward": 0.6306250087916851,
"rewards/format_reward": 0.9250000104308128,
"step": 80
},
{
"completion_length": 323.54563064575194,
"epoch": 0.24,
"grad_norm": 0.210642471909523,
"kl": 0.063372802734375,
"learning_rate": 2.3893805309734516e-06,
"loss": 0.0025,
"reward": 1.5068750262260437,
"reward_std": 0.37218285240232946,
"rewards/accuracy_reward": 0.5925000049173832,
"rewards/format_reward": 0.9143750101327897,
"step": 90
},
{
"completion_length": 335.67500762939454,
"epoch": 0.26666666666666666,
"grad_norm": 0.17836834490299225,
"kl": 0.062786865234375,
"learning_rate": 2.6548672566371683e-06,
"loss": 0.0025,
"reward": 1.5043750196695327,
"reward_std": 0.38465452194213867,
"rewards/accuracy_reward": 0.6131250146776438,
"rewards/format_reward": 0.8912500098347664,
"step": 100
},
{
"completion_length": 302.84687995910645,
"epoch": 0.29333333333333333,
"grad_norm": 0.20703768730163574,
"kl": 0.06512451171875,
"learning_rate": 2.920353982300885e-06,
"loss": 0.0026,
"reward": 1.4862500309944153,
"reward_std": 0.3997718315571547,
"rewards/accuracy_reward": 0.6006250124424696,
"rewards/format_reward": 0.8856250092387199,
"step": 110
},
{
"completion_length": 247.6512535095215,
"epoch": 0.32,
"grad_norm": 0.22252444922924042,
"kl": 0.071343994140625,
"learning_rate": 2.9996458567456176e-06,
"loss": 0.0029,
"reward": 1.4106250196695327,
"reward_std": 0.3797724399715662,
"rewards/accuracy_reward": 0.49812501221895217,
"rewards/format_reward": 0.912500011920929,
"step": 120
},
{
"completion_length": 275.83062744140625,
"epoch": 0.3466666666666667,
"grad_norm": 704.6102905273438,
"kl": 1.121209716796875,
"learning_rate": 2.997911680090067e-06,
"loss": 0.0449,
"reward": 1.491250029206276,
"reward_std": 0.33902281522750854,
"rewards/accuracy_reward": 0.5725000131875276,
"rewards/format_reward": 0.918750011920929,
"step": 130
},
{
"completion_length": 762.2556308746338,
"epoch": 0.37333333333333335,
"grad_norm": 6.110317230224609,
"kl": 169.40343017578124,
"learning_rate": 2.9947340923033686e-06,
"loss": 6.7759,
"reward": 0.2593750054948032,
"reward_std": 0.14749210346490144,
"rewards/accuracy_reward": 0.13187500317580997,
"rewards/format_reward": 0.12750000222586094,
"step": 140
},
{
"completion_length": 577.000634765625,
"epoch": 0.4,
"grad_norm": 1.6375890970230103,
"kl": 50.33268432617187,
"learning_rate": 2.99011615535883e-06,
"loss": 2.0188,
"reward": 0.5993750111199916,
"reward_std": 0.27639281619340184,
"rewards/accuracy_reward": 0.21562500661239029,
"rewards/format_reward": 0.3837500066496432,
"step": 150
},
{
"completion_length": 278.7000045776367,
"epoch": 0.4266666666666667,
"grad_norm": 0.1893930733203888,
"kl": 0.142626953125,
"learning_rate": 2.984062319172742e-06,
"loss": 0.0057,
"reward": 1.5175000250339508,
"reward_std": 0.3350706363096833,
"rewards/accuracy_reward": 0.5768750105053186,
"rewards/format_reward": 0.9406250104308128,
"step": 160
},
{
"completion_length": 283.7487537384033,
"epoch": 0.4533333333333333,
"grad_norm": 0.3801301419734955,
"kl": 0.07415771484375,
"learning_rate": 2.9765784173163723e-06,
"loss": 0.003,
"reward": 1.428125023841858,
"reward_std": 0.4501983530819416,
"rewards/accuracy_reward": 0.5725000098347663,
"rewards/format_reward": 0.8556250154972076,
"step": 170
},
{
"completion_length": 317.7350051879883,
"epoch": 0.48,
"grad_norm": 0.17540688812732697,
"kl": 0.07098388671875,
"learning_rate": 2.967671661394643e-06,
"loss": 0.0028,
"reward": 1.4887500375509262,
"reward_std": 0.346977224946022,
"rewards/accuracy_reward": 0.5512500097975135,
"rewards/format_reward": 0.9375000059604645,
"step": 180
},
{
"completion_length": 362.6912559509277,
"epoch": 0.5066666666666667,
"grad_norm": 0.17630642652511597,
"kl": 0.062762451171875,
"learning_rate": 2.957350634096912e-06,
"loss": 0.0025,
"reward": 1.4950000196695328,
"reward_std": 0.3771749962121248,
"rewards/accuracy_reward": 0.5831250146031379,
"rewards/format_reward": 0.9118750140070915,
"step": 190
},
{
"completion_length": 388.05750503540037,
"epoch": 0.5333333333333333,
"grad_norm": 0.14493422210216522,
"kl": 0.07095947265625,
"learning_rate": 2.945625280926568e-06,
"loss": 0.0028,
"reward": 1.4818750262260436,
"reward_std": 0.43593788109719755,
"rewards/accuracy_reward": 0.6106250114738941,
"rewards/format_reward": 0.8712500110268593,
"step": 200
},
{
"completion_length": 346.9868797302246,
"epoch": 0.56,
"grad_norm": 0.24546337127685547,
"kl": 0.07967529296875,
"learning_rate": 2.932506900617379e-06,
"loss": 0.0032,
"reward": 1.4625000298023223,
"reward_std": 0.43860488161444666,
"rewards/accuracy_reward": 0.5737500078976154,
"rewards/format_reward": 0.8887500122189522,
"step": 210
},
{
"completion_length": 309.0206298828125,
"epoch": 0.5866666666666667,
"grad_norm": 0.2144029587507248,
"kl": 0.11422119140625,
"learning_rate": 2.91800813424586e-06,
"loss": 0.0046,
"reward": 1.3962500274181366,
"reward_std": 0.46159769259393213,
"rewards/accuracy_reward": 0.5225000099278987,
"rewards/format_reward": 0.8737500131130218,
"step": 220
},
{
"completion_length": 313.8818809509277,
"epoch": 0.6133333333333333,
"grad_norm": 0.16003139317035675,
"kl": 0.17523193359375,
"learning_rate": 2.9021429530501337e-06,
"loss": 0.007,
"reward": 1.4012500315904617,
"reward_std": 0.4679255347698927,
"rewards/accuracy_reward": 0.5293750107288361,
"rewards/format_reward": 0.8718750134110451,
"step": 230
},
{
"completion_length": 302.87313079833984,
"epoch": 0.64,
"grad_norm": 0.17115922272205353,
"kl": 0.216015625,
"learning_rate": 2.8849266449670255e-06,
"loss": 0.0086,
"reward": 1.4156250298023223,
"reward_std": 0.5012115199118853,
"rewards/accuracy_reward": 0.5593750070780515,
"rewards/format_reward": 0.856250011920929,
"step": 240
},
{
"completion_length": 267.08875503540037,
"epoch": 0.6666666666666666,
"grad_norm": 0.237528994679451,
"kl": 0.2441162109375,
"learning_rate": 2.866375799900369e-06,
"loss": 0.0098,
"reward": 1.367500016093254,
"reward_std": 0.48268986456096175,
"rewards/accuracy_reward": 0.507500009611249,
"rewards/format_reward": 0.8600000113248825,
"step": 250
},
{
"completion_length": 271.5650035858154,
"epoch": 0.6933333333333334,
"grad_norm": 0.13727092742919922,
"kl": 0.19796142578125,
"learning_rate": 2.8465082937347156e-06,
"loss": 0.0079,
"reward": 1.4412500113248825,
"reward_std": 0.3789402700960636,
"rewards/accuracy_reward": 0.5393750105053187,
"rewards/format_reward": 0.9018750116229057,
"step": 260
},
{
"completion_length": 331.74375648498534,
"epoch": 0.72,
"grad_norm": 0.14567354321479797,
"kl": 0.2938720703125,
"learning_rate": 2.8253432711098524e-06,
"loss": 0.0118,
"reward": 1.3037500187754631,
"reward_std": 0.5119317132979632,
"rewards/accuracy_reward": 0.47687500678002837,
"rewards/format_reward": 0.8268750131130218,
"step": 270
},
{
"completion_length": 270.1787559509277,
"epoch": 0.7466666666666667,
"grad_norm": 0.16504357755184174,
"kl": 0.2083251953125,
"learning_rate": 2.802901126972727e-06,
"loss": 0.0083,
"reward": 1.407500022649765,
"reward_std": 0.4102827299386263,
"rewards/accuracy_reward": 0.5068750072270631,
"rewards/format_reward": 0.9006250113248825,
"step": 280
},
{
"completion_length": 394.0706314086914,
"epoch": 0.7733333333333333,
"grad_norm": 0.13634301722049713,
"kl": 0.244677734375,
"learning_rate": 2.7792034869245574e-06,
"loss": 0.0098,
"reward": 1.365000019967556,
"reward_std": 0.552520602196455,
"rewards/accuracy_reward": 0.5725000109523535,
"rewards/format_reward": 0.7925000131130219,
"step": 290
},
{
"completion_length": 359.3012565612793,
"epoch": 0.8,
"grad_norm": 0.12871386110782623,
"kl": 0.17838134765625,
"learning_rate": 2.7542731863820665e-06,
"loss": 0.0071,
"reward": 1.4187500134110451,
"reward_std": 0.45300735253840685,
"rewards/accuracy_reward": 0.5175000108778477,
"rewards/format_reward": 0.9012500166893005,
"step": 300
},
{
"completion_length": 319.09125633239745,
"epoch": 0.8266666666666667,
"grad_norm": 0.15812256932258606,
"kl": 0.2130859375,
"learning_rate": 2.7281342485729135e-06,
"loss": 0.0085,
"reward": 1.4262500196695327,
"reward_std": 0.47630340307950975,
"rewards/accuracy_reward": 0.5343750081956387,
"rewards/format_reward": 0.891875010728836,
"step": 310
},
{
"completion_length": 236.86187667846679,
"epoch": 0.8533333333333334,
"grad_norm": 0.14847490191459656,
"kl": 0.1891845703125,
"learning_rate": 2.7008118613865407e-06,
"loss": 0.0076,
"reward": 1.5068750202655792,
"reward_std": 0.35237235836684705,
"rewards/accuracy_reward": 0.5625000081956386,
"rewards/format_reward": 0.9443750113248826,
"step": 320
},
{
"completion_length": 280.200630569458,
"epoch": 0.88,
"grad_norm": 0.1344127207994461,
"kl": 0.1708984375,
"learning_rate": 2.6723323531027237e-06,
"loss": 0.0068,
"reward": 1.5125000149011611,
"reward_std": 0.392687563598156,
"rewards/accuracy_reward": 0.585625009983778,
"rewards/format_reward": 0.9268750116229058,
"step": 330
},
{
"completion_length": 340.8256294250488,
"epoch": 0.9066666666666666,
"grad_norm": 0.12343962490558624,
"kl": 0.20294189453125,
"learning_rate": 2.642723167021233e-06,
"loss": 0.0081,
"reward": 1.4475000262260438,
"reward_std": 0.42229729425162077,
"rewards/accuracy_reward": 0.554375009611249,
"rewards/format_reward": 0.8931250154972077,
"step": 340
},
{
"completion_length": 321.01875457763674,
"epoch": 0.9333333333333333,
"grad_norm": 0.1363190859556198,
"kl": 0.17435302734375,
"learning_rate": 2.612012835017041e-06,
"loss": 0.007,
"reward": 1.5350000232458114,
"reward_std": 0.38834156226366756,
"rewards/accuracy_reward": 0.6112500090152025,
"rewards/format_reward": 0.923750014603138,
"step": 350
},
{
"completion_length": 356.4068801879883,
"epoch": 0.96,
"grad_norm": 0.1496347039937973,
"kl": 0.245654296875,
"learning_rate": 2.5802309500465564e-06,
"loss": 0.0098,
"reward": 1.4331250190734863,
"reward_std": 0.5568725638091564,
"rewards/accuracy_reward": 0.5818750094622374,
"rewards/format_reward": 0.8512500137090683,
"step": 360
},
{
"completion_length": 315.9900047302246,
"epoch": 0.9866666666666667,
"grad_norm": 0.15564337372779846,
"kl": 0.2264892578125,
"learning_rate": 2.547408137631396e-06,
"loss": 0.0091,
"reward": 1.4825000256299972,
"reward_std": 0.47740888558328154,
"rewards/accuracy_reward": 0.5875000122934579,
"rewards/format_reward": 0.8950000122189522,
"step": 370
},
{
"completion_length": 292.8037563323975,
"epoch": 1.0133333333333334,
"grad_norm": 0.11661098897457123,
"kl": 0.18624267578125,
"learning_rate": 2.5135760263471446e-06,
"loss": 0.0075,
"reward": 1.5643750160932541,
"reward_std": 0.39525898918509483,
"rewards/accuracy_reward": 0.6393750097602606,
"rewards/format_reward": 0.9250000074505806,
"step": 380
},
{
"completion_length": 345.10375556945803,
"epoch": 1.04,
"grad_norm": 0.10983074456453323,
"kl": 0.2192138671875,
"learning_rate": 2.478767217345571e-06,
"loss": 0.0088,
"reward": 1.474375021457672,
"reward_std": 0.46512946095317603,
"rewards/accuracy_reward": 0.5981250118464232,
"rewards/format_reward": 0.8762500181794166,
"step": 390
},
{
"completion_length": 323.054377746582,
"epoch": 1.0666666666666667,
"grad_norm": 0.13792556524276733,
"kl": 0.1779541015625,
"learning_rate": 2.443015252939646e-06,
"loss": 0.0071,
"reward": 1.4987500220537187,
"reward_std": 0.3944621989503503,
"rewards/accuracy_reward": 0.578125012293458,
"rewards/format_reward": 0.9206250131130218,
"step": 400
},
{
"completion_length": 337.12000579833983,
"epoch": 1.0933333333333333,
"grad_norm": 0.10129429399967194,
"kl": 0.20455322265625,
"learning_rate": 2.406354584281642e-06,
"loss": 0.0082,
"reward": 1.52812502682209,
"reward_std": 0.4762104984372854,
"rewards/accuracy_reward": 0.6387500144541264,
"rewards/format_reward": 0.8893750131130218,
"step": 410
},
{
"completion_length": 317.5525062561035,
"epoch": 1.12,
"grad_norm": 0.12564826011657715,
"kl": 0.210009765625,
"learning_rate": 2.3688205381654686e-06,
"loss": 0.0084,
"reward": 1.4950000256299973,
"reward_std": 0.4526091780513525,
"rewards/accuracy_reward": 0.5925000101327896,
"rewards/format_reward": 0.9025000095367431,
"step": 420
},
{
"completion_length": 288.7437553405762,
"epoch": 1.1466666666666667,
"grad_norm": 0.10607220977544785,
"kl": 0.16053466796875,
"learning_rate": 2.330449282985219e-06,
"loss": 0.0064,
"reward": 1.590625023841858,
"reward_std": 0.3875279016792774,
"rewards/accuracy_reward": 0.6481250151991844,
"rewards/format_reward": 0.9425000056624413,
"step": 430
},
{
"completion_length": 359.6525058746338,
"epoch": 1.1733333333333333,
"grad_norm": 0.08999033272266388,
"kl": 0.16671142578125,
"learning_rate": 2.2912777938827377e-06,
"loss": 0.0067,
"reward": 1.5606250256299972,
"reward_std": 0.42853499911725523,
"rewards/accuracy_reward": 0.647500005364418,
"rewards/format_reward": 0.9131250083446503,
"step": 440
},
{
"completion_length": 400.6356315612793,
"epoch": 1.2,
"grad_norm": 0.09623633325099945,
"kl": 0.1906982421875,
"learning_rate": 2.251343817117798e-06,
"loss": 0.0076,
"reward": 1.514375028014183,
"reward_std": 0.49012119248509406,
"rewards/accuracy_reward": 0.6412500105798244,
"rewards/format_reward": 0.8731250122189522,
"step": 450
},
{
"completion_length": 363.6906307220459,
"epoch": 1.2266666666666666,
"grad_norm": 0.09504391998052597,
"kl": 0.1889892578125,
"learning_rate": 2.2106858336952155e-06,
"loss": 0.0076,
"reward": 1.4875000238418579,
"reward_std": 0.43876917734742166,
"rewards/accuracy_reward": 0.5943750120699406,
"rewards/format_reward": 0.8931250110268593,
"step": 460
},
{
"completion_length": 295.2150051116943,
"epoch": 1.2533333333333334,
"grad_norm": 0.14019078016281128,
"kl": 0.18583984375,
"learning_rate": 2.169343022283947e-06,
"loss": 0.0074,
"reward": 1.5243750274181367,
"reward_std": 0.41497170850634574,
"rewards/accuracy_reward": 0.5993750095367432,
"rewards/format_reward": 0.9250000089406967,
"step": 470
},
{
"completion_length": 295.10875549316404,
"epoch": 1.28,
"grad_norm": 0.18233150243759155,
"kl": 0.1969482421875,
"learning_rate": 2.127355221463915e-06,
"loss": 0.0079,
"reward": 1.4793750196695328,
"reward_std": 0.409528423845768,
"rewards/accuracy_reward": 0.5656250078231096,
"rewards/format_reward": 0.9137500107288361,
"step": 480
},
{
"completion_length": 321.9537551879883,
"epoch": 1.3066666666666666,
"grad_norm": 0.11022540926933289,
"kl": 0.22891845703125,
"learning_rate": 2.084762891336928e-06,
"loss": 0.0092,
"reward": 1.4631250321865081,
"reward_std": 0.47595020812004807,
"rewards/accuracy_reward": 0.5818750143051148,
"rewards/format_reward": 0.8812500149011612,
"step": 490
},
{
"completion_length": 329.26063079833983,
"epoch": 1.3333333333333333,
"grad_norm": 0.10155805945396423,
"kl": 0.20677490234375,
"learning_rate": 2.041607074538693e-06,
"loss": 0.0083,
"reward": 1.491875022649765,
"reward_std": 0.45835329182446005,
"rewards/accuracy_reward": 0.5962500102818012,
"rewards/format_reward": 0.8956250146031379,
"step": 500
},
{
"completion_length": 335.51375617980955,
"epoch": 1.3599999999999999,
"grad_norm": 0.11989594250917435,
"kl": 0.18310546875,
"learning_rate": 1.9979293566894888e-06,
"loss": 0.0073,
"reward": 1.529375022649765,
"reward_std": 0.4263172609731555,
"rewards/accuracy_reward": 0.6125000081956387,
"rewards/format_reward": 0.9168750137090683,
"step": 510
},
{
"completion_length": 348.62563133239746,
"epoch": 1.3866666666666667,
"grad_norm": 0.13145174086093903,
"kl": 0.18135986328125,
"learning_rate": 1.9537718263216137e-06,
"loss": 0.0073,
"reward": 1.4893750190734862,
"reward_std": 0.42451257910579443,
"rewards/accuracy_reward": 0.5843750070780516,
"rewards/format_reward": 0.9050000116229058,
"step": 520
},
{
"completion_length": 343.5318801879883,
"epoch": 1.4133333333333333,
"grad_norm": 0.11078327149152756,
"kl": 0.19310302734375,
"learning_rate": 1.909177034322215e-06,
"loss": 0.0077,
"reward": 1.5475000232458114,
"reward_std": 0.45979407913982867,
"rewards/accuracy_reward": 0.6500000119209289,
"rewards/format_reward": 0.8975000113248826,
"step": 530
},
{
"completion_length": 316.70063095092775,
"epoch": 1.44,
"grad_norm": 0.13980206847190857,
"kl": 0.1595458984375,
"learning_rate": 1.8641879529305908e-06,
"loss": 0.0064,
"reward": 1.5656250238418579,
"reward_std": 0.3269194783642888,
"rewards/accuracy_reward": 0.6243750140070915,
"rewards/format_reward": 0.9412500038743019,
"step": 540
},
{
"completion_length": 345.5168800354004,
"epoch": 1.4666666666666668,
"grad_norm": 0.12923210859298706,
"kl": 0.1690673828125,
"learning_rate": 1.818847934329465e-06,
"loss": 0.0068,
"reward": 1.4968750149011611,
"reward_std": 0.4168443286791444,
"rewards/accuracy_reward": 0.584375013411045,
"rewards/format_reward": 0.912500013411045,
"step": 550
},
{
"completion_length": 353.0875053405762,
"epoch": 1.4933333333333334,
"grad_norm": 0.1003999263048172,
"kl": 0.20703125,
"learning_rate": 1.7732006688701488e-06,
"loss": 0.0083,
"reward": 1.458750021457672,
"reward_std": 0.4720118813216686,
"rewards/accuracy_reward": 0.5718750119209289,
"rewards/format_reward": 0.8868750110268593,
"step": 560
},
{
"completion_length": 312.12188186645506,
"epoch": 1.52,
"grad_norm": 0.16526304185390472,
"kl": 0.20828857421875,
"learning_rate": 1.727290142971832e-06,
"loss": 0.0083,
"reward": 1.475625030696392,
"reward_std": 0.4474962681531906,
"rewards/accuracy_reward": 0.5737500060349703,
"rewards/format_reward": 0.901875014603138,
"step": 570
},
{
"completion_length": 318.03625679016113,
"epoch": 1.5466666666666666,
"grad_norm": 0.121968574821949,
"kl": 0.17958984375,
"learning_rate": 1.6811605967355838e-06,
"loss": 0.0072,
"reward": 1.5162500262260437,
"reward_std": 0.3832638839259744,
"rewards/accuracy_reward": 0.5818750113248825,
"rewards/format_reward": 0.9343750089406967,
"step": 580
},
{
"completion_length": 387.7318817138672,
"epoch": 1.5733333333333333,
"grad_norm": 0.11350403726100922,
"kl": 0.19808349609375,
"learning_rate": 1.6348564813138958e-06,
"loss": 0.0079,
"reward": 1.5393750190734863,
"reward_std": 0.43136950451880696,
"rewards/accuracy_reward": 0.6400000102818012,
"rewards/format_reward": 0.8993750125169754,
"step": 590
},
{
"completion_length": 416.05750732421876,
"epoch": 1.6,
"grad_norm": 0.17338570952415466,
"kl": 0.23121337890625,
"learning_rate": 1.588422416076859e-06,
"loss": 0.0092,
"reward": 1.4106250256299973,
"reward_std": 0.5426479373127222,
"rewards/accuracy_reward": 0.5600000061094761,
"rewards/format_reward": 0.8506250083446503,
"step": 600
},
{
"completion_length": 366.97062797546386,
"epoch": 1.6266666666666667,
"grad_norm": 0.5024117231369019,
"kl": 0.2103515625,
"learning_rate": 1.5419031456162405e-06,
"loss": 0.0084,
"reward": 1.4693750351667405,
"reward_std": 0.4877826740965247,
"rewards/accuracy_reward": 0.588750010728836,
"rewards/format_reward": 0.8806250095367432,
"step": 610
},
{
"completion_length": 346.7418815612793,
"epoch": 1.6533333333333333,
"grad_norm": 0.13114424049854279,
"kl": 0.25361328125,
"learning_rate": 1.4953434966288927e-06,
"loss": 0.0101,
"reward": 1.459375011920929,
"reward_std": 0.5048464283347129,
"rewards/accuracy_reward": 0.5925000093877315,
"rewards/format_reward": 0.8668750107288361,
"step": 620
},
{
"completion_length": 310.78437995910645,
"epoch": 1.6800000000000002,
"grad_norm": 0.13125382363796234,
"kl": 0.2142822265625,
"learning_rate": 1.4487883347210483e-06,
"loss": 0.0086,
"reward": 1.4768750280141831,
"reward_std": 0.4538666373118758,
"rewards/accuracy_reward": 0.5725000105798245,
"rewards/format_reward": 0.9043750137090683,
"step": 630
},
{
"completion_length": 267.90562973022463,
"epoch": 1.7066666666666666,
"grad_norm": 0.1306937038898468,
"kl": 0.1768310546875,
"learning_rate": 1.4022825211751206e-06,
"loss": 0.0071,
"reward": 1.5306250214576722,
"reward_std": 0.3767278905957937,
"rewards/accuracy_reward": 0.5837500121444463,
"rewards/format_reward": 0.9468750119209289,
"step": 640
},
{
"completion_length": 300.6637557983398,
"epoch": 1.7333333333333334,
"grad_norm": 0.1377246230840683,
"kl": 0.16981201171875,
"learning_rate": 1.355870869720669e-06,
"loss": 0.0068,
"reward": 1.5968750298023224,
"reward_std": 0.3849057173356414,
"rewards/accuracy_reward": 0.6581250108778477,
"rewards/format_reward": 0.938750010728836,
"step": 650
},
{
"completion_length": 308.26562995910643,
"epoch": 1.76,
"grad_norm": 0.1021685004234314,
"kl": 0.1703125,
"learning_rate": 1.3095981033511883e-06,
"loss": 0.0068,
"reward": 1.4662500232458116,
"reward_std": 0.3969815358519554,
"rewards/accuracy_reward": 0.537500013411045,
"rewards/format_reward": 0.9287500098347664,
"step": 660
},
{
"completion_length": 326.6156311035156,
"epoch": 1.7866666666666666,
"grad_norm": 0.15498338639736176,
"kl": 0.192919921875,
"learning_rate": 1.2635088112283316e-06,
"loss": 0.0077,
"reward": 1.4600000232458115,
"reward_std": 0.4722231462597847,
"rewards/accuracy_reward": 0.5543750062584877,
"rewards/format_reward": 0.9056250154972076,
"step": 670
},
{
"completion_length": 318.85250625610354,
"epoch": 1.8133333333333335,
"grad_norm": 0.11492400616407394,
"kl": 0.18997802734375,
"learning_rate": 1.217647405715099e-06,
"loss": 0.0076,
"reward": 1.5250000268220902,
"reward_std": 0.4434811886399984,
"rewards/accuracy_reward": 0.6143750160932541,
"rewards/format_reward": 0.9106250107288361,
"step": 680
},
{
"completion_length": 297.7112564086914,
"epoch": 1.8399999999999999,
"grad_norm": 0.10103321820497513,
"kl": 0.208349609375,
"learning_rate": 1.1720580795793865e-06,
"loss": 0.0083,
"reward": 1.4731250196695327,
"reward_std": 0.4129851894453168,
"rewards/accuracy_reward": 0.5556250110268592,
"rewards/format_reward": 0.917500016093254,
"step": 690
},
{
"completion_length": 283.28375282287595,
"epoch": 1.8666666666666667,
"grad_norm": 0.15206778049468994,
"kl": 0.18507080078125,
"learning_rate": 1.1267847634091462e-06,
"loss": 0.0074,
"reward": 1.5437500149011611,
"reward_std": 0.3982010118663311,
"rewards/accuracy_reward": 0.6137500122189522,
"rewards/format_reward": 0.9300000086426735,
"step": 700
},
{
"completion_length": 295.87500762939453,
"epoch": 1.8933333333333333,
"grad_norm": 0.09867344796657562,
"kl": 0.16317138671875,
"learning_rate": 1.0818710832801818e-06,
"loss": 0.0065,
"reward": 1.5937500238418578,
"reward_std": 0.3769227135926485,
"rewards/accuracy_reward": 0.6562500093132257,
"rewards/format_reward": 0.9375000089406967,
"step": 710
},
{
"completion_length": 309.8425048828125,
"epoch": 1.92,
"grad_norm": 0.11957939714193344,
"kl": 0.14814453125,
"learning_rate": 1.0373603187173825e-06,
"loss": 0.0059,
"reward": 1.5743750184774399,
"reward_std": 0.34016798436641693,
"rewards/accuracy_reward": 0.6218750074505806,
"rewards/format_reward": 0.9525000095367432,
"step": 720
},
{
"completion_length": 338.95750427246094,
"epoch": 1.9466666666666668,
"grad_norm": 0.12207633256912231,
"kl": 0.1755126953125,
"learning_rate": 9.932953609898924e-07,
"loss": 0.007,
"reward": 1.5612500250339507,
"reward_std": 0.40149390175938604,
"rewards/accuracy_reward": 0.6425000101327896,
"rewards/format_reward": 0.9187500134110451,
"step": 730
},
{
"completion_length": 344.77625541687013,
"epoch": 1.9733333333333334,
"grad_norm": 0.1330130398273468,
"kl": 0.18267822265625,
"learning_rate": 9.497186717804155e-07,
"loss": 0.0073,
"reward": 1.5181250244379043,
"reward_std": 0.3998035121709108,
"rewards/accuracy_reward": 0.6037500113248825,
"rewards/format_reward": 0.9143750131130218,
"step": 740
},
{
"completion_length": 338.76000556945803,
"epoch": 2.0,
"grad_norm": 0.1740158349275589,
"kl": 0.233056640625,
"learning_rate": 9.066722422684706e-07,
"loss": 0.0093,
"reward": 1.5250000178813934,
"reward_std": 0.42267096769064666,
"rewards/accuracy_reward": 0.607500009983778,
"rewards/format_reward": 0.9175000071525574,
"step": 750
},
{
"completion_length": 313.39938125610354,
"epoch": 2.026666666666667,
"grad_norm": 0.17406630516052246,
"kl": 0.16112060546875,
"learning_rate": 8.641975526670375e-07,
"loss": 0.0064,
"reward": 1.5193750202655791,
"reward_std": 0.38527730852365494,
"rewards/accuracy_reward": 0.5837500056251883,
"rewards/format_reward": 0.9356250122189522,
"step": 760
},
{
"completion_length": 335.1118816375732,
"epoch": 2.0533333333333332,
"grad_norm": 0.11110047250986099,
"kl": 0.169952392578125,
"learning_rate": 8.223355322515711e-07,
"loss": 0.0068,
"reward": 1.5487500220537185,
"reward_std": 0.4174341483041644,
"rewards/accuracy_reward": 0.6300000164657831,
"rewards/format_reward": 0.9187500104308128,
"step": 770
},
{
"completion_length": 329.0106330871582,
"epoch": 2.08,
"grad_norm": 0.12786993384361267,
"kl": 0.1651123046875,
"learning_rate": 7.811265199199153e-07,
"loss": 0.0066,
"reward": 1.4956250309944152,
"reward_std": 0.39866077806800604,
"rewards/accuracy_reward": 0.5656250093132258,
"rewards/format_reward": 0.9300000071525574,
"step": 780
},
{
"completion_length": 317.1981311798096,
"epoch": 2.1066666666666665,
"grad_norm": 0.15374892950057983,
"kl": 0.16614990234375,
"learning_rate": 7.406102253211037e-07,
"loss": 0.0066,
"reward": 1.554375022649765,
"reward_std": 0.38757612481713294,
"rewards/accuracy_reward": 0.6156250141561032,
"rewards/format_reward": 0.93875000923872,
"step": 790
},
{
"completion_length": 356.8893817901611,
"epoch": 2.1333333333333333,
"grad_norm": 0.09933959692716599,
"kl": 0.18651123046875,
"learning_rate": 7.008256905905285e-07,
"loss": 0.0075,
"reward": 1.4968750149011611,
"reward_std": 0.434663244150579,
"rewards/accuracy_reward": 0.5887500114738942,
"rewards/format_reward": 0.9081250086426735,
"step": 800
},
{
"completion_length": 334.13250694274905,
"epoch": 2.16,
"grad_norm": 0.10136093944311142,
"kl": 0.18509521484375,
"learning_rate": 6.618112527283208e-07,
"loss": 0.0074,
"reward": 1.5343750268220901,
"reward_std": 0.41338230539113285,
"rewards/accuracy_reward": 0.6100000128149986,
"rewards/format_reward": 0.9243750125169754,
"step": 810
},
{
"completion_length": 331.4456298828125,
"epoch": 2.1866666666666665,
"grad_norm": 0.10766751319169998,
"kl": 0.17252197265625,
"learning_rate": 6.236045066572228e-07,
"loss": 0.0069,
"reward": 1.5537500262260437,
"reward_std": 0.3903699716553092,
"rewards/accuracy_reward": 0.631250013038516,
"rewards/format_reward": 0.9225000113248825,
"step": 820
},
{
"completion_length": 299.4775054931641,
"epoch": 2.2133333333333334,
"grad_norm": 0.09992185235023499,
"kl": 0.1746337890625,
"learning_rate": 5.862422689955269e-07,
"loss": 0.007,
"reward": 1.5581250220537186,
"reward_std": 0.35976272616535426,
"rewards/accuracy_reward": 0.6206250134855509,
"rewards/format_reward": 0.9375000104308129,
"step": 830
},
{
"completion_length": 323.91625518798827,
"epoch": 2.24,
"grad_norm": 0.07573343813419342,
"kl": 0.167333984375,
"learning_rate": 5.497605425800119e-07,
"loss": 0.0067,
"reward": 1.5562500298023223,
"reward_std": 0.3865811740979552,
"rewards/accuracy_reward": 0.6243750125169754,
"rewards/format_reward": 0.9318750128149986,
"step": 840
},
{
"completion_length": 317.47250442504884,
"epoch": 2.2666666666666666,
"grad_norm": 0.11399682611227036,
"kl": 0.16575927734375,
"learning_rate": 5.141944817730411e-07,
"loss": 0.0066,
"reward": 1.5550000220537186,
"reward_std": 0.3845184024423361,
"rewards/accuracy_reward": 0.6225000105798244,
"rewards/format_reward": 0.9325000122189522,
"step": 850
},
{
"completion_length": 309.3156311035156,
"epoch": 2.2933333333333334,
"grad_norm": 0.16207897663116455,
"kl": 0.171240234375,
"learning_rate": 4.795783585872737e-07,
"loss": 0.0068,
"reward": 1.5768750220537187,
"reward_std": 0.35651344805955887,
"rewards/accuracy_reward": 0.6406250078231096,
"rewards/format_reward": 0.9362500071525574,
"step": 860
},
{
"completion_length": 327.7281307220459,
"epoch": 2.32,
"grad_norm": 0.14628101885318756,
"kl": 0.20667724609375,
"learning_rate": 4.4594552966061055e-07,
"loss": 0.0083,
"reward": 1.5000000178813935,
"reward_std": 0.4507721956819296,
"rewards/accuracy_reward": 0.5931250065565109,
"rewards/format_reward": 0.9068750143051147,
"step": 870
},
{
"completion_length": 349.7168792724609,
"epoch": 2.3466666666666667,
"grad_norm": 0.12691651284694672,
"kl": 0.19437255859375,
"learning_rate": 4.1332840411322373e-07,
"loss": 0.0078,
"reward": 1.5350000202655791,
"reward_std": 0.44358963407576085,
"rewards/accuracy_reward": 0.6350000135600566,
"rewards/format_reward": 0.9000000104308128,
"step": 880
},
{
"completion_length": 324.5581310272217,
"epoch": 2.3733333333333335,
"grad_norm": 0.17686228454113007,
"kl": 0.18092041015625,
"learning_rate": 3.817584123176149e-07,
"loss": 0.0072,
"reward": 1.564375028014183,
"reward_std": 0.4211408071219921,
"rewards/accuracy_reward": 0.6462500020861626,
"rewards/format_reward": 0.9181250125169754,
"step": 890
},
{
"completion_length": 320.24062728881836,
"epoch": 2.4,
"grad_norm": 0.12916938960552216,
"kl": 0.162109375,
"learning_rate": 3.5126597561182106e-07,
"loss": 0.0065,
"reward": 1.6000000268220902,
"reward_std": 0.3978784864768386,
"rewards/accuracy_reward": 0.6662500157952309,
"rewards/format_reward": 0.9337500125169754,
"step": 900
},
{
"completion_length": 329.11000671386716,
"epoch": 2.4266666666666667,
"grad_norm": 0.14607831835746765,
"kl": 0.2010498046875,
"learning_rate": 3.2188047698493277e-07,
"loss": 0.008,
"reward": 1.5743750303983688,
"reward_std": 0.39661835934966805,
"rewards/accuracy_reward": 0.641875009983778,
"rewards/format_reward": 0.9325000122189522,
"step": 910
},
{
"completion_length": 324.65750617980956,
"epoch": 2.453333333333333,
"grad_norm": 0.1194528341293335,
"kl": 0.170458984375,
"learning_rate": 2.9363023276319157e-07,
"loss": 0.0068,
"reward": 1.529375022649765,
"reward_std": 0.3923725115135312,
"rewards/accuracy_reward": 0.6093750111758709,
"rewards/format_reward": 0.9200000137090683,
"step": 920
},
{
"completion_length": 338.57312965393066,
"epoch": 2.48,
"grad_norm": 0.11762479692697525,
"kl": 0.1827392578125,
"learning_rate": 2.6654246532392954e-07,
"loss": 0.0073,
"reward": 1.5650000244379043,
"reward_std": 0.46239927411079407,
"rewards/accuracy_reward": 0.6618750102818012,
"rewards/format_reward": 0.903125011920929,
"step": 930
},
{
"completion_length": 309.353129196167,
"epoch": 2.506666666666667,
"grad_norm": 0.13050219416618347,
"kl": 0.17652587890625,
"learning_rate": 2.406432768636658e-07,
"loss": 0.0071,
"reward": 1.5493750274181366,
"reward_std": 0.4042684996500611,
"rewards/accuracy_reward": 0.6293750092387199,
"rewards/format_reward": 0.9200000166893005,
"step": 940
},
{
"completion_length": 318.59750709533694,
"epoch": 2.533333333333333,
"grad_norm": 0.10761768370866776,
"kl": 0.17911376953125,
"learning_rate": 2.1595762424561588e-07,
"loss": 0.0072,
"reward": 1.5812500208616256,
"reward_std": 0.39062286671251056,
"rewards/accuracy_reward": 0.6731250114738941,
"rewards/format_reward": 0.9081250116229057,
"step": 950
},
{
"completion_length": 320.97687950134275,
"epoch": 2.56,
"grad_norm": 0.11984766274690628,
"kl": 0.174462890625,
"learning_rate": 1.9250929495087294e-07,
"loss": 0.007,
"reward": 1.513750022649765,
"reward_std": 0.4024242129176855,
"rewards/accuracy_reward": 0.5925000108778476,
"rewards/format_reward": 0.921250008046627,
"step": 960
},
{
"completion_length": 318.3468811035156,
"epoch": 2.586666666666667,
"grad_norm": 0.0903179794549942,
"kl": 0.1794921875,
"learning_rate": 1.703208841564171e-07,
"loss": 0.0072,
"reward": 1.5906250119209289,
"reward_std": 0.40222617890685797,
"rewards/accuracy_reward": 0.6681250132620334,
"rewards/format_reward": 0.9225000128149986,
"step": 970
},
{
"completion_length": 302.10000534057616,
"epoch": 2.6133333333333333,
"grad_norm": 0.1316317617893219,
"kl": 0.1739013671875,
"learning_rate": 1.4941377296204656e-07,
"loss": 0.007,
"reward": 1.5762500196695328,
"reward_std": 0.3883319929242134,
"rewards/accuracy_reward": 0.6468750081956387,
"rewards/format_reward": 0.9293750151991844,
"step": 980
},
{
"completion_length": 319.00313262939454,
"epoch": 2.64,
"grad_norm": 0.1167697086930275,
"kl": 0.17550048828125,
"learning_rate": 1.2980810778722047e-07,
"loss": 0.007,
"reward": 1.5487500220537185,
"reward_std": 0.41231051571667193,
"rewards/accuracy_reward": 0.6293750144541264,
"rewards/format_reward": 0.9193750113248825,
"step": 990
},
{
"completion_length": 302.03875694274905,
"epoch": 2.6666666666666665,
"grad_norm": 0.11268429458141327,
"kl": 0.1760498046875,
"learning_rate": 1.1152278095764917e-07,
"loss": 0.007,
"reward": 1.5656250268220901,
"reward_std": 0.3788869069889188,
"rewards/accuracy_reward": 0.6337500095367432,
"rewards/format_reward": 0.9318750128149986,
"step": 1000
},
{
"completion_length": 314.637504196167,
"epoch": 2.6933333333333334,
"grad_norm": 0.09960366785526276,
"kl": 0.1811279296875,
"learning_rate": 9.457541250035762e-08,
"loss": 0.0072,
"reward": 1.5493750303983689,
"reward_std": 0.407411840185523,
"rewards/accuracy_reward": 0.6312500149011612,
"rewards/format_reward": 0.9181250125169754,
"step": 1010
},
{
"completion_length": 313.1293800354004,
"epoch": 2.7199999999999998,
"grad_norm": 0.11189663410186768,
"kl": 0.1806884765625,
"learning_rate": 7.898233316474724e-08,
"loss": 0.0072,
"reward": 1.5737500235438346,
"reward_std": 0.38581568617373707,
"rewards/accuracy_reward": 0.6493750059977174,
"rewards/format_reward": 0.9243750110268593,
"step": 1020
},
{
"completion_length": 294.27250480651855,
"epoch": 2.7466666666666666,
"grad_norm": 0.14609037339687347,
"kl": 0.18486328125,
"learning_rate": 6.475856868603475e-08,
"loss": 0.0074,
"reward": 1.6087500244379043,
"reward_std": 0.38516267221421,
"rewards/accuracy_reward": 0.6806250080466271,
"rewards/format_reward": 0.9281250074505806,
"step": 1030
},
{
"completion_length": 321.70563011169435,
"epoch": 2.7733333333333334,
"grad_norm": 0.10615142434835434,
"kl": 0.17933349609375,
"learning_rate": 5.191782530621553e-08,
"loss": 0.0072,
"reward": 1.5543750256299973,
"reward_std": 0.39196000918745993,
"rewards/accuracy_reward": 0.6375000063329935,
"rewards/format_reward": 0.9168750092387199,
"step": 1040
},
{
"completion_length": 329.3381317138672,
"epoch": 2.8,
"grad_norm": 0.10510344058275223,
"kl": 0.17608642578125,
"learning_rate": 4.0472476566516036e-08,
"loss": 0.007,
"reward": 1.5681250244379044,
"reward_std": 0.43267819974571464,
"rewards/accuracy_reward": 0.6543750088661909,
"rewards/format_reward": 0.9137500122189521,
"step": 1050
},
{
"completion_length": 322.91062889099123,
"epoch": 2.8266666666666667,
"grad_norm": 0.10127785056829453,
"kl": 0.168310546875,
"learning_rate": 3.043355138405418e-08,
"loss": 0.0067,
"reward": 1.5443750321865082,
"reward_std": 0.3810180738568306,
"rewards/accuracy_reward": 0.6168750144541264,
"rewards/format_reward": 0.9275000095367432,
"step": 1060
},
{
"completion_length": 327.8443794250488,
"epoch": 2.8533333333333335,
"grad_norm": 0.150223970413208,
"kl": 0.1885986328125,
"learning_rate": 2.1810723424204705e-08,
"loss": 0.0075,
"reward": 1.5512500286102295,
"reward_std": 0.4414610244333744,
"rewards/accuracy_reward": 0.6412500124424696,
"rewards/format_reward": 0.9100000098347664,
"step": 1070
},
{
"completion_length": 321.11000747680663,
"epoch": 2.88,
"grad_norm": 0.1418101191520691,
"kl": 0.19056396484375,
"learning_rate": 1.4612301778901604e-08,
"loss": 0.0076,
"reward": 1.511250025033951,
"reward_std": 0.43174309805035593,
"rewards/accuracy_reward": 0.5987500116229058,
"rewards/format_reward": 0.912500011920929,
"step": 1080
},
{
"completion_length": 326.74563217163086,
"epoch": 2.9066666666666667,
"grad_norm": 0.12356416881084442,
"kl": 0.19766845703125,
"learning_rate": 8.845222959868227e-09,
"loss": 0.0079,
"reward": 1.5175000235438347,
"reward_std": 0.394646280631423,
"rewards/accuracy_reward": 0.6131250087171793,
"rewards/format_reward": 0.9043750107288361,
"step": 1090
},
{
"completion_length": 337.4362560272217,
"epoch": 2.9333333333333336,
"grad_norm": 0.16877064108848572,
"kl": 0.18851318359375,
"learning_rate": 4.515044214485842e-09,
"loss": 0.0075,
"reward": 1.5406250208616257,
"reward_std": 0.4307627685368061,
"rewards/accuracy_reward": 0.630625007674098,
"rewards/format_reward": 0.9100000083446502,
"step": 1100
},
{
"completion_length": 334.74000549316406,
"epoch": 2.96,
"grad_norm": 0.14080122113227844,
"kl": 0.1871826171875,
"learning_rate": 1.6259381707432464e-09,
"loss": 0.0075,
"reward": 1.5362500309944154,
"reward_std": 0.4239814583212137,
"rewards/accuracy_reward": 0.6300000049173832,
"rewards/format_reward": 0.9062500104308129,
"step": 1110
},
{
"completion_length": 341.06625747680664,
"epoch": 2.986666666666667,
"grad_norm": 0.09707628190517426,
"kl": 0.1976806640625,
"learning_rate": 1.8068881642691049e-10,
"loss": 0.0079,
"reward": 1.54687502682209,
"reward_std": 0.46240887157619,
"rewards/accuracy_reward": 0.6443750120699405,
"rewards/format_reward": 0.9025000169873237,
"step": 1120
},
{
"completion_length": 330.91750717163086,
"epoch": 3.0,
"kl": 0.17158203125,
"reward": 1.5462500274181366,
"reward_std": 0.43328754380345347,
"rewards/accuracy_reward": 0.6225000083446502,
"rewards/format_reward": 0.9237500160932541,
"step": 1125,
"total_flos": 0.0,
"train_loss": 0.08514851592410155,
"train_runtime": 97826.7372,
"train_samples_per_second": 0.23,
"train_steps_per_second": 0.011
}
],
"logging_steps": 10,
"max_steps": 1125,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}