Upload checkpoint-500/trainer_state.json with huggingface_hub

Browse files

Files changed (1) hide show

checkpoint-500/trainer_state.json +741 -0

checkpoint-500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,741 @@

+{
+  "best_metric": 0.4938061535358429,
+  "best_model_checkpoint": "/home/ray/default/save/checkpoint-500",
+  "epoch": 0.43327556325823224,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.004332755632582322,
+      "grad_norm": 0.10313185304403305,
+      "learning_rate": 8.333333333333333e-07,
+      "loss": 1.0706,
+      "step": 5
+    },
+    {
+      "epoch": 0.008665511265164644,
+      "grad_norm": 0.10810094326734543,
+      "learning_rate": 1.6666666666666667e-06,
+      "loss": 1.0694,
+      "step": 10
+    },
+    {
+      "epoch": 0.012998266897746967,
+      "grad_norm": 0.09731286019086838,
+      "learning_rate": 2.5e-06,
+      "loss": 1.0706,
+      "step": 15
+    },
+    {
+      "epoch": 0.01733102253032929,
+      "grad_norm": 0.11459868401288986,
+      "learning_rate": 3.3333333333333333e-06,
+      "loss": 1.0772,
+      "step": 20
+    },
+    {
+      "epoch": 0.021663778162911613,
+      "grad_norm": 0.10845116525888443,
+      "learning_rate": 4.166666666666667e-06,
+      "loss": 1.0809,
+      "step": 25
+    },
+    {
+      "epoch": 0.025996533795493933,
+      "grad_norm": 0.12091381102800369,
+      "learning_rate": 5e-06,
+      "loss": 1.0737,
+      "step": 30
+    },
+    {
+      "epoch": 0.030329289428076257,
+      "grad_norm": 0.12315661460161209,
+      "learning_rate": 5.833333333333334e-06,
+      "loss": 1.0675,
+      "step": 35
+    },
+    {
+      "epoch": 0.03466204506065858,
+      "grad_norm": 0.13054250180721283,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 1.0638,
+      "step": 40
+    },
+    {
+      "epoch": 0.0389948006932409,
+      "grad_norm": 0.13414187729358673,
+      "learning_rate": 7.5e-06,
+      "loss": 1.0588,
+      "step": 45
+    },
+    {
+      "epoch": 0.043327556325823226,
+      "grad_norm": 0.12992985546588898,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 1.0512,
+      "step": 50
+    },
+    {
+      "epoch": 0.047660311958405546,
+      "grad_norm": 0.11500700563192368,
+      "learning_rate": 9.166666666666666e-06,
+      "loss": 1.0545,
+      "step": 55
+    },
+    {
+      "epoch": 0.05199306759098787,
+      "grad_norm": 0.10958714783191681,
+      "learning_rate": 1e-05,
+      "loss": 1.0357,
+      "step": 60
+    },
+    {
+      "epoch": 0.05632582322357019,
+      "grad_norm": 0.10853663086891174,
+      "learning_rate": 1.0833333333333334e-05,
+      "loss": 1.0268,
+      "step": 65
+    },
+    {
+      "epoch": 0.060658578856152515,
+      "grad_norm": 0.12498235702514648,
+      "learning_rate": 1.1666666666666668e-05,
+      "loss": 1.0135,
+      "step": 70
+    },
+    {
+      "epoch": 0.06499133448873484,
+      "grad_norm": 0.11716682463884354,
+      "learning_rate": 1.25e-05,
+      "loss": 1.0014,
+      "step": 75
+    },
+    {
+      "epoch": 0.06932409012131716,
+      "grad_norm": 0.09530466794967651,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 0.9824,
+      "step": 80
+    },
+    {
+      "epoch": 0.07365684575389948,
+      "grad_norm": 0.09298353642225266,
+      "learning_rate": 1.4166666666666668e-05,
+      "loss": 0.9676,
+      "step": 85
+    },
+    {
+      "epoch": 0.0779896013864818,
+      "grad_norm": 0.09433547407388687,
+      "learning_rate": 1.5e-05,
+      "loss": 0.9463,
+      "step": 90
+    },
+    {
+      "epoch": 0.08232235701906412,
+      "grad_norm": 0.09255563467741013,
+      "learning_rate": 1.5833333333333333e-05,
+      "loss": 0.9341,
+      "step": 95
+    },
+    {
+      "epoch": 0.08665511265164645,
+      "grad_norm": 0.09951213002204895,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 0.9209,
+      "step": 100
+    },
+    {
+      "epoch": 0.09098786828422877,
+      "grad_norm": 0.10951012372970581,
+      "learning_rate": 1.75e-05,
+      "loss": 0.8927,
+      "step": 105
+    },
+    {
+      "epoch": 0.09532062391681109,
+      "grad_norm": 0.110866479575634,
+      "learning_rate": 1.8333333333333333e-05,
+      "loss": 0.8755,
+      "step": 110
+    },
+    {
+      "epoch": 0.09965337954939342,
+      "grad_norm": 0.11648208647966385,
+      "learning_rate": 1.9166666666666667e-05,
+      "loss": 0.8453,
+      "step": 115
+    },
+    {
+      "epoch": 0.10398613518197573,
+      "grad_norm": 0.12016862630844116,
+      "learning_rate": 2e-05,
+      "loss": 0.8081,
+      "step": 120
+    },
+    {
+      "epoch": 0.10831889081455806,
+      "grad_norm": 0.12387488037347794,
+      "learning_rate": 2.0833333333333336e-05,
+      "loss": 0.7784,
+      "step": 125
+    },
+    {
+      "epoch": 0.11265164644714037,
+      "grad_norm": 0.12779255211353302,
+      "learning_rate": 2.1666666666666667e-05,
+      "loss": 0.7353,
+      "step": 130
+    },
+    {
+      "epoch": 0.1169844020797227,
+      "grad_norm": 0.12649372220039368,
+      "learning_rate": 2.25e-05,
+      "loss": 0.7085,
+      "step": 135
+    },
+    {
+      "epoch": 0.12131715771230503,
+      "grad_norm": 0.1445430964231491,
+      "learning_rate": 2.3333333333333336e-05,
+      "loss": 0.6753,
+      "step": 140
+    },
+    {
+      "epoch": 0.12564991334488734,
+      "grad_norm": 0.1329505294561386,
+      "learning_rate": 2.4166666666666667e-05,
+      "loss": 0.6448,
+      "step": 145
+    },
+    {
+      "epoch": 0.12998266897746968,
+      "grad_norm": 0.13544394075870514,
+      "learning_rate": 2.5e-05,
+      "loss": 0.621,
+      "step": 150
+    },
+    {
+      "epoch": 0.134315424610052,
+      "grad_norm": 0.1351090520620346,
+      "learning_rate": 2.5833333333333336e-05,
+      "loss": 0.5997,
+      "step": 155
+    },
+    {
+      "epoch": 0.1386481802426343,
+      "grad_norm": 0.127303346991539,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 0.5791,
+      "step": 160
+    },
+    {
+      "epoch": 0.14298093587521662,
+      "grad_norm": 0.1476767510175705,
+      "learning_rate": 2.7500000000000004e-05,
+      "loss": 0.57,
+      "step": 165
+    },
+    {
+      "epoch": 0.14731369150779897,
+      "grad_norm": 0.13856437802314758,
+      "learning_rate": 2.8333333333333335e-05,
+      "loss": 0.5645,
+      "step": 170
+    },
+    {
+      "epoch": 0.15164644714038128,
+      "grad_norm": 0.1533508449792862,
+      "learning_rate": 2.916666666666667e-05,
+      "loss": 0.5583,
+      "step": 175
+    },
+    {
+      "epoch": 0.1559792027729636,
+      "grad_norm": 0.13325001299381256,
+      "learning_rate": 3e-05,
+      "loss": 0.555,
+      "step": 180
+    },
+    {
+      "epoch": 0.16031195840554593,
+      "grad_norm": 0.13416974246501923,
+      "learning_rate": 3.0833333333333335e-05,
+      "loss": 0.5439,
+      "step": 185
+    },
+    {
+      "epoch": 0.16464471403812825,
+      "grad_norm": 0.1278882771730423,
+      "learning_rate": 3.1666666666666666e-05,
+      "loss": 0.537,
+      "step": 190
+    },
+    {
+      "epoch": 0.16897746967071056,
+      "grad_norm": 0.14047101140022278,
+      "learning_rate": 3.2500000000000004e-05,
+      "loss": 0.5381,
+      "step": 195
+    },
+    {
+      "epoch": 0.1733102253032929,
+      "grad_norm": 0.13340455293655396,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.5344,
+      "step": 200
+    },
+    {
+      "epoch": 0.17764298093587522,
+      "grad_norm": 0.13049094378948212,
+      "learning_rate": 3.4166666666666666e-05,
+      "loss": 0.5291,
+      "step": 205
+    },
+    {
+      "epoch": 0.18197573656845753,
+      "grad_norm": 0.16296444833278656,
+      "learning_rate": 3.5e-05,
+      "loss": 0.5342,
+      "step": 210
+    },
+    {
+      "epoch": 0.18630849220103987,
+      "grad_norm": 0.1682613343000412,
+      "learning_rate": 3.5833333333333335e-05,
+      "loss": 0.5285,
+      "step": 215
+    },
+    {
+      "epoch": 0.19064124783362218,
+      "grad_norm": 0.1439386010169983,
+      "learning_rate": 3.6666666666666666e-05,
+      "loss": 0.5268,
+      "step": 220
+    },
+    {
+      "epoch": 0.1949740034662045,
+      "grad_norm": 0.15248768031597137,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.5252,
+      "step": 225
+    },
+    {
+      "epoch": 0.19930675909878684,
+      "grad_norm": 0.1604214906692505,
+      "learning_rate": 3.8333333333333334e-05,
+      "loss": 0.5201,
+      "step": 230
+    },
+    {
+      "epoch": 0.20363951473136915,
+      "grad_norm": 0.16192543506622314,
+      "learning_rate": 3.9166666666666665e-05,
+      "loss": 0.5222,
+      "step": 235
+    },
+    {
+      "epoch": 0.20797227036395147,
+      "grad_norm": 0.14945088326931,
+      "learning_rate": 4e-05,
+      "loss": 0.5158,
+      "step": 240
+    },
+    {
+      "epoch": 0.2123050259965338,
+      "grad_norm": 0.16204427182674408,
+      "learning_rate": 4.0833333333333334e-05,
+      "loss": 0.5127,
+      "step": 245
+    },
+    {
+      "epoch": 0.21663778162911612,
+      "grad_norm": 0.1618306040763855,
+      "learning_rate": 4.166666666666667e-05,
+      "loss": 0.5181,
+      "step": 250
+    },
+    {
+      "epoch": 0.22097053726169844,
+      "grad_norm": 0.14193885028362274,
+      "learning_rate": 4.25e-05,
+      "loss": 0.5164,
+      "step": 255
+    },
+    {
+      "epoch": 0.22530329289428075,
+      "grad_norm": 0.12552691996097565,
+      "learning_rate": 4.3333333333333334e-05,
+      "loss": 0.5149,
+      "step": 260
+    },
+    {
+      "epoch": 0.2296360485268631,
+      "grad_norm": 0.14471225440502167,
+      "learning_rate": 4.4166666666666665e-05,
+      "loss": 0.5137,
+      "step": 265
+    },
+    {
+      "epoch": 0.2339688041594454,
+      "grad_norm": 0.13988590240478516,
+      "learning_rate": 4.5e-05,
+      "loss": 0.5066,
+      "step": 270
+    },
+    {
+      "epoch": 0.23830155979202772,
+      "grad_norm": 0.13964875042438507,
+      "learning_rate": 4.5833333333333334e-05,
+      "loss": 0.5116,
+      "step": 275
+    },
+    {
+      "epoch": 0.24263431542461006,
+      "grad_norm": 0.12847208976745605,
+      "learning_rate": 4.666666666666667e-05,
+      "loss": 0.5095,
+      "step": 280
+    },
+    {
+      "epoch": 0.24696707105719237,
+      "grad_norm": 0.13142219185829163,
+      "learning_rate": 4.75e-05,
+      "loss": 0.5077,
+      "step": 285
+    },
+    {
+      "epoch": 0.2512998266897747,
+      "grad_norm": 0.19098567962646484,
+      "learning_rate": 4.8333333333333334e-05,
+      "loss": 0.5099,
+      "step": 290
+    },
+    {
+      "epoch": 0.255632582322357,
+      "grad_norm": 0.1430283784866333,
+      "learning_rate": 4.9166666666666665e-05,
+      "loss": 0.5036,
+      "step": 295
+    },
+    {
+      "epoch": 0.25996533795493937,
+      "grad_norm": 0.12594453990459442,
+      "learning_rate": 5e-05,
+      "loss": 0.5085,
+      "step": 300
+    },
+    {
+      "epoch": 0.26429809358752165,
+      "grad_norm": 0.14377984404563904,
+      "learning_rate": 4.999577115486055e-05,
+      "loss": 0.4978,
+      "step": 305
+    },
+    {
+      "epoch": 0.268630849220104,
+      "grad_norm": 0.12468158453702927,
+      "learning_rate": 4.998308605009268e-05,
+      "loss": 0.5095,
+      "step": 310
+    },
+    {
+      "epoch": 0.2729636048526863,
+      "grad_norm": 0.12854167819023132,
+      "learning_rate": 4.996194897716389e-05,
+      "loss": 0.5037,
+      "step": 315
+    },
+    {
+      "epoch": 0.2772963604852686,
+      "grad_norm": 0.13529527187347412,
+      "learning_rate": 4.993236708690683e-05,
+      "loss": 0.5058,
+      "step": 320
+    },
+    {
+      "epoch": 0.28162911611785096,
+      "grad_norm": 0.1377994418144226,
+      "learning_rate": 4.9894350387100126e-05,
+      "loss": 0.4998,
+      "step": 325
+    },
+    {
+      "epoch": 0.28596187175043325,
+      "grad_norm": 0.14942322671413422,
+      "learning_rate": 4.984791173908267e-05,
+      "loss": 0.5007,
+      "step": 330
+    },
+    {
+      "epoch": 0.2902946273830156,
+      "grad_norm": 0.1366725116968155,
+      "learning_rate": 4.9793066853402536e-05,
+      "loss": 0.5038,
+      "step": 335
+    },
+    {
+      "epoch": 0.29462738301559793,
+      "grad_norm": 0.13133087754249573,
+      "learning_rate": 4.9729834284501995e-05,
+      "loss": 0.5062,
+      "step": 340
+    },
+    {
+      "epoch": 0.2989601386481802,
+      "grad_norm": 0.11725670844316483,
+      "learning_rate": 4.965823542444037e-05,
+      "loss": 0.5025,
+      "step": 345
+    },
+    {
+      "epoch": 0.30329289428076256,
+      "grad_norm": 0.10628046840429306,
+      "learning_rate": 4.9578294495656965e-05,
+      "loss": 0.4999,
+      "step": 350
+    },
+    {
+      "epoch": 0.3076256499133449,
+      "grad_norm": 0.13826170563697815,
+      "learning_rate": 4.949003854277644e-05,
+      "loss": 0.4978,
+      "step": 355
+    },
+    {
+      "epoch": 0.3119584055459272,
+      "grad_norm": 0.1305851936340332,
+      "learning_rate": 4.9393497423459376e-05,
+      "loss": 0.4997,
+      "step": 360
+    },
+    {
+      "epoch": 0.31629116117850953,
+      "grad_norm": 0.11465763300657272,
+      "learning_rate": 4.928870379830124e-05,
+      "loss": 0.5037,
+      "step": 365
+    },
+    {
+      "epoch": 0.32062391681109187,
+      "grad_norm": 0.15975706279277802,
+      "learning_rate": 4.9175693119783013e-05,
+      "loss": 0.4982,
+      "step": 370
+    },
+    {
+      "epoch": 0.32495667244367415,
+      "grad_norm": 0.15360799431800842,
+      "learning_rate": 4.905450362027738e-05,
+      "loss": 0.5013,
+      "step": 375
+    },
+    {
+      "epoch": 0.3292894280762565,
+      "grad_norm": 0.14006198942661285,
+      "learning_rate": 4.8925176299114416e-05,
+      "loss": 0.5008,
+      "step": 380
+    },
+    {
+      "epoch": 0.33362218370883884,
+      "grad_norm": 0.14255651831626892,
+      "learning_rate": 4.878775490871121e-05,
+      "loss": 0.4975,
+      "step": 385
+    },
+    {
+      "epoch": 0.3379549393414211,
+      "grad_norm": 0.12012791633605957,
+      "learning_rate": 4.864228593977006e-05,
+      "loss": 0.5047,
+      "step": 390
+    },
+    {
+      "epoch": 0.34228769497400346,
+      "grad_norm": 0.11433300375938416,
+      "learning_rate": 4.848881860555035e-05,
+      "loss": 0.4986,
+      "step": 395
+    },
+    {
+      "epoch": 0.3466204506065858,
+      "grad_norm": 0.11102011054754257,
+      "learning_rate": 4.832740482521931e-05,
+      "loss": 0.4981,
+      "step": 400
+    },
+    {
+      "epoch": 0.3509532062391681,
+      "grad_norm": 0.12340573221445084,
+      "learning_rate": 4.815809920628738e-05,
+      "loss": 0.4984,
+      "step": 405
+    },
+    {
+      "epoch": 0.35528596187175043,
+      "grad_norm": 0.1148650050163269,
+      "learning_rate": 4.7980959026134044e-05,
+      "loss": 0.4942,
+      "step": 410
+    },
+    {
+      "epoch": 0.3596187175043328,
+      "grad_norm": 0.10840712487697601,
+      "learning_rate": 4.7796044212630486e-05,
+      "loss": 0.4903,
+      "step": 415
+    },
+    {
+      "epoch": 0.36395147313691506,
+      "grad_norm": 0.11093516647815704,
+      "learning_rate": 4.7603417323865547e-05,
+      "loss": 0.4957,
+      "step": 420
+    },
+    {
+      "epoch": 0.3682842287694974,
+      "grad_norm": 0.1237047016620636,
+      "learning_rate": 4.74031435269818e-05,
+      "loss": 0.499,
+      "step": 425
+    },
+    {
+      "epoch": 0.37261698440207974,
+      "grad_norm": 0.11613244563341141,
+      "learning_rate": 4.7195290576129034e-05,
+      "loss": 0.4959,
+      "step": 430
+    },
+    {
+      "epoch": 0.37694974003466203,
+      "grad_norm": 0.10890854150056839,
+      "learning_rate": 4.697992878954255e-05,
+      "loss": 0.4944,
+      "step": 435
+    },
+    {
+      "epoch": 0.38128249566724437,
+      "grad_norm": 0.11364572495222092,
+      "learning_rate": 4.6757131025753886e-05,
+      "loss": 0.4909,
+      "step": 440
+    },
+    {
+      "epoch": 0.3856152512998267,
+      "grad_norm": 0.12619757652282715,
+      "learning_rate": 4.652697265894228e-05,
+      "loss": 0.4966,
+      "step": 445
+    },
+    {
+      "epoch": 0.389948006932409,
+      "grad_norm": 0.11669816076755524,
+      "learning_rate": 4.628953155343499e-05,
+      "loss": 0.4956,
+      "step": 450
+    },
+    {
+      "epoch": 0.39428076256499134,
+      "grad_norm": 0.13808482885360718,
+      "learning_rate": 4.604488803736523e-05,
+      "loss": 0.4973,
+      "step": 455
+    },
+    {
+      "epoch": 0.3986135181975737,
+      "grad_norm": 0.11171045899391174,
+      "learning_rate": 4.579312487549649e-05,
+      "loss": 0.4903,
+      "step": 460
+    },
+    {
+      "epoch": 0.40294627383015597,
+      "grad_norm": 0.11149395257234573,
+      "learning_rate": 4.553432724122265e-05,
+      "loss": 0.4999,
+      "step": 465
+    },
+    {
+      "epoch": 0.4072790294627383,
+      "grad_norm": 0.11308333277702332,
+      "learning_rate": 4.526858268775313e-05,
+      "loss": 0.4967,
+      "step": 470
+    },
+    {
+      "epoch": 0.41161178509532065,
+      "grad_norm": 0.10610105097293854,
+      "learning_rate": 4.499598111849299e-05,
+      "loss": 0.4936,
+      "step": 475
+    },
+    {
+      "epoch": 0.41594454072790293,
+      "grad_norm": 0.11356962472200394,
+      "learning_rate": 4.471661475662792e-05,
+      "loss": 0.493,
+      "step": 480
+    },
+    {
+      "epoch": 0.4202772963604853,
+      "grad_norm": 0.10310888290405273,
+      "learning_rate": 4.443057811392445e-05,
+      "loss": 0.5002,
+      "step": 485
+    },
+    {
+      "epoch": 0.4246100519930676,
+      "grad_norm": 0.11358631402254105,
+      "learning_rate": 4.413796795875586e-05,
+      "loss": 0.4983,
+      "step": 490
+    },
+    {
+      "epoch": 0.4289428076256499,
+      "grad_norm": 0.12575574219226837,
+      "learning_rate": 4.383888328336476e-05,
+      "loss": 0.4949,
+      "step": 495
+    },
+    {
+      "epoch": 0.43327556325823224,
+      "grad_norm": 0.09878399968147278,
+      "learning_rate": 4.3533425270373216e-05,
+      "loss": 0.4953,
+      "step": 500
+    },
+    {
+      "epoch": 0.43327556325823224,
+      "eval_loss": 0.4938061535358429,
+      "eval_runtime": 140.4236,
+      "eval_samples_per_second": 2.685,
+      "eval_steps_per_second": 0.677,
+      "step": 500
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 1154,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.49519346663424e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}