Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8cfe5db208f6c477e2e0635b1f6a133dfa0bd72cedf08e35035b438e65672ba
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:32f74c466531b29224b51d876788b2ce59723ee6edf626d6489869e061013f3e
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6dc9843ea8a76c71db403d9129ac58d827e7a67c65a13da18bb87b0a19a2d501
 size 71878996

 version https://git-lfs.github.com/spec/v1
+oid sha256:252f405adc974fa23532e591518c41a5363f54089aa9dc3a0de1b5b9cefe0749
 size 71878996

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06cf6dbd5af8627d5bc86f0a43500500a4a729e4e2a276cf06055395a2aab116
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:81982bcbe911082158e6e948fd98dec9c0da0e8967eb7157b6d6fae6dd0c682e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9096f15f02bac6b0fc27aa7aa4986f85d87d53fca310a75657e0015357af5c5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.565327525138855,
-  "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.04033303563711792,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 26.771,
       "eval_steps_per_second": 6.693,
       "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2544,12 +2902,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.345339247276851e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5639106631278992,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.046094897870991906,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.771,
       "eval_steps_per_second": 6.693,
       "step": 350
+    },
+    {
+      "epoch": 0.040448272881795394,
+      "grad_norm": 0.40898263454437256,
+      "learning_rate": 3.844650207332562e-06,
+      "loss": 0.5105,
+      "step": 351
+    },
+    {
+      "epoch": 0.04056351012647288,
+      "grad_norm": 0.505041241645813,
+      "learning_rate": 3.691267552111183e-06,
+      "loss": 0.5799,
+      "step": 352
+    },
+    {
+      "epoch": 0.040678747371150355,
+      "grad_norm": 0.567950963973999,
+      "learning_rate": 3.54088980417534e-06,
+      "loss": 0.5532,
+      "step": 353
+    },
+    {
+      "epoch": 0.04079398461582784,
+      "grad_norm": 0.5739919543266296,
+      "learning_rate": 3.393526721321616e-06,
+      "loss": 0.6807,
+      "step": 354
+    },
+    {
+      "epoch": 0.040909221860505315,
+      "grad_norm": 0.6619653701782227,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 0.6146,
+      "step": 355
+    },
+    {
+      "epoch": 0.04102445910518279,
+      "grad_norm": 1.0355381965637207,
+      "learning_rate": 3.1078826033397843e-06,
+      "loss": 0.5103,
+      "step": 356
+    },
+    {
+      "epoch": 0.041139696349860276,
+      "grad_norm": 0.9396152496337891,
+      "learning_rate": 2.9696201032491434e-06,
+      "loss": 0.5792,
+      "step": 357
+    },
+    {
+      "epoch": 0.04125493359453775,
+      "grad_norm": 0.6551850438117981,
+      "learning_rate": 2.8344093371128424e-06,
+      "loss": 0.6257,
+      "step": 358
+    },
+    {
+      "epoch": 0.04137017083921524,
+      "grad_norm": 0.7442685961723328,
+      "learning_rate": 2.70225907856374e-06,
+      "loss": 0.6958,
+      "step": 359
+    },
+    {
+      "epoch": 0.041485408083892714,
+      "grad_norm": 0.8106182813644409,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 0.6433,
+      "step": 360
+    },
+    {
+      "epoch": 0.04160064532857019,
+      "grad_norm": 0.8108749389648438,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.5951,
+      "step": 361
+    },
+    {
+      "epoch": 0.041715882573247674,
+      "grad_norm": 0.7107764482498169,
+      "learning_rate": 2.324256102563188e-06,
+      "loss": 0.5789,
+      "step": 362
+    },
+    {
+      "epoch": 0.04183111981792515,
+      "grad_norm": 0.8077031373977661,
+      "learning_rate": 2.204431630583548e-06,
+      "loss": 0.6543,
+      "step": 363
+    },
+    {
+      "epoch": 0.041946357062602635,
+      "grad_norm": 0.7621222734451294,
+      "learning_rate": 2.087708544541689e-06,
+      "loss": 0.608,
+      "step": 364
+    },
+    {
+      "epoch": 0.04206159430728011,
+      "grad_norm": 0.851012110710144,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 0.6374,
+      "step": 365
+    },
+    {
+      "epoch": 0.042176831551957596,
+      "grad_norm": 0.8776100873947144,
+      "learning_rate": 1.8635966245104664e-06,
+      "loss": 0.52,
+      "step": 366
+    },
+    {
+      "epoch": 0.04229206879663507,
+      "grad_norm": 0.9081662893295288,
+      "learning_rate": 1.7562223328224325e-06,
+      "loss": 0.6261,
+      "step": 367
+    },
+    {
+      "epoch": 0.04240730604131255,
+      "grad_norm": 1.609647512435913,
+      "learning_rate": 1.6519785107311891e-06,
+      "loss": 0.6758,
+      "step": 368
+    },
+    {
+      "epoch": 0.04252254328599003,
+      "grad_norm": 0.8990199565887451,
+      "learning_rate": 1.5508719224689717e-06,
+      "loss": 0.6053,
+      "step": 369
+    },
+    {
+      "epoch": 0.04263778053066751,
+      "grad_norm": 0.9323314428329468,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 0.5667,
+      "step": 370
+    },
+    {
+      "epoch": 0.042753017775344994,
+      "grad_norm": 0.945374071598053,
+      "learning_rate": 1.358096486081778e-06,
+      "loss": 0.6181,
+      "step": 371
+    },
+    {
+      "epoch": 0.04286825502002247,
+      "grad_norm": 1.0014368295669556,
+      "learning_rate": 1.2664401468786114e-06,
+      "loss": 0.6029,
+      "step": 372
+    },
+    {
+      "epoch": 0.04298349226469995,
+      "grad_norm": 1.0264681577682495,
+      "learning_rate": 1.1779460585363944e-06,
+      "loss": 0.6635,
+      "step": 373
+    },
+    {
+      "epoch": 0.04309872950937743,
+      "grad_norm": 1.105810523033142,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.573,
+      "step": 374
+    },
+    {
+      "epoch": 0.04321396675405491,
+      "grad_norm": 1.128774881362915,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 0.7388,
+      "step": 375
+    },
+    {
+      "epoch": 0.04332920399873239,
+      "grad_norm": 1.3867303133010864,
+      "learning_rate": 9.314936930293283e-07,
+      "loss": 0.6529,
+      "step": 376
+    },
+    {
+      "epoch": 0.04344444124340987,
+      "grad_norm": 1.161137342453003,
+      "learning_rate": 8.557039732283944e-07,
+      "loss": 0.6191,
+      "step": 377
+    },
+    {
+      "epoch": 0.04355967848808735,
+      "grad_norm": 1.2012664079666138,
+      "learning_rate": 7.83103156370113e-07,
+      "loss": 0.5442,
+      "step": 378
+    },
+    {
+      "epoch": 0.04367491573276483,
+      "grad_norm": 1.1599990129470825,
+      "learning_rate": 7.136959534174592e-07,
+      "loss": 0.54,
+      "step": 379
+    },
+    {
+      "epoch": 0.043790152977442307,
+      "grad_norm": 0.8282076120376587,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 0.3925,
+      "step": 380
+    },
+    {
+      "epoch": 0.04390539022211979,
+      "grad_norm": 0.9517076015472412,
+      "learning_rate": 5.844801966434832e-07,
+      "loss": 0.47,
+      "step": 381
+    },
+    {
+      "epoch": 0.04402062746679727,
+      "grad_norm": 0.8718116879463196,
+      "learning_rate": 5.246800274474439e-07,
+      "loss": 0.5393,
+      "step": 382
+    },
+    {
+      "epoch": 0.04413586471147475,
+      "grad_norm": 0.9473959803581238,
+      "learning_rate": 4.680902408635335e-07,
+      "loss": 0.5456,
+      "step": 383
+    },
+    {
+      "epoch": 0.04425110195615223,
+      "grad_norm": 0.9435731172561646,
+      "learning_rate": 4.1471450892189846e-07,
+      "loss": 0.535,
+      "step": 384
+    },
+    {
+      "epoch": 0.044366339200829705,
+      "grad_norm": 0.8984076380729675,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 0.4925,
+      "step": 385
+    },
+    {
+      "epoch": 0.04448157644550719,
+      "grad_norm": 0.9920340180397034,
+      "learning_rate": 3.1761885408435054e-07,
+      "loss": 0.5074,
+      "step": 386
+    },
+    {
+      "epoch": 0.044596813690184665,
+      "grad_norm": 1.0126951932907104,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.4881,
+      "step": 387
+    },
+    {
+      "epoch": 0.04471205093486215,
+      "grad_norm": 1.2216013669967651,
+      "learning_rate": 2.334182641175686e-07,
+      "loss": 0.602,
+      "step": 388
+    },
+    {
+      "epoch": 0.044827288179539626,
+      "grad_norm": 0.9132040739059448,
+      "learning_rate": 1.9616057881935436e-07,
+      "loss": 0.5279,
+      "step": 389
+    },
+    {
+      "epoch": 0.04494252542421711,
+      "grad_norm": 1.1324865818023682,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 0.6967,
+      "step": 390
+    },
+    {
+      "epoch": 0.04505776266889459,
+      "grad_norm": 1.058250069618225,
+      "learning_rate": 1.3134251542544774e-07,
+      "loss": 0.5102,
+      "step": 391
+    },
+    {
+      "epoch": 0.045172999913572064,
+      "grad_norm": 1.089077353477478,
+      "learning_rate": 1.0378634328099269e-07,
+      "loss": 0.5119,
+      "step": 392
+    },
+    {
+      "epoch": 0.04528823715824955,
+      "grad_norm": 1.0287598371505737,
+      "learning_rate": 7.946786493666647e-08,
+      "loss": 0.4487,
+      "step": 393
+    },
+    {
+      "epoch": 0.045403474402927024,
+      "grad_norm": 1.0808227062225342,
+      "learning_rate": 5.838865838366792e-08,
+      "loss": 0.4266,
+      "step": 394
+    },
+    {
+      "epoch": 0.04551871164760451,
+      "grad_norm": 1.3644070625305176,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 0.517,
+      "step": 395
+    },
+    {
+      "epoch": 0.045633948892281985,
+      "grad_norm": 1.2962229251861572,
+      "learning_rate": 2.595332156925534e-08,
+      "loss": 0.5079,
+      "step": 396
+    },
+    {
+      "epoch": 0.04574918613695946,
+      "grad_norm": 1.2190864086151123,
+      "learning_rate": 1.4599295990352924e-08,
+      "loss": 0.4182,
+      "step": 397
+    },
+    {
+      "epoch": 0.045864423381636946,
+      "grad_norm": 1.3271971940994263,
+      "learning_rate": 6.488751431266149e-09,
+      "loss": 0.4546,
+      "step": 398
+    },
+    {
+      "epoch": 0.04597966062631442,
+      "grad_norm": 1.3501412868499756,
+      "learning_rate": 1.622214173602199e-09,
+      "loss": 0.4312,
+      "step": 399
+    },
+    {
+      "epoch": 0.046094897870991906,
+      "grad_norm": 1.38814115524292,
+      "learning_rate": 0.0,
+      "loss": 0.352,
+      "step": 400
+    },
+    {
+      "epoch": 0.046094897870991906,
+      "eval_loss": 0.5639106631278992,
+      "eval_runtime": 544.4924,
+      "eval_samples_per_second": 26.842,
+      "eval_steps_per_second": 6.711,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.961685721212518e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null