Training in progress, step 50, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0acaa248da9ecf4f3400cf0ab8373e068649f0a3d6311528adaa61ce0e1939c5
 size 35237104

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0897f39ea0e3eb8a9e882cad77c695e5cbe6a50894a0f0dc5ed77df38cd3957
 size 35237104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e72e151522b248e570fcca5a7eebba74535e7ca1458e04f4e75ff45ef409826
 size 70667778

 version https://git-lfs.github.com/spec/v1
+oid sha256:3666d9170fab37ecdf01056958b33af72ddbb19d69f38079119971855b06ebc4
 size 70667778

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49d3c33fbfc765bc68a1f87d31b9c2d50aee569f598fa846016491f9ffae80b2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f63a4b816865cad9281b4f6678544e1f8e8e8040887cfbdc9e0c12b5a9898086
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d4ae3842d5f151f968dca77e1398d806e7ccbbb71022d80233230f6c7d83bcb
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9567f2f3182e832808fd621212e0dd5e8f1a88bd24ddda3ea0d289496073738c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.3278440237045288,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.0004227990140326993,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 13.793,
       "eval_steps_per_second": 6.897,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,7 +395,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1802385752064000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.3278440237045288,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.0008455980280653986,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.793,
       "eval_steps_per_second": 6.897,
       "step": 25
+    },
+    {
+      "epoch": 0.0004397109745940072,
+      "grad_norm": 1.2110459804534912,
+      "learning_rate": 0.00017684011108568592,
+      "loss": 0.895,
+      "step": 26
+    },
+    {
+      "epoch": 0.0004566229351553152,
+      "grad_norm": 1.1382213830947876,
+      "learning_rate": 0.0001746821476984154,
+      "loss": 0.9614,
+      "step": 27
+    },
+    {
+      "epoch": 0.00047353489571662317,
+      "grad_norm": 1.0368095636367798,
+      "learning_rate": 0.00017244252047910892,
+      "loss": 0.8552,
+      "step": 28
+    },
+    {
+      "epoch": 0.0004904468562779311,
+      "grad_norm": 1.323333740234375,
+      "learning_rate": 0.00017012367842724887,
+      "loss": 1.0692,
+      "step": 29
+    },
+    {
+      "epoch": 0.0005073588168392391,
+      "grad_norm": 1.1299031972885132,
+      "learning_rate": 0.00016772815716257412,
+      "loss": 1.0273,
+      "step": 30
+    },
+    {
+      "epoch": 0.0005242707774005471,
+      "grad_norm": 0.9993110299110413,
+      "learning_rate": 0.00016525857615241687,
+      "loss": 0.7862,
+      "step": 31
+    },
+    {
+      "epoch": 0.0005411827379618551,
+      "grad_norm": 0.9180406332015991,
+      "learning_rate": 0.0001627176358473537,
+      "loss": 0.8503,
+      "step": 32
+    },
+    {
+      "epoch": 0.000558094698523163,
+      "grad_norm": 0.852716863155365,
+      "learning_rate": 0.00016010811472830252,
+      "loss": 0.7589,
+      "step": 33
+    },
+    {
+      "epoch": 0.000575006659084471,
+      "grad_norm": 1.1683015823364258,
+      "learning_rate": 0.00015743286626829437,
+      "loss": 0.9032,
+      "step": 34
+    },
+    {
+      "epoch": 0.000591918619645779,
+      "grad_norm": 0.8834861516952515,
+      "learning_rate": 0.00015469481581224272,
+      "loss": 0.8227,
+      "step": 35
+    },
+    {
+      "epoch": 0.000608830580207087,
+      "grad_norm": 0.788383424282074,
+      "learning_rate": 0.00015189695737812152,
+      "loss": 0.8611,
+      "step": 36
+    },
+    {
+      "epoch": 0.0006257425407683949,
+      "grad_norm": 0.7765706777572632,
+      "learning_rate": 0.00014904235038305083,
+      "loss": 0.8839,
+      "step": 37
+    },
+    {
+      "epoch": 0.0006426545013297029,
+      "grad_norm": 0.8954746127128601,
+      "learning_rate": 0.0001461341162978688,
+      "loss": 0.7209,
+      "step": 38
+    },
+    {
+      "epoch": 0.0006595664618910109,
+      "grad_norm": 0.6842917799949646,
+      "learning_rate": 0.00014317543523384928,
+      "loss": 0.644,
+      "step": 39
+    },
+    {
+      "epoch": 0.0006764784224523189,
+      "grad_norm": 0.8862132430076599,
+      "learning_rate": 0.00014016954246529696,
+      "loss": 0.7262,
+      "step": 40
+    },
+    {
+      "epoch": 0.0006933903830136268,
+      "grad_norm": 0.8410934805870056,
+      "learning_rate": 0.00013711972489182208,
+      "loss": 0.7245,
+      "step": 41
+    },
+    {
+      "epoch": 0.0007103023435749348,
+      "grad_norm": 1.1996475458145142,
+      "learning_rate": 0.00013402931744416433,
+      "loss": 0.8627,
+      "step": 42
+    },
+    {
+      "epoch": 0.0007272143041362428,
+      "grad_norm": 1.1021193265914917,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 0.887,
+      "step": 43
+    },
+    {
+      "epoch": 0.0007441262646975507,
+      "grad_norm": 1.623423457145691,
+      "learning_rate": 0.00012774029087618446,
+      "loss": 0.9683,
+      "step": 44
+    },
+    {
+      "epoch": 0.0007610382252588587,
+      "grad_norm": 0.932873547077179,
+      "learning_rate": 0.00012454854871407994,
+      "loss": 0.882,
+      "step": 45
+    },
+    {
+      "epoch": 0.0007779501858201667,
+      "grad_norm": 0.9381756782531738,
+      "learning_rate": 0.0001213299630743747,
+      "loss": 0.8824,
+      "step": 46
+    },
+    {
+      "epoch": 0.0007948621463814747,
+      "grad_norm": 1.2354497909545898,
+      "learning_rate": 0.000118088053433211,
+      "loss": 1.093,
+      "step": 47
+    },
+    {
+      "epoch": 0.0008117741069427826,
+      "grad_norm": 1.0774998664855957,
+      "learning_rate": 0.0001148263647711842,
+      "loss": 1.032,
+      "step": 48
+    },
+    {
+      "epoch": 0.0008286860675040906,
+      "grad_norm": 0.8946607708930969,
+      "learning_rate": 0.00011154846369695863,
+      "loss": 0.8654,
+      "step": 49
+    },
+    {
+      "epoch": 0.0008455980280653986,
+      "grad_norm": 0.8054470419883728,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 0.8106,
+      "step": 50
+    },
+    {
+      "epoch": 0.0008455980280653986,
+      "eval_loss": 1.3436920642852783,
+      "eval_runtime": 1805.0607,
+      "eval_samples_per_second": 13.793,
+      "eval_steps_per_second": 6.897,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3604771504128000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null