Training in progress, step 166, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:331192ac1f6bf314815e5b10c88737996486d390146d8e785619ad7dc5ff79e5
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:f794e5c83f0b343f9e3a3ce74fafedfaa9b179bd2557cf18c4129319f11b3194
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:631686b4569ed6de8de2e33d5b47bc87418f2cf1a3133003d646c996e7dfb4d8
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:8167e5b1e056ab3bad805ec040e7b1258185d6aef7a734e363c7279f173e174a
 size 335922386

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:044ff09e8958661e9d77f3530a25f53b93580ac58f0ab4d62554f82f49c61f99
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:30af804811173e42e95fdca064724f01624f4a7180f01f32ec76aecfeaf944fa
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91621a6713336b0ef6a2d021e0a0ad7048da717f4aa80b55d1dfc17fdad2901e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3751d7c90b6106e2e2b9533ec2331aad6e66c18cc58e059c9af17b2f73c9d0de
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a8149c42bf13992ca336b5c00aa8ceaa16f084d366bdf1be14868e30d2b8361
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:376e62010036f3bdd3a656eef389aed66bdbaf88a636951248707427be62583b
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d8fe7a2cad1ff69a24b74a2c491e671d4a204c81bb46fd633038e8544921048
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5376e5b027ed022c853717be24ac05fc92b2b9af8b60781eed3e4e4ee84ebe9
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3ae2800eef0245add9e0e1be7ec7a57e0cd3c41c460c9546ae4fa2e57287fa9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fec1e40c6dba74f1038e9d765519a0d1df17ea9f1c2a8daabaf19c4ba3779056
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.8791134357452393,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.07675517497781295,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,118 @@
       "eval_samples_per_second": 32.565,
       "eval_steps_per_second": 8.467,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1248,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.563655565173326e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.8791134357452393,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.084942393642113,
   "eval_steps": 25,
+  "global_step": 166,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 32.565,
       "eval_steps_per_second": 8.467,
       "step": 150
+    },
+    {
+      "epoch": 0.07726687614433171,
+      "grad_norm": 0.33062976598739624,
+      "learning_rate": 1.1937684892050604e-05,
+      "loss": 1.3588,
+      "step": 151
+    },
+    {
+      "epoch": 0.07777857731085046,
+      "grad_norm": 0.5277450084686279,
+      "learning_rate": 1.168951435958588e-05,
+      "loss": 1.7291,
+      "step": 152
+    },
+    {
+      "epoch": 0.07829027847736922,
+      "grad_norm": 0.5764089226722717,
+      "learning_rate": 1.1458040843788312e-05,
+      "loss": 1.8712,
+      "step": 153
+    },
+    {
+      "epoch": 0.07880197964388796,
+      "grad_norm": 0.6534531712532043,
+      "learning_rate": 1.1243353582104556e-05,
+      "loss": 1.9575,
+      "step": 154
+    },
+    {
+      "epoch": 0.07931368081040673,
+      "grad_norm": 0.7147605419158936,
+      "learning_rate": 1.1045535340560744e-05,
+      "loss": 1.9211,
+      "step": 155
+    },
+    {
+      "epoch": 0.07982538197692547,
+      "grad_norm": 0.7351657748222351,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 1.9539,
+      "step": 156
+    },
+    {
+      "epoch": 0.08033708314344423,
+      "grad_norm": 0.8152849674224854,
+      "learning_rate": 1.070080443595488e-05,
+      "loss": 2.0232,
+      "step": 157
+    },
+    {
+      "epoch": 0.08084878430996298,
+      "grad_norm": 0.9168938398361206,
+      "learning_rate": 1.0554024673218807e-05,
+      "loss": 1.9743,
+      "step": 158
+    },
+    {
+      "epoch": 0.08136048547648174,
+      "grad_norm": 1.0510733127593994,
+      "learning_rate": 1.0424379680039025e-05,
+      "loss": 2.0778,
+      "step": 159
+    },
+    {
+      "epoch": 0.08187218664300049,
+      "grad_norm": 1.2245820760726929,
+      "learning_rate": 1.0311919437028318e-05,
+      "loss": 2.2151,
+      "step": 160
+    },
+    {
+      "epoch": 0.08238388780951925,
+      "grad_norm": 1.3239753246307373,
+      "learning_rate": 1.0216687299751144e-05,
+      "loss": 2.2195,
+      "step": 161
+    },
+    {
+      "epoch": 0.08289558897603799,
+      "grad_norm": 1.7548948526382446,
+      "learning_rate": 1.0138719982009242e-05,
+      "loss": 2.5937,
+      "step": 162
+    },
+    {
+      "epoch": 0.08340729014255675,
+      "grad_norm": 0.4118451774120331,
+      "learning_rate": 1.007804754168779e-05,
+      "loss": 1.4343,
+      "step": 163
+    },
+    {
+      "epoch": 0.0839189913090755,
+      "grad_norm": 0.5209285616874695,
+      "learning_rate": 1.003469336916747e-05,
+      "loss": 1.7331,
+      "step": 164
+    },
+    {
+      "epoch": 0.08443069247559425,
+      "grad_norm": 0.5564490556716919,
+      "learning_rate": 1.0008674178307085e-05,
+      "loss": 1.8639,
+      "step": 165
+    },
+    {
+      "epoch": 0.084942393642113,
+      "grad_norm": 0.6351719498634338,
+      "learning_rate": 1e-05,
+      "loss": 1.9287,
+      "step": 166
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.944151006450811e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null