Training in progress, step 166, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9cfa661c54339a670d279a82539151fa79137d5093e0b025188d9e50254cec5
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:2727cf1162bcdc2ea431dcc02d9c762ca7acfebaebb93304ffda64edf7ffe3ec
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4ca9111e3ea091b87e489018a9e97525952d3864a098a50a52e754f3f22c360
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa5547d1958277542f79ab9a0f0261b7802aa5ff9e2e55c2d97095651e68148c
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e7c49e05b21e3653355619157ebf34445c75bc033c13bc3bd09fcb19f06f8d1
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:1a837812d656bbd27708f52fd6b43ec1c9f0520c4b827efa1781caaf5242150d
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:211ff8c0d5e10554f2ceb347d0f6acd4f3acf6ba9002d45319f7c3abcda93013
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:18e4a513de2adf3f7e2c1ac68c21245a28b4c2292ee16f53025ec71806c9bb44
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2a4d4c9298a700cb8145e9be723fe53addaa36487b10ce757075d75d235e7dc
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf63c606aaa799919b498936a8161d20f282cdca30eb1326c2e12fad8e2ae60d
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7236385d7d483948c57cf28bf8bd5f038c9333fa029cab83bf7bdfc42e8b4ab
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3209aba9b70f7586de5581283e2884742c6c9595707339fe0f46d0ca4b014b83
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a51a61d26673f45740087c7c6bc461057f2353ff1696dc358d96750878ca6351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fec1e40c6dba74f1038e9d765519a0d1df17ea9f1c2a8daabaf19c4ba3779056
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.40452006459236145,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.24324735215121876,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,118 @@
       "eval_samples_per_second": 32.208,
       "eval_steps_per_second": 8.374,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1248,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.580912017919181e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.40452006459236145,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.2691937363806821,
   "eval_steps": 25,
+  "global_step": 166,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 32.208,
       "eval_steps_per_second": 8.374,
       "step": 150
+    },
+    {
+      "epoch": 0.24486900116556023,
+      "grad_norm": 0.14199906587600708,
+      "learning_rate": 1.1986684236740763e-05,
+      "loss": 0.4249,
+      "step": 151
+    },
+    {
+      "epoch": 0.24649065017990168,
+      "grad_norm": 0.16196103394031525,
+      "learning_rate": 1.1732279305266082e-05,
+      "loss": 0.4533,
+      "step": 152
+    },
+    {
+      "epoch": 0.24811229919424316,
+      "grad_norm": 0.1596025824546814,
+      "learning_rate": 1.1494979854388329e-05,
+      "loss": 0.41,
+      "step": 153
+    },
+    {
+      "epoch": 0.2497339482085846,
+      "grad_norm": 0.16512756049633026,
+      "learning_rate": 1.1274879698181547e-05,
+      "loss": 0.4556,
+      "step": 154
+    },
+    {
+      "epoch": 0.25135559722292605,
+      "grad_norm": 0.15834008157253265,
+      "learning_rate": 1.1072065851142718e-05,
+      "loss": 0.4458,
+      "step": 155
+    },
+    {
+      "epoch": 0.25297724623726753,
+      "grad_norm": 0.1588006466627121,
+      "learning_rate": 1.0886618493791376e-05,
+      "loss": 0.431,
+      "step": 156
+    },
+    {
+      "epoch": 0.254598895251609,
+      "grad_norm": 0.15690577030181885,
+      "learning_rate": 1.0718610940971071e-05,
+      "loss": 0.417,
+      "step": 157
+    },
+    {
+      "epoch": 0.2562205442659504,
+      "grad_norm": 0.16939620673656464,
+      "learning_rate": 1.0568109612865e-05,
+      "loss": 0.4517,
+      "step": 158
+    },
+    {
+      "epoch": 0.2578421932802919,
+      "grad_norm": 0.16963833570480347,
+      "learning_rate": 1.0435174008737416e-05,
+      "loss": 0.4638,
+      "step": 159
+    },
+    {
+      "epoch": 0.2594638422946334,
+      "grad_norm": 0.18084716796875,
+      "learning_rate": 1.0319856683411197e-05,
+      "loss": 0.4172,
+      "step": 160
+    },
+    {
+      "epoch": 0.2610854913089748,
+      "grad_norm": 0.20121614634990692,
+      "learning_rate": 1.0222203226490767e-05,
+      "loss": 0.4177,
+      "step": 161
+    },
+    {
+      "epoch": 0.26270714032331627,
+      "grad_norm": 0.21996457874774933,
+      "learning_rate": 1.0142252244338688e-05,
+      "loss": 0.4157,
+      "step": 162
+    },
+    {
+      "epoch": 0.26432878933765774,
+      "grad_norm": 0.1590254306793213,
+      "learning_rate": 1.0080035344813017e-05,
+      "loss": 0.4034,
+      "step": 163
+    },
+    {
+      "epoch": 0.26595043835199916,
+      "grad_norm": 0.13573700189590454,
+      "learning_rate": 1.0035577124771419e-05,
+      "loss": 0.4021,
+      "step": 164
+    },
+    {
+      "epoch": 0.26757208736634064,
+      "grad_norm": 0.15125057101249695,
+      "learning_rate": 1.0008895160347052e-05,
+      "loss": 0.4395,
+      "step": 165
+    },
+    {
+      "epoch": 0.2691937363806821,
+      "grad_norm": 0.16019335389137268,
+      "learning_rate": 1e-05,
+      "loss": 0.4371,
+      "step": 166
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.962875966497227e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null