Training in progress, step 44, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +137 -4

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:889db52d9a5845b10e25d23713a7009d72e24037860dcf1c949fbdeedfd6fb49
 size 646253418

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4b72a539604afffd088d99f4fd15e7fd0d7b4cdaac836ea289b243b76e2515c
 size 646253418

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47ff946a20f3e088769fe271841e2b5171bea513a2e9cd0db81480f3bc758197
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b68429e2505444bb7753f39d0cbd8bd98ad6ebd2e4b2f4134274fe93172a2556
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44ac0b40f9801ec51d3c97ea102e9b894342f2dfa8a9e7e5809067d1ac7ed815
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0fe6216799d6e32c5f109f91472b5d66989d1f4fc496fc9b37d3a34b160d3016
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f509d4753359aefade564e4dd8336c9bf4aff615dd9d58aec9b91ff7ca8d9be
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:416dc2c131c048603c9169cefcebb6347b5415566511ee5328541e65f2fcb48e
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d612488b8a24de5b3e96eef621d1935401b5138edad359a1690bec2a487d87f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a729de41cbc46d525dce163f24bfe8abd7ffa84fda0f2b523cf2b13f131f143d
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1229ee494750fdca01199e7b17f6407ce48ff55604d2cd9d7a7582776cffc192
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:eba890d5c37511d983154de0f89f465e597822376430ed595af6f919e721806d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.7319148936170212,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,139 @@
       "eval_samples_per_second": 29.622,
       "eval_steps_per_second": 7.702,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +354,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.812028905324544e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 3.0851063829787235,
   "eval_steps": 25,
+  "global_step": 44,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 29.622,
       "eval_steps_per_second": 7.702,
       "step": 25
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": NaN,
+      "learning_rate": 4.498655797196586e-05,
+      "loss": 0.0,
+      "step": 26
+    },
+    {
+      "epoch": 1.8680851063829786,
+      "grad_norm": NaN,
+      "learning_rate": 4.173601715150931e-05,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 1.9361702127659575,
+      "grad_norm": NaN,
+      "learning_rate": 3.8559653903512224e-05,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 2.0340425531914894,
+      "grad_norm": NaN,
+      "learning_rate": 3.547523173970989e-05,
+      "loss": 0.0,
+      "step": 29
+    },
+    {
+      "epoch": 2.1021276595744682,
+      "grad_norm": NaN,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 2.1702127659574466,
+      "grad_norm": NaN,
+      "learning_rate": 2.9650597387137014e-05,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 2.2382978723404254,
+      "grad_norm": NaN,
+      "learning_rate": 2.6942958916356998e-05,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 2.3063829787234043,
+      "grad_norm": NaN,
+      "learning_rate": 2.4392226800308622e-05,
+      "loss": 0.0,
+      "step": 33
+    },
+    {
+      "epoch": 2.374468085106383,
+      "grad_norm": NaN,
+      "learning_rate": 2.2012665767657826e-05,
+      "loss": 0.0,
+      "step": 34
+    },
+    {
+      "epoch": 2.4425531914893615,
+      "grad_norm": NaN,
+      "learning_rate": 1.981758328893866e-05,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 2.5106382978723403,
+      "grad_norm": NaN,
+      "learning_rate": 1.781925515578024e-05,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 2.578723404255319,
+      "grad_norm": NaN,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 0.0,
+      "step": 37
+    },
+    {
+      "epoch": 2.646808510638298,
+      "grad_norm": NaN,
+      "learning_rate": 1.4456400944391146e-05,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 2.7148936170212767,
+      "grad_norm": NaN,
+      "learning_rate": 1.3110681311010814e-05,
+      "loss": 0.0,
+      "step": 39
+    },
+    {
+      "epoch": 2.7829787234042556,
+      "grad_norm": NaN,
+      "learning_rate": 1.1999223739623667e-05,
+      "loss": 0.0,
+      "step": 40
+    },
+    {
+      "epoch": 2.851063829787234,
+      "grad_norm": NaN,
+      "learning_rate": 1.1128243951817937e-05,
+      "loss": 0.0,
+      "step": 41
+    },
+    {
+      "epoch": 2.9191489361702128,
+      "grad_norm": NaN,
+      "learning_rate": 1.0502612819869217e-05,
+      "loss": 0.0,
+      "step": 42
+    },
+    {
+      "epoch": 3.0170212765957447,
+      "grad_norm": NaN,
+      "learning_rate": 1.0125829126846895e-05,
+      "loss": 0.0,
+      "step": 43
+    },
+    {
+      "epoch": 3.0851063829787235,
+      "grad_norm": NaN,
+      "learning_rate": 1e-05,
+      "loss": 0.0,
+      "step": 44
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.9491708733711974e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null