Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +49 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a66fea4bcba025327076ca9f5ee8a267ac131118ec5ca730b04f4929615c2507
 size 682685984

 version https://git-lfs.github.com/spec/v1
+oid sha256:6583e74c37837375327fcaacd69c67c712ada8ad0efcd509e528856192351a54
 size 682685984

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e11daccb80be64059fa6f8deda39d52513e335cd5e783f5c300f766180d85c7
-size 85498196

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ed235b993bd669749a971786bdd12b58648790866785c224e3eb01907ae32aa
+size 85498452

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2523622a2a710f7a29831a1a340fff695b20888bc77e8e3a7c514604e9fe6db9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e4d00ced72af3d730b15bfd9917d0bd6039eca950234f6bcb7e16c6ad08abfc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fce54f61503edf00b56f4cd65020ab3a18bbd43c84561899444870908675e793
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:74485e67705dc36efbfb69b1e54f842e1ff07894d01bb0e36d6d2526a318b300
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.1802804470062256,
-  "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 0.009180966021244755,
   "eval_steps": 50,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -230,6 +230,49 @@
       "eval_samples_per_second": 27.164,
       "eval_steps_per_second": 6.791,
       "step": 250
     }
   ],
   "logging_steps": 10,
@@ -253,12 +296,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.3292886929965056e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.1761000156402588,
+  "best_model_checkpoint": "miner_id_24/checkpoint-300",
+  "epoch": 0.011017159225493706,
   "eval_steps": 50,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.164,
       "eval_steps_per_second": 6.791,
       "step": 250
+    },
+    {
+      "epoch": 0.009548204662094545,
+      "grad_norm": 1.5391769409179688,
+      "learning_rate": 9.242458032904311e-06,
+      "loss": 4.9689,
+      "step": 260
+    },
+    {
+      "epoch": 0.009915443302944336,
+      "grad_norm": 2.42850923538208,
+      "learning_rate": 5.2346828817197655e-06,
+      "loss": 4.883,
+      "step": 270
+    },
+    {
+      "epoch": 0.010282681943794126,
+      "grad_norm": 3.6636316776275635,
+      "learning_rate": 2.3379444289913342e-06,
+      "loss": 4.6504,
+      "step": 280
+    },
+    {
+      "epoch": 0.010649920584643917,
+      "grad_norm": 4.5757622718811035,
+      "learning_rate": 5.862042845640403e-07,
+      "loss": 4.441,
+      "step": 290
+    },
+    {
+      "epoch": 0.011017159225493706,
+      "grad_norm": 13.752517700195312,
+      "learning_rate": 0.0,
+      "loss": 4.7538,
+      "step": 300
+    },
+    {
+      "epoch": 0.011017159225493706,
+      "eval_loss": 1.1761000156402588,
+      "eval_runtime": 1687.4489,
+      "eval_samples_per_second": 27.178,
+      "eval_steps_per_second": 6.795,
+      "step": 300
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5962035200196608e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null