Training in progress, step 50, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf2b4f5eff67f45410237416c4cccc74bd41ca5a511e3d9ac899173a97cd6f56
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:356fdfa90c57716429d130041eda4cdec16f1db70c2f2dee857e5f3f1377335f
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f79429790de6bada6cadc827665b017692fc33daee136383414f5a43400f49c
 size 81730196

 version https://git-lfs.github.com/spec/v1
+oid sha256:212a99aec8af69127026e90e89cdaf8292af1e82c2b8ee188c2885a7e8db4b41
 size 81730196

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc0d6a68d922e782698a018376b53b85027f73e542c47378e687b580096f86bd
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:515ed9c683dff41b64c1d3a2ae8782a348f041debee6ea93d6dd2fcee4c2fcd4
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57c230e4a3d3176d8f4a84949e99fccf4bc6db16aab7ec60a204be03a7a4d58c
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c2ce6466218d3478f547123414311551be483ec15bd6249d9e7ad7c1f4b6834
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75c3c8ae7ddabeca3cb69703f6b35f00b9125a4c27b56ccba4ed0e4669bb5433
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2180293501048218,
   "eval_steps": 13,
-  "global_step": 39,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -312,6 +312,83 @@
       "eval_samples_per_second": 11.816,
       "eval_steps_per_second": 2.961,
       "step": 39
     }
   ],
   "logging_steps": 1,
@@ -326,12 +403,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.155238761538519e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.27952480782669464,
   "eval_steps": 13,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.816,
       "eval_steps_per_second": 2.961,
       "step": 39
+    },
+    {
+      "epoch": 0.2236198462613557,
+      "grad_norm": 0.5342937707901001,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 1.8122,
+      "step": 40
+    },
+    {
+      "epoch": 0.2292103424178896,
+      "grad_norm": 0.5158257484436035,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 1.7735,
+      "step": 41
+    },
+    {
+      "epoch": 0.2348008385744235,
+      "grad_norm": 0.4467351734638214,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.794,
+      "step": 42
+    },
+    {
+      "epoch": 0.24039133473095736,
+      "grad_norm": 0.5328588485717773,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 1.7981,
+      "step": 43
+    },
+    {
+      "epoch": 0.24598183088749126,
+      "grad_norm": 0.30072852969169617,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 1.8019,
+      "step": 44
+    },
+    {
+      "epoch": 0.25157232704402516,
+      "grad_norm": 0.33151668310165405,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 1.7739,
+      "step": 45
+    },
+    {
+      "epoch": 0.25716282320055905,
+      "grad_norm": 0.5027835965156555,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.792,
+      "step": 46
+    },
+    {
+      "epoch": 0.26275331935709295,
+      "grad_norm": 0.2943630516529083,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 1.7813,
+      "step": 47
+    },
+    {
+      "epoch": 0.26834381551362685,
+      "grad_norm": 0.37901201844215393,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 1.8137,
+      "step": 48
+    },
+    {
+      "epoch": 0.27393431167016075,
+      "grad_norm": 0.6345136165618896,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 1.8183,
+      "step": 49
+    },
+    {
+      "epoch": 0.27952480782669464,
+      "grad_norm": 0.42185863852500916,
+      "learning_rate": 0.0,
+      "loss": 1.7685,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0455434309664768e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null