Training in progress, step 195, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:673e0cd0d6be2cacd9fdbe1bf6f79ad95bd9fae922ec2bf88e1a8cd477cee5b9
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:21d753263aa81b1b8454592a6857435334e81996e06a4ba300add9b2dfb23768
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e9ca09e7df6f39976075d85646ed22483c4e258ed5192039fbb8a16a0177402
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:9038d12d05746b9a6d82e5217c8510507bfbdc663715afa5583ab6868239418d
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a0d0addcc11bf969393dd9428fea269c1f9808caf14a24eb1d95804a407c85d4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b359c5b6c1818e377053ca6ac8f02ef8f015748d4c9bdcdc6bf967222152a102
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e85fc8999195dd78d475e82d99b83e2dc13cd79ae7e2ade006c6bcb65bfced59
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:08bf85ff0f48b3d95c6198a52e97d2ada2b8ffebab00ffbb2f9654c1bbec3d72
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.24587512131996117,
   "eval_steps": 386,
-  "global_step": 190,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1345,6 +1345,41 @@
       "learning_rate": 9.966721516310682e-05,
       "loss": 0.9526,
       "step": 190
     }
   ],
   "logging_steps": 1,
@@ -1364,7 +1399,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1240806262177792e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2523455192494338,
   "eval_steps": 386,
+  "global_step": 195,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.966721516310682e-05,
       "loss": 0.9526,
       "step": 190
+    },
+    {
+      "epoch": 0.2471692009058557,
+      "grad_norm": 0.9828710556030273,
+      "learning_rate": 9.966244949063316e-05,
+      "loss": 0.8923,
+      "step": 191
+    },
+    {
+      "epoch": 0.24846328049175023,
+      "grad_norm": 1.0729883909225464,
+      "learning_rate": 9.965765005229248e-05,
+      "loss": 1.0115,
+      "step": 192
+    },
+    {
+      "epoch": 0.24975736007764476,
+      "grad_norm": 0.9844326972961426,
+      "learning_rate": 9.965281685134796e-05,
+      "loss": 0.9855,
+      "step": 193
+    },
+    {
+      "epoch": 0.2510514396635393,
+      "grad_norm": 1.1593172550201416,
+      "learning_rate": 9.96479498910857e-05,
+      "loss": 1.0912,
+      "step": 194
+    },
+    {
+      "epoch": 0.2523455192494338,
+      "grad_norm": 0.8835370540618896,
+      "learning_rate": 9.964304917481482e-05,
+      "loss": 0.9951,
+      "step": 195
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.1799774848024576e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null