Training in progress, step 145, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:feee0a69dbcd321bf78ea0b4b76e9c5124d68ccf8b93018334bb8f2f734b7ad3
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:aafad848e81bd1d81e2af2ea9fafb99b49a6d95ebc286c86b8ee31aa5edbccf4
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d7c39e6e6222cb74622e3254d42bf0e9b0b3b058c628cd0ef84b4a2e1322e35
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9d939b609dab234b7bb9d7daace8308f1bf7b81dd32b9f6d084edcd20ac6ce3
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0fb787d7b71a8f1899c8d99ef06e195074c24a984dea6c9926cd15c76a124153
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:deccb07511b8131b8a085ace65a43ba4c84d902fddc80cd24f284c9d76e3a5f6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80407f1621aa96065dd58d5f53331105dff3bac7a38e8f9d80f24f4c3788046c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c82db81101935c1ef6b1e22d89d4bc0889a9580697f0068651662ac9609b43a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.18117114202523454,
   "eval_steps": 386,
-  "global_step": 140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -995,6 +995,41 @@
       "learning_rate": 9.986238191873874e-05,
       "loss": 0.875,
       "step": 140
     }
   ],
   "logging_steps": 1,
@@ -1014,7 +1049,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.5651120403709952e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.18764153995470723,
   "eval_steps": 386,
+  "global_step": 145,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.986238191873874e-05,
       "loss": 0.875,
       "step": 140
+    },
+    {
+      "epoch": 0.18246522161112907,
+      "grad_norm": 0.8143295645713806,
+      "learning_rate": 9.985930819224247e-05,
+      "loss": 0.8454,
+      "step": 141
+    },
+    {
+      "epoch": 0.1837593011970236,
+      "grad_norm": 0.8755755424499512,
+      "learning_rate": 9.985620056603348e-05,
+      "loss": 0.8029,
+      "step": 142
+    },
+    {
+      "epoch": 0.18505338078291814,
+      "grad_norm": 0.899174690246582,
+      "learning_rate": 9.985305904222469e-05,
+      "loss": 0.9608,
+      "step": 143
+    },
+    {
+      "epoch": 0.1863474603688127,
+      "grad_norm": 0.920137882232666,
+      "learning_rate": 9.984988362295203e-05,
+      "loss": 0.9022,
+      "step": 144
+    },
+    {
+      "epoch": 0.18764153995470723,
+      "grad_norm": 1.1012908220291138,
+      "learning_rate": 9.984667431037447e-05,
+      "loss": 0.9621,
+      "step": 145
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.6210088989556736e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null