Training in progress, step 210, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92d85651c439ce48ea3d56c40d0425842d7994baed4a47aad62e36d94c4b23f1
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:16acd35cdd17656ddb811443514f4fefe3f6d3ace02104b444f808878be0a446
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bf193ed81f93c6475fd57733a56c1fe79c8c69f8b94c784383e191d9cc545dd
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:b864d00e53abbcf0ba5a47cdcdc361043645c67523d5bbd4753f6288f40ebd11
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1abed11a84ff2a3bab289ee4dbc5e653fda08a38f63ccae1554d6dac1565b7c1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd3ede1e442e00bb3bcfd8dae231cfac01718814ec6cfa9ce6911cfe8dfd1868
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8250598c1a75cab0d8ccbc05b8ea5e24aa609b6e2d13838f6a479d4b06a86035
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d53ca07d33e8ecdd315a8273f1598103e91806274a3fd55324a36431c112bee0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2652863151083792,
   "eval_steps": 386,
-  "global_step": 205,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1450,6 +1450,41 @@
       "learning_rate": 9.9592186176036e-05,
       "loss": 0.8889,
       "step": 205
     }
   ],
   "logging_steps": 1,
@@ -1469,7 +1504,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.2917712019718144e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2717567130378518,
   "eval_steps": 386,
+  "global_step": 210,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.9592186176036e-05,
       "loss": 0.8889,
       "step": 205
+    },
+    {
+      "epoch": 0.2665803946942737,
+      "grad_norm": 1.0072762966156006,
+      "learning_rate": 9.958691436863188e-05,
+      "loss": 0.8358,
+      "step": 206
+    },
+    {
+      "epoch": 0.26787447428016825,
+      "grad_norm": 0.9463568329811096,
+      "learning_rate": 9.958160884671761e-05,
+      "loss": 0.8815,
+      "step": 207
+    },
+    {
+      "epoch": 0.26916855386606275,
+      "grad_norm": 0.9203188419342041,
+      "learning_rate": 9.957626961390047e-05,
+      "loss": 0.9312,
+      "step": 208
+    },
+    {
+      "epoch": 0.2704626334519573,
+      "grad_norm": 1.0614677667617798,
+      "learning_rate": 9.957089667381064e-05,
+      "loss": 0.9822,
+      "step": 209
+    },
+    {
+      "epoch": 0.2717567130378518,
+      "grad_norm": 0.8971818089485168,
+      "learning_rate": 9.956549003010123e-05,
+      "loss": 0.9421,
+      "step": 210
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.3476680605564928e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null