Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +53 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78dac7535dac9a926aa3d6f1cede2677776554a31ae3b03733c9be34bb45feb8
 size 3380768360

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a6d8f0f796537366c6e78ca004befff9f9c27672a628bae1e611c8bc0f94c8c
 size 3380768360

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ddc11eef2343db6f5ad7d3c768a98ae313ab5bdd1d33c62a12390009d4c92aa
 size 1757899449

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffd75d21ac4ac0a8645a72715f91e4d5f09c05dd5a2548ed04a8b49d623fc3a5
 size 1757899449

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97490d166ca8bc27bfa10807632f9ecb473b145cce74c93d287cde23f8af51fb
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5dc9af15ae765cffc21eeb6ddbc68a2629e47a5fc5164b3c35695e55c025ec4
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69b592d4efa2ff6d0584dfc8cf30049181a8d5c8977939386b39d5882c0a494e
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:213d03f95061a3291403e8d5572036299f2f6f739be51135e2941aff4f3ccff7
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.2508710801393728,
   "eval_steps": 30,
-  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -215,6 +215,56 @@
       "eval_samples_per_second": 0.291,
       "eval_steps_per_second": 0.073,
       "step": 120
     }
   ],
   "logging_steps": 5,
@@ -234,7 +284,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.763514566754386e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.5644599303135889,
   "eval_steps": 30,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 0.291,
       "eval_steps_per_second": 0.073,
       "step": 120
+    },
+    {
+      "epoch": 1.3031358885017421,
+      "grad_norm": 0.07795720547437668,
+      "learning_rate": 2.950898376017064e-05,
+      "loss": 0.1842,
+      "step": 125
+    },
+    {
+      "epoch": 1.3554006968641115,
+      "grad_norm": 0.07542526721954346,
+      "learning_rate": 2.573490187344596e-05,
+      "loss": 0.2031,
+      "step": 130
+    },
+    {
+      "epoch": 1.4076655052264808,
+      "grad_norm": 0.10047340393066406,
+      "learning_rate": 2.2133776843878186e-05,
+      "loss": 0.24,
+      "step": 135
+    },
+    {
+      "epoch": 1.4599303135888502,
+      "grad_norm": 0.13595731556415558,
+      "learning_rate": 1.873127678391816e-05,
+      "loss": 0.2808,
+      "step": 140
+    },
+    {
+      "epoch": 1.5121951219512195,
+      "grad_norm": 0.06210995092988014,
+      "learning_rate": 1.555165404621567e-05,
+      "loss": 0.235,
+      "step": 145
+    },
+    {
+      "epoch": 1.5644599303135889,
+      "grad_norm": 0.08401988446712494,
+      "learning_rate": 1.2617572357609564e-05,
+      "loss": 0.1849,
+      "step": 150
+    },
+    {
+      "epoch": 1.5644599303135889,
+      "eval_loss": 0.23435795307159424,
+      "eval_runtime": 1753.006,
+      "eval_samples_per_second": 0.291,
+      "eval_steps_per_second": 0.073,
+      "step": 150
     }
   ],
   "logging_steps": 5,
       "attributes": {}
     }
   },
+  "total_flos": 9.840854969157304e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null