Training in progress, step 1716, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0930d6e64875820c22c7cffca09ca1acaf6cdcc1e7968d0c5a856968a87824e8
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:12fb4cb11627ad284c1d37770a6aacfa220bc7356007fd91be56ae9b13cfa7e1
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b9d593632762fa95cd51142f1adb4c560e59f1f9d92ecb7de05d6485c887205
 size 81735892

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d7084c316ee634e4748f581b2bd6c1841fa446bb7314925c6856b0c688d9d59
 size 81735892

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b283d316b0c499174401fc8457651f1fb183c6003c46a4d25e29dfecd151147
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfdfe6864f5def6fb115e8ca14ab15e350070dafd726a524cb7ea2d2792031bc
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a6797f0f81e1d80bc4d2d6295ad3c421b4b433370ca9e0c209b11267f3ef64f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ddc0a7b6e0502913c27e2140efcb7b2ae87f20e67eabbf1a4c9eb14a1147911a
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2bd17fa23f67ef7fbf6e377f7e0c23474bf385755bb96f63949a2752039f1de4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3972fa5d27ce600e42d2f7fa59c3c380c3f284988e6b7b9d744cfc80bdadb18e
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:212929e3bfad92319ef54b8b509922f96991c6c7d7791e9983b6f58b96c35aff
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:31938e2b87878021814bafc01027edb09402ab03f9b4018907cc3c40a6a45630
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:671918de7ffc87d6187292033f79bb1cacaa6a7d5996a986d5989df4cdad43d1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2aa591540982b0ed0561ae043be4daaa0300d3947c5a8b6265d20fe06871fc8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9906759906759907,
   "eval_steps": 500,
-  "global_step": 1700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11907,6 +11907,118 @@
       "learning_rate": 9.324009324009324e-06,
       "loss": 0.003,
       "step": 1700
     }
   ],
   "logging_steps": 1,
@@ -11921,12 +12033,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.109695330572042e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 1716,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.324009324009324e-06,
       "loss": 0.003,
       "step": 1700
+    },
+    {
+      "epoch": 0.9912587412587412,
+      "grad_norm": 0.0011362357763573527,
+      "learning_rate": 8.741258741258741e-06,
+      "loss": 0.0032,
+      "step": 1701
+    },
+    {
+      "epoch": 0.9918414918414918,
+      "grad_norm": 0.0010037495521828532,
+      "learning_rate": 8.158508158508159e-06,
+      "loss": 0.0033,
+      "step": 1702
+    },
+    {
+      "epoch": 0.9924242424242424,
+      "grad_norm": 0.0010420752223581076,
+      "learning_rate": 7.5757575757575764e-06,
+      "loss": 0.004,
+      "step": 1703
+    },
+    {
+      "epoch": 0.993006993006993,
+      "grad_norm": 0.0010835586581379175,
+      "learning_rate": 6.993006993006993e-06,
+      "loss": 0.0033,
+      "step": 1704
+    },
+    {
+      "epoch": 0.9935897435897436,
+      "grad_norm": 0.0009612834546715021,
+      "learning_rate": 6.41025641025641e-06,
+      "loss": 0.0046,
+      "step": 1705
+    },
+    {
+      "epoch": 0.9941724941724942,
+      "grad_norm": 0.0007791322423145175,
+      "learning_rate": 5.827505827505828e-06,
+      "loss": 0.003,
+      "step": 1706
+    },
+    {
+      "epoch": 0.9947552447552448,
+      "grad_norm": 0.0017918848898261786,
+      "learning_rate": 5.244755244755245e-06,
+      "loss": 0.0038,
+      "step": 1707
+    },
+    {
+      "epoch": 0.9953379953379954,
+      "grad_norm": 0.0009001931175589561,
+      "learning_rate": 4.662004662004662e-06,
+      "loss": 0.0037,
+      "step": 1708
+    },
+    {
+      "epoch": 0.995920745920746,
+      "grad_norm": 0.0008824478718452156,
+      "learning_rate": 4.079254079254079e-06,
+      "loss": 0.004,
+      "step": 1709
+    },
+    {
+      "epoch": 0.9965034965034965,
+      "grad_norm": 0.0010619634995236993,
+      "learning_rate": 3.4965034965034966e-06,
+      "loss": 0.003,
+      "step": 1710
+    },
+    {
+      "epoch": 0.997086247086247,
+      "grad_norm": 0.0007909830892458558,
+      "learning_rate": 2.913752913752914e-06,
+      "loss": 0.0023,
+      "step": 1711
+    },
+    {
+      "epoch": 0.9976689976689976,
+      "grad_norm": 0.0019757202826440334,
+      "learning_rate": 2.331002331002331e-06,
+      "loss": 0.0065,
+      "step": 1712
+    },
+    {
+      "epoch": 0.9982517482517482,
+      "grad_norm": 0.0008470152388326824,
+      "learning_rate": 1.7482517482517483e-06,
+      "loss": 0.0027,
+      "step": 1713
+    },
+    {
+      "epoch": 0.9988344988344988,
+      "grad_norm": 0.0009145813528448343,
+      "learning_rate": 1.1655011655011655e-06,
+      "loss": 0.0035,
+      "step": 1714
+    },
+    {
+      "epoch": 0.9994172494172494,
+      "grad_norm": 0.0007738119456917048,
+      "learning_rate": 5.827505827505827e-07,
+      "loss": 0.0038,
+      "step": 1715
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.0009543611668050289,
+      "learning_rate": 0.0,
+      "loss": 0.0042,
+      "step": 1716
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.176610110153897e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null