Training in progress, step 465, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a8e766c93c4cb51d235429ab576d2269c0fe74ccc12cbec07722e592fc31d83
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb22d9108d46568eed2a25c6378eb936291db1fd5def7bc554afe2f3f354388f
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9f249a9f18d595322f53870ff656ff9f0fb190feac83ed8e8e07c86b79d402d
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:feddcd4823e3bf21b23adc8b4128b78879305ba02f9d65f76af450bf7e0ad182
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4d5ea361bd8f109ccf9deae94ad5b06097316390b345e2c5b8dfcae47e6460a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:79792a0c4a549329639ba69896508be1c58dde196d323715b7d4f344f0e143f2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:702e6ac2a5684998fe08dbec2461764b61e07652058ef3b71a2777c5464d9e27
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:19b2a9c436b3bae0ae5d93979cf58505c2ebbb5587127ec39bd71f602d8b990f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.595276609511485,
   "eval_steps": 386,
-  "global_step": 460,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3243,6 +3243,41 @@
       "learning_rate": 9.716979960019173e-05,
       "loss": 0.8261,
       "step": 460
     }
   ],
   "logging_steps": 1,
@@ -3262,7 +3297,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.142510989790413e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6017470074409577,
   "eval_steps": 386,
+  "global_step": 465,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.716979960019173e-05,
       "loss": 0.8261,
       "step": 460
+    },
+    {
+      "epoch": 0.5965706890973795,
+      "grad_norm": 0.766036331653595,
+      "learning_rate": 9.715610945382177e-05,
+      "loss": 0.9601,
+      "step": 461
+    },
+    {
+      "epoch": 0.597864768683274,
+      "grad_norm": 0.9155464172363281,
+      "learning_rate": 9.714238724566393e-05,
+      "loss": 0.8716,
+      "step": 462
+    },
+    {
+      "epoch": 0.5991588482691685,
+      "grad_norm": 0.8961794972419739,
+      "learning_rate": 9.712863298504807e-05,
+      "loss": 0.9595,
+      "step": 463
+    },
+    {
+      "epoch": 0.6004529278550631,
+      "grad_norm": 0.8102921843528748,
+      "learning_rate": 9.711484668132575e-05,
+      "loss": 0.7174,
+      "step": 464
+    },
+    {
+      "epoch": 0.6017470074409577,
+      "grad_norm": 0.7466800808906555,
+      "learning_rate": 9.710102834387043e-05,
+      "loss": 0.8707,
+      "step": 465
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.198407848375091e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null