Training in progress, step 5920, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +115 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8b79b765163726683a5b97fb0c1abea490722bbbf1fc391f9b7a985bcdd16c3
 size 479769104

 version https://git-lfs.github.com/spec/v1
+oid sha256:8386ad9cbf76ebd86a333cb2de2e393ac58f2eb5d5851c4a37eaf7dfabf8e1ab
 size 479769104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c4be8aec95d333750d7914e18caa878b08e168adf924ebf31647ab744346164
 size 240728404

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae6e60f3154077edf3be4b7a48662dd49359a11649f46c508259fc0512184b10
 size 240728404

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d08e0bef61039a4f769cc5e4c3c08f715d445eab2242d23ea2e8a9e30cd2439d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:80758c06b27a6f832fca7f167d67f92046d71167a066f87edf1f02350e7dfed9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.38170048730816786,
   "eval_steps": 500,
-  "global_step": 5904,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -41335,6 +41335,118 @@
       "learning_rate": 6.906780795455553e-06,
       "loss": 1.2322,
       "step": 5904
     }
   ],
   "logging_steps": 1,
@@ -41354,7 +41466,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.346276383970312e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.38273490597295967,
   "eval_steps": 500,
+  "global_step": 5920,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.906780795455553e-06,
       "loss": 1.2322,
       "step": 5904
+    },
+    {
+      "epoch": 0.3817651384747174,
+      "grad_norm": 2.9935035705566406,
+      "learning_rate": 6.905832421729944e-06,
+      "loss": 1.0633,
+      "step": 5905
+    },
+    {
+      "epoch": 0.38182978964126685,
+      "grad_norm": 2.6144261360168457,
+      "learning_rate": 6.9048839677773235e-06,
+      "loss": 1.1399,
+      "step": 5906
+    },
+    {
+      "epoch": 0.3818944408078163,
+      "grad_norm": 2.6496615409851074,
+      "learning_rate": 6.9039354336376195e-06,
+      "loss": 1.1642,
+      "step": 5907
+    },
+    {
+      "epoch": 0.38195909197436584,
+      "grad_norm": 2.639331340789795,
+      "learning_rate": 6.902986819350757e-06,
+      "loss": 1.1909,
+      "step": 5908
+    },
+    {
+      "epoch": 0.3820237431409153,
+      "grad_norm": 2.60591197013855,
+      "learning_rate": 6.90203812495667e-06,
+      "loss": 1.0447,
+      "step": 5909
+    },
+    {
+      "epoch": 0.3820883943074648,
+      "grad_norm": 2.6222944259643555,
+      "learning_rate": 6.901089350495296e-06,
+      "loss": 1.1576,
+      "step": 5910
+    },
+    {
+      "epoch": 0.3821530454740143,
+      "grad_norm": 2.601048231124878,
+      "learning_rate": 6.900140496006572e-06,
+      "loss": 1.2307,
+      "step": 5911
+    },
+    {
+      "epoch": 0.38221769664056376,
+      "grad_norm": 2.1494765281677246,
+      "learning_rate": 6.899191561530441e-06,
+      "loss": 1.1478,
+      "step": 5912
+    },
+    {
+      "epoch": 0.38228234780711323,
+      "grad_norm": 2.7690351009368896,
+      "learning_rate": 6.8982425471068495e-06,
+      "loss": 1.1497,
+      "step": 5913
+    },
+    {
+      "epoch": 0.38234699897366276,
+      "grad_norm": 2.669267177581787,
+      "learning_rate": 6.897293452775746e-06,
+      "loss": 1.2059,
+      "step": 5914
+    },
+    {
+      "epoch": 0.3824116501402122,
+      "grad_norm": 2.6427619457244873,
+      "learning_rate": 6.896344278577083e-06,
+      "loss": 1.1997,
+      "step": 5915
+    },
+    {
+      "epoch": 0.3824763013067617,
+      "grad_norm": 2.8328170776367188,
+      "learning_rate": 6.8953950245508186e-06,
+      "loss": 1.174,
+      "step": 5916
+    },
+    {
+      "epoch": 0.3825409524733112,
+      "grad_norm": 2.3416833877563477,
+      "learning_rate": 6.894445690736911e-06,
+      "loss": 1.2323,
+      "step": 5917
+    },
+    {
+      "epoch": 0.3826056036398607,
+      "grad_norm": 2.6076183319091797,
+      "learning_rate": 6.89349627717532e-06,
+      "loss": 1.2821,
+      "step": 5918
+    },
+    {
+      "epoch": 0.38267025480641015,
+      "grad_norm": 2.3602590560913086,
+      "learning_rate": 6.892546783906016e-06,
+      "loss": 1.1353,
+      "step": 5919
+    },
+    {
+      "epoch": 0.38273490597295967,
+      "grad_norm": 2.70065975189209,
+      "learning_rate": 6.891597210968965e-06,
+      "loss": 1.1731,
+      "step": 5920
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.360788093260792e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null