Training in progress, step 7500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51185f7f9f5eaf067c0fba104ab4ea34667f1c1318d7724222d2153c3c86c009
 size 577789320

 version https://git-lfs.github.com/spec/v1
+oid sha256:edf345f4a3e27a4cec4b72cd6af0fa7fa4858493f48d1ba748ecbc6aefd06c07
 size 577789320

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:762da91a2d65fd1a97e73fa4248b5b59c3805c170547f6797a30b464e18916b0
 size 1155772233

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed2cac3253f5b08fe26579efe91ac53d8ff0449bd135c02868dafd16cbabb683
 size 1155772233

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bdeb767d7b7527027e2b927ca8d260baec46c420290e65d03927d23a7650644
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:365d91d890037cfbeac60120c074da46ad74cccfd1e677fb9dcd524589ceea77
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79faef7859961c2a638e4312a796703fd0c86e5877b740a2ce8b47db225af025
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c124d5873e6dde5040a6a31973ca68127e3dfe277695e3cb113af8a1a01ff662
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.402925044298172,
   "best_model_checkpoint": "mikhail-panzo/fil_b128_le5_s8000/checkpoint-4500",
-  "epoch": 608.695652173913,
   "eval_steps": 500,
-  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1099,6 +1099,84 @@
       "eval_samples_per_second": 12.67,
       "eval_steps_per_second": 1.653,
       "step": 7000
     }
   ],
   "logging_steps": 50,
@@ -1118,7 +1196,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6967083256637626e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.402925044298172,
   "best_model_checkpoint": "mikhail-panzo/fil_b128_le5_s8000/checkpoint-4500",
+  "epoch": 652.1739130434783,
   "eval_steps": 500,
+  "global_step": 7500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.67,
       "eval_steps_per_second": 1.653,
       "step": 7000
+    },
+    {
+      "epoch": 613.0434782608696,
+      "grad_norm": 1.6197575330734253,
+      "learning_rate": 1.5883333333333333e-06,
+      "loss": 0.4003,
+      "step": 7050
+    },
+    {
+      "epoch": 617.3913043478261,
+      "grad_norm": 0.6977828741073608,
+      "learning_rate": 1.505e-06,
+      "loss": 0.3998,
+      "step": 7100
+    },
+    {
+      "epoch": 621.7391304347826,
+      "grad_norm": 0.665290355682373,
+      "learning_rate": 1.4216666666666667e-06,
+      "loss": 0.4003,
+      "step": 7150
+    },
+    {
+      "epoch": 626.0869565217391,
+      "grad_norm": 0.7739102840423584,
+      "learning_rate": 1.3383333333333334e-06,
+      "loss": 0.4033,
+      "step": 7200
+    },
+    {
+      "epoch": 630.4347826086956,
+      "grad_norm": 0.6389193534851074,
+      "learning_rate": 1.255e-06,
+      "loss": 0.4039,
+      "step": 7250
+    },
+    {
+      "epoch": 634.7826086956521,
+      "grad_norm": 0.6950168609619141,
+      "learning_rate": 1.1716666666666667e-06,
+      "loss": 0.4017,
+      "step": 7300
+    },
+    {
+      "epoch": 639.1304347826087,
+      "grad_norm": 0.6477563381195068,
+      "learning_rate": 1.0883333333333334e-06,
+      "loss": 0.3986,
+      "step": 7350
+    },
+    {
+      "epoch": 643.4782608695652,
+      "grad_norm": 0.5801939964294434,
+      "learning_rate": 1.0050000000000001e-06,
+      "loss": 0.3996,
+      "step": 7400
+    },
+    {
+      "epoch": 647.8260869565217,
+      "grad_norm": 0.6561626195907593,
+      "learning_rate": 9.216666666666667e-07,
+      "loss": 0.4035,
+      "step": 7450
+    },
+    {
+      "epoch": 652.1739130434783,
+      "grad_norm": 0.6258853077888489,
+      "learning_rate": 8.383333333333334e-07,
+      "loss": 0.4076,
+      "step": 7500
+    },
+    {
+      "epoch": 652.1739130434783,
+      "eval_loss": 0.40540868043899536,
+      "eval_runtime": 13.1806,
+      "eval_samples_per_second": 12.215,
+      "eval_steps_per_second": 1.593,
+      "step": 7500
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 1.817971026659378e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null