Training in progress, step 63, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +60 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14e4ad5a835a88945e0ee4038f25352c397f33c5f4adde5fca373295f998c61d
 size 191968

 version https://git-lfs.github.com/spec/v1
+oid sha256:770140205d2dac43d3cffa97db7b856382360e2b5d689600fb6a190f3a214871
 size 191968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3e22b37135b24d3ba3c1b6f87d393c15fea5d409e7cec9f00e2d6cb56242c35
 size 253144

 version https://git-lfs.github.com/spec/v1
+oid sha256:1a359e38865ee20b10b8cc286fbdbd671be6b4685839e72323ccb7768a4790a4
 size 253144

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d1f55c7fe0357563a35d240cfc7f2436b04a081cefd509d645b538cc39e7506
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff00fff9cbfcd118e54c77073688cb2b84440267aaa2d4e090ca843731701c95
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17f7add06e301ad12114b22b3a83ca3a5bc8239932ceeb6478a372e3a37dacdc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:731f9a38a306fb54b040f8655d8ca8de9e109511292676024e32dd381563f07a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5075528700906344,
   "eval_steps": 21,
-  "global_step": 42,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -129,6 +129,63 @@
       "eval_samples_per_second": 535.377,
       "eval_steps_per_second": 68.834,
       "step": 42
     }
   ],
   "logging_steps": 3,
@@ -148,7 +205,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4685127745536.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7613293051359517,
   "eval_steps": 21,
+  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 535.377,
       "eval_steps_per_second": 68.834,
       "step": 42
+    },
+    {
+      "epoch": 0.5438066465256798,
+      "grad_norm": 0.15116359293460846,
+      "learning_rate": 9.480116264104011e-05,
+      "loss": 10.3382,
+      "step": 45
+    },
+    {
+      "epoch": 0.5800604229607251,
+      "grad_norm": 0.20655445754528046,
+      "learning_rate": 9.389110615965102e-05,
+      "loss": 10.3347,
+      "step": 48
+    },
+    {
+      "epoch": 0.6163141993957704,
+      "grad_norm": 0.15545906126499176,
+      "learning_rate": 9.291280532867302e-05,
+      "loss": 10.3275,
+      "step": 51
+    },
+    {
+      "epoch": 0.6525679758308157,
+      "grad_norm": 0.189162015914917,
+      "learning_rate": 9.186778126501916e-05,
+      "loss": 10.3294,
+      "step": 54
+    },
+    {
+      "epoch": 0.6888217522658611,
+      "grad_norm": 0.21338708698749542,
+      "learning_rate": 9.075765883062093e-05,
+      "loss": 10.3236,
+      "step": 57
+    },
+    {
+      "epoch": 0.7250755287009063,
+      "grad_norm": 0.23534299433231354,
+      "learning_rate": 8.958416410600187e-05,
+      "loss": 10.3183,
+      "step": 60
+    },
+    {
+      "epoch": 0.7613293051359517,
+      "grad_norm": 0.2692500054836273,
+      "learning_rate": 8.834912170647101e-05,
+      "loss": 10.3116,
+      "step": 63
+    },
+    {
+      "epoch": 0.7613293051359517,
+      "eval_loss": 10.310102462768555,
+      "eval_runtime": 0.2628,
+      "eval_samples_per_second": 532.785,
+      "eval_steps_per_second": 68.501,
+      "step": 63
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 7027691618304.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null