Training in progress, step 535, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32f9aed1f2401361ea2ec2e007476a61dfde89a92cbfa844aa250aa8aae479b9
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:173c968110406154bbd6958e0575f0041084fe35c92872803251c373142f7599
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45205e8e1552e7490a9b7109bcece0f8aed45b92c38d969d378e0ccf54848cae
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:731ab2eb7506e7b8fba727ae9e889e4883eba19e71ccb1aee37d0543327dcf26
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4561e3ee2c17cf50619bdcd83396129cd0378f1bf28e100d14054fa49d2339e3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f9ee5b1533ef89e38b0a831d8e4b8b744f0cbca34e3e202f7d262ca1861c412
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cbba19fa588799a647ab10e3adc360f543d9651c93cfcae6eb523c65fdc2328
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:25f6265b720d778f2ce309335230d277e55711db968e54ca2d8c342eedbbfb57
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6858621805241022,
   "eval_steps": 386,
-  "global_step": 530,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3733,6 +3733,41 @@
       "learning_rate": 9.613459075424034e-05,
       "loss": 0.8122,
       "step": 530
     }
   ],
   "logging_steps": 1,
@@ -3752,7 +3787,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.92506700997591e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6923325784535749,
   "eval_steps": 386,
+  "global_step": 535,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.613459075424034e-05,
       "loss": 0.8122,
       "step": 530
+    },
+    {
+      "epoch": 0.6871562601099968,
+      "grad_norm": 0.7333494424819946,
+      "learning_rate": 9.611868000118452e-05,
+      "loss": 0.8027,
+      "step": 531
+    },
+    {
+      "epoch": 0.6884503396958913,
+      "grad_norm": 0.7772257924079895,
+      "learning_rate": 9.61027378916968e-05,
+      "loss": 0.8538,
+      "step": 532
+    },
+    {
+      "epoch": 0.6897444192817859,
+      "grad_norm": 0.7605924606323242,
+      "learning_rate": 9.60867644366163e-05,
+      "loss": 0.875,
+      "step": 533
+    },
+    {
+      "epoch": 0.6910384988676803,
+      "grad_norm": 0.8444223999977112,
+      "learning_rate": 9.607075964680352e-05,
+      "loss": 1.0179,
+      "step": 534
+    },
+    {
+      "epoch": 0.6923325784535749,
+      "grad_norm": 0.7386454939842224,
+      "learning_rate": 9.605472353314023e-05,
+      "loss": 0.9023,
+      "step": 535
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.980963868560589e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null