Training in progress, step 3600, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8973c002011f7f1b9898a29aed0c9bab5843eb09cdda6107a4a5cced83da0ae1
 size 13982248

 version https://git-lfs.github.com/spec/v1
+oid sha256:42d62c631ca13cfab3a8d4e18d1338c8812ac7251234fda355f962f76c871066
 size 13982248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f7f81d4d1f3bbb4230cc8c8dc308cda42affb803c02645e414ec74a4c1f265e
 size 7062522

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5a7aeac9850dfd4db801131fdbab98330fcd65542d55a2e5dc82fa5741fbbe3
 size 7062522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da4d99ffa87904e6b79db14e0ce4002f8d75f4ea2e6431aa6caa66c2cc565031
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc3d6dc000d3ba51393607c0971658acfb92d6dc5b372d9d0679847e9b97f57f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ebd0c8b6de9b2d27d16b51d811c091dfd903215da9d823fb4789b5397d7f1dd1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed823fd53aa11b151a56285d111464eb705a2816abfaf8a7d50c1ca9441a3e0d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2864,
   "eval_steps": 500,
-  "global_step": 3580,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1260,6 +1260,13 @@
       "learning_rate": 2.8287046721439487e-05,
       "loss": 3.5881,
       "step": 3580
     }
   ],
   "logging_steps": 20,
@@ -1267,7 +1274,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 20,
-  "total_flos": 8475145066708992.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.288,
   "eval_steps": 500,
+  "global_step": 3600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.8287046721439487e-05,
       "loss": 3.5881,
       "step": 3580
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 53.12272644042969,
+      "learning_rate": 2.8264432360950355e-05,
+      "loss": 3.5626,
+      "step": 3600
     }
   ],
   "logging_steps": 20,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 20,
+  "total_flos": 8519236213211136.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null