Training in progress, epoch 9, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ea2e65249359f7498c99e819c5c440b435850f09822fe34f748594fe4bf15ce
 size 30026872

 version https://git-lfs.github.com/spec/v1
+oid sha256:b22f7889e393eef2c41e0eef350a180536835e34c14a864af29a5b66201b2ade
 size 30026872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ca25f8a0191359369a8d56e18784c849725db5f190a3786194d98a5293cd299
 size 60252034

 version https://git-lfs.github.com/spec/v1
+oid sha256:28e5bc3ff089c126a0962162d715588b242cae0becd028cf4a99ba19493297f3
 size 60252034

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b13df9b8a6e932f60cc824973005b08e233a907e28346b89a8b1e69dc0619bf
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f0077f1f8d22a03518b4343c89ed7062a32fe871c532552f2316a11dfe0b2ae
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:484a7481700feab9fe61bb6bd94f7263dd845a7be122d0e765a896201ef74e87
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9577a2f09c8cfc154aa878462b440d77446bdadf80155c053bc9a6c49feaa8d5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.0,
   "eval_steps": 500,
-  "global_step": 2234,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -161,6 +161,20 @@
       "learning_rate": 0.0002,
       "loss": 0.2684,
       "step": 2200
     }
   ],
   "logging_steps": 100,
@@ -175,12 +189,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.3669888277374566e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 9.989939637826962,
   "eval_steps": 500,
+  "global_step": 2480,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0002,
       "loss": 0.2684,
       "step": 2200
+    },
+    {
+      "epoch": 9.26559356136821,
+      "grad_norm": 0.37764859199523926,
+      "learning_rate": 0.0002,
+      "loss": 0.2411,
+      "step": 2300
+    },
+    {
+      "epoch": 9.668008048289739,
+      "grad_norm": 0.4754630923271179,
+      "learning_rate": 0.0002,
+      "loss": 0.2334,
+      "step": 2400
     }
   ],
   "logging_steps": 100,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.737749459619021e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null