Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bcb30213c268b08c544b2c7db8161f621672109ce6ac437fbf66b545997526d
 size 101752088

 version https://git-lfs.github.com/spec/v1
+oid sha256:700019b4cc90c4ccfef934b93e7efafea436715bfa2b61c858bf365343c1ba32
 size 101752088

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0645116a7c0984cadcde7eccc2e07e2cf29f24691185e9c2bb5c66a51d9adad
 size 52046596

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0f3c0fa37a780786a00d26a44a81dd51da3cfd5944375cd956a61dc2b87e71e
 size 52046596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0580e74248926dc2cb618fc9ab207371e56a1f66f91fbcaaa73e7d0342f25366
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:83927fe0efc06382dff96f72c027c114b9cfb264f5d9c65a1275f1784c3318fa
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb97b24f34ff3e53eec5be9cf35c1a7161c58dbc2fed7dda160fb3eb64e5f353
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.011328592107747499,
   "eval_steps": 25,
-  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -669,6 +669,84 @@
       "learning_rate": 6.030737921409169e-06,
       "loss": 0.0975,
       "step": 90
     }
   ],
   "logging_steps": 1,
@@ -683,12 +761,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.92684561465344e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.012587324564163886,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.030737921409169e-06,
       "loss": 0.0975,
       "step": 90
+    },
+    {
+      "epoch": 0.011454465353389138,
+      "grad_norm": 0.0066988165490329266,
+      "learning_rate": 4.8943483704846475e-06,
+      "loss": 0.0004,
+      "step": 91
+    },
+    {
+      "epoch": 0.011580338599030775,
+      "grad_norm": 0.0022131705190986395,
+      "learning_rate": 3.873830406168111e-06,
+      "loss": 0.0004,
+      "step": 92
+    },
+    {
+      "epoch": 0.011706211844672415,
+      "grad_norm": 0.01956171914935112,
+      "learning_rate": 2.970427372400353e-06,
+      "loss": 0.0015,
+      "step": 93
+    },
+    {
+      "epoch": 0.011832085090314054,
+      "grad_norm": 0.0475313700735569,
+      "learning_rate": 2.1852399266194314e-06,
+      "loss": 0.0016,
+      "step": 94
+    },
+    {
+      "epoch": 0.011957958335955693,
+      "grad_norm": 3.4122633934020996,
+      "learning_rate": 1.5192246987791981e-06,
+      "loss": 0.7071,
+      "step": 95
+    },
+    {
+      "epoch": 0.01208383158159733,
+      "grad_norm": 0.13446319103240967,
+      "learning_rate": 9.731931258429638e-07,
+      "loss": 0.0256,
+      "step": 96
+    },
+    {
+      "epoch": 0.01220970482723897,
+      "grad_norm": 0.10620174556970596,
+      "learning_rate": 5.478104631726711e-07,
+      "loss": 0.0039,
+      "step": 97
+    },
+    {
+      "epoch": 0.01233557807288061,
+      "grad_norm": 0.5097644329071045,
+      "learning_rate": 2.4359497401758024e-07,
+      "loss": 0.0671,
+      "step": 98
+    },
+    {
+      "epoch": 0.012461451318522249,
+      "grad_norm": 1.6247103214263916,
+      "learning_rate": 6.09172980904238e-08,
+      "loss": 0.0878,
+      "step": 99
+    },
+    {
+      "epoch": 0.012587324564163886,
+      "grad_norm": 0.002280471846461296,
+      "learning_rate": 0.0,
+      "loss": 0.0001,
+      "step": 100
+    },
+    {
+      "epoch": 0.012587324564163886,
+      "eval_loss": NaN,
+      "eval_runtime": 3565.9397,
+      "eval_samples_per_second": 0.938,
+      "eval_steps_per_second": 0.469,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.5853840162816e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null