Training in progress, step 44500, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:440a49b94440cacff83f386734abd67ba7ebf0590110ed4471a084cfe5123375
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c3a41320d77f15c9a600c514111a9e25e4cbd36ae0205d94fa0c4cb342f789a
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa8b9c57b8597bab044debeab7cc337cd4ec0227fbde8dd82185f8b22bf3a034
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6c9fdfcdbc56237f182572da63342d4ff7b8e0b0e45095657eb0e743d19f6ce
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d5486da749b598a61d62adc9f231f7450ebff3f0e201a6edd91a104ce06d0fc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:377a47bd0e88de4a26ae8d013d8cc57599e57dfed8febc9da0fea06bb8f5c79f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91c87f62d5aed9d7a6d425bf9b7734b81e555049c990c49c2d251a8a04bf9890
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c2f59490e3dd2b05891cf4a120f5b3dd4417dfcdb085e76dd005b201e1dc531
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.9333333333333336,
   "eval_steps": 500,
-  "global_step": 44000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3103,6 +3103,41 @@
       "learning_rate": 4.5155555555555554e-07,
       "loss": 0.051,
       "step": 44000
     }
   ],
   "logging_steps": 100,
@@ -3122,7 +3157,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0717658873856e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.966666666666667,
   "eval_steps": 500,
+  "global_step": 44500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.5155555555555554e-07,
       "loss": 0.051,
       "step": 44000
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 0.1060103327035904,
+      "learning_rate": 4.071111111111112e-07,
+      "loss": 0.0461,
+      "step": 44100
+    },
+    {
+      "epoch": 2.9466666666666668,
+      "grad_norm": 0.43563324213027954,
+      "learning_rate": 3.6266666666666674e-07,
+      "loss": 0.0553,
+      "step": 44200
+    },
+    {
+      "epoch": 2.953333333333333,
+      "grad_norm": 0.11668545007705688,
+      "learning_rate": 3.182222222222223e-07,
+      "loss": 0.0514,
+      "step": 44300
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 0.20122574269771576,
+      "learning_rate": 2.7377777777777783e-07,
+      "loss": 0.0582,
+      "step": 44400
+    },
+    {
+      "epoch": 2.966666666666667,
+      "grad_norm": 0.2473125159740448,
+      "learning_rate": 2.2933333333333335e-07,
+      "loss": 0.0569,
+      "step": 44500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.0839450451968e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null