Training in progress, step 5904, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +115 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51ac3bb4c5fcc7547d13a6f3177921af8b0ac323a92c1b2ebd751378b49005c5
 size 479769104

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8b79b765163726683a5b97fb0c1abea490722bbbf1fc391f9b7a985bcdd16c3
 size 479769104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c8d921bc3819800c6bfb305e9ed76511a11b74bf2e480cf600d82fb81c1fd2d
 size 240728404

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c4be8aec95d333750d7914e18caa878b08e168adf924ebf31647ab744346164
 size 240728404

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e44d2265428e258699e7753397e520393bda7a4c701384d0238f0149ff68231
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d08e0bef61039a4f769cc5e4c3c08f715d445eab2242d23ea2e8a9e30cd2439d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3806660686433761,
   "eval_steps": 500,
-  "global_step": 5888,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -41223,6 +41223,118 @@
       "learning_rate": 6.921943831625517e-06,
       "loss": 1.087,
       "step": 5888
     }
   ],
   "logging_steps": 1,
@@ -41242,7 +41354,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.332207877349507e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.38170048730816786,
   "eval_steps": 500,
+  "global_step": 5904,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.921943831625517e-06,
       "loss": 1.087,
       "step": 5888
+    },
+    {
+      "epoch": 0.38073071980992557,
+      "grad_norm": 2.504150390625,
+      "learning_rate": 6.92099674695922e-06,
+      "loss": 1.1731,
+      "step": 5889
+    },
+    {
+      "epoch": 0.38079537097647503,
+      "grad_norm": 2.604978322982788,
+      "learning_rate": 6.920049581427559e-06,
+      "loss": 1.0603,
+      "step": 5890
+    },
+    {
+      "epoch": 0.38086002214302456,
+      "grad_norm": 2.4367499351501465,
+      "learning_rate": 6.91910233507041e-06,
+      "loss": 1.1082,
+      "step": 5891
+    },
+    {
+      "epoch": 0.380924673309574,
+      "grad_norm": 2.471052885055542,
+      "learning_rate": 6.918155007927646e-06,
+      "loss": 1.164,
+      "step": 5892
+    },
+    {
+      "epoch": 0.3809893244761235,
+      "grad_norm": 2.616631507873535,
+      "learning_rate": 6.917207600039144e-06,
+      "loss": 1.2193,
+      "step": 5893
+    },
+    {
+      "epoch": 0.381053975642673,
+      "grad_norm": 2.8797757625579834,
+      "learning_rate": 6.9162601114447855e-06,
+      "loss": 1.1593,
+      "step": 5894
+    },
+    {
+      "epoch": 0.3811186268092225,
+      "grad_norm": 2.8945086002349854,
+      "learning_rate": 6.915312542184458e-06,
+      "loss": 1.0885,
+      "step": 5895
+    },
+    {
+      "epoch": 0.38118327797577195,
+      "grad_norm": 3.071239709854126,
+      "learning_rate": 6.91436489229805e-06,
+      "loss": 1.1257,
+      "step": 5896
+    },
+    {
+      "epoch": 0.3812479291423215,
+      "grad_norm": 2.603156805038452,
+      "learning_rate": 6.913417161825449e-06,
+      "loss": 1.1818,
+      "step": 5897
+    },
+    {
+      "epoch": 0.38131258030887094,
+      "grad_norm": 2.5029022693634033,
+      "learning_rate": 6.912469350806554e-06,
+      "loss": 1.0913,
+      "step": 5898
+    },
+    {
+      "epoch": 0.3813772314754204,
+      "grad_norm": 2.4416632652282715,
+      "learning_rate": 6.911521459281265e-06,
+      "loss": 1.2458,
+      "step": 5899
+    },
+    {
+      "epoch": 0.38144188264196993,
+      "grad_norm": 2.3060193061828613,
+      "learning_rate": 6.910573487289479e-06,
+      "loss": 1.0826,
+      "step": 5900
+    },
+    {
+      "epoch": 0.3815065338085194,
+      "grad_norm": 2.7792911529541016,
+      "learning_rate": 6.909625434871104e-06,
+      "loss": 1.2411,
+      "step": 5901
+    },
+    {
+      "epoch": 0.38157118497506887,
+      "grad_norm": 2.7977821826934814,
+      "learning_rate": 6.90867730206605e-06,
+      "loss": 1.0796,
+      "step": 5902
+    },
+    {
+      "epoch": 0.3816358361416184,
+      "grad_norm": 2.568824529647827,
+      "learning_rate": 6.907729088914228e-06,
+      "loss": 1.1366,
+      "step": 5903
+    },
+    {
+      "epoch": 0.38170048730816786,
+      "grad_norm": 2.810534954071045,
+      "learning_rate": 6.906780795455553e-06,
+      "loss": 1.2322,
+      "step": 5904
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.346276383970312e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null