diagonalge committed
Commit 2f29494
1 Parent(s): edfb86a

Training in progress, step 90, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af267734d45ab8807778ad6c8034835e5516c44f08ff54b6b0b37983658ad08d
+oid sha256:432eec1fba0080d963fdc6ae82a834cb1e734041f7aff1118b89ad7b18c207db
 size 45118424
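
The entry above is a Git LFS pointer file, not the weights themselves: it records only the LFS spec version, the SHA-256 object id of the stored blob, and its size in bytes. A minimal sketch of checking a downloaded blob against the new pointer, assuming the file has already been fetched to a local path (the path and chunk size here are illustrative):

import hashlib
import os

# Hypothetical local path to the downloaded checkpoint blob.
path = "last-checkpoint/adapter_model.safetensors"

# Values taken from the new LFS pointer above.
expected_oid = "432eec1fba0080d963fdc6ae82a834cb1e734041f7aff1118b89ad7b18c207db"
expected_size = 45118424

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha256.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha256.hexdigest() == expected_oid, "sha256 mismatch"
print("blob matches the LFS pointer")
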
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:247139f56b8ec3e96f45512ce47e9bbbde14d0c26d612c653a70b2b450d5f3e0
+oid sha256:05128a102a188bf1805db3f87861a8ec32a704f4a163c27ce580f009fdd12730
 size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fb86d30a39fedc74b03df6b4c27c7a36b5a0080347bc29b002aed513ce38fe7
+oid sha256:5181f6310a2ee8021297af86cebbb89fed67729cda31b24d40e1d23f86a348a4
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49b8a1dbbf2c2a0b7fde326d57c34bd6c5e5d17e0aaf8b19016c1f721c049db1
+oid sha256:cb97b24f34ff3e53eec5be9cf35c1a7161c58dbc2fed7dda160fb3eb64e5f353
 size 1064
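
Together these four updated files carry the state needed to continue the run: the adapter weights (adapter_model.safetensors, which suggests a PEFT-style adapter), the optimizer state, the RNG state, and the LR scheduler state; in practice one would resume by passing resume_from_checkpoint="last-checkpoint" to Trainer.train(). A minimal inspection sketch, assuming a local copy of this checkpoint directory (the base model and training setup are not recorded in this diff):

import torch
from safetensors import safe_open

ckpt = "last-checkpoint"  # hypothetical local copy of the checkpoint directory

# optimizer.pt, scheduler.pt and rng_state.pth are ordinary torch pickles
# (weights_only=False because these trusted local files contain non-tensor objects).
optimizer_state = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load(f"{ckpt}/rng_state.pth", map_location="cpu", weights_only=False)
print(scheduler_state)  # e.g. last_epoch should read 90 for this checkpoint

# adapter_model.safetensors holds the adapter tensors; list names and shapes
# without materializing the full tensors.
with safe_open(f"{ckpt}/adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())
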
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1909307875894988,
+  "epoch": 0.21479713603818615,
   "eval_steps": 25,
-  "global_step": 80,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -599,6 +599,76 @@
       "learning_rate": 2.339555568810221e-05,
       "loss": 0.6797,
       "step": 80
+    },
+    {
+      "epoch": 0.19331742243436753,
+      "grad_norm": 0.3806484341621399,
+      "learning_rate": 2.119892463932781e-05,
+      "loss": 0.6941,
+      "step": 81
+    },
+    {
+      "epoch": 0.1957040572792363,
+      "grad_norm": 0.5402078032493591,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 0.7054,
+      "step": 82
+    },
+    {
+      "epoch": 0.19809069212410502,
+      "grad_norm": 0.5552183985710144,
+      "learning_rate": 1.7096242744495837e-05,
+      "loss": 0.6607,
+      "step": 83
+    },
+    {
+      "epoch": 0.20047732696897375,
+      "grad_norm": 0.5727123022079468,
+      "learning_rate": 1.5195190384357404e-05,
+      "loss": 0.8021,
+      "step": 84
+    },
+    {
+      "epoch": 0.20286396181384247,
+      "grad_norm": 0.45054370164871216,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.9153,
+      "step": 85
+    },
+    {
+      "epoch": 0.2052505966587112,
+      "grad_norm": 0.43339234590530396,
+      "learning_rate": 1.1705240714107302e-05,
+      "loss": 0.675,
+      "step": 86
+    },
+    {
+      "epoch": 0.20763723150357996,
+      "grad_norm": 0.5368764996528625,
+      "learning_rate": 1.0120595370083318e-05,
+      "loss": 0.6827,
+      "step": 87
+    },
+    {
+      "epoch": 0.2100238663484487,
+      "grad_norm": 0.5663527846336365,
+      "learning_rate": 8.645454235739903e-06,
+      "loss": 0.9642,
+      "step": 88
+    },
+    {
+      "epoch": 0.21241050119331742,
+      "grad_norm": 0.46338632702827454,
+      "learning_rate": 7.281614543321269e-06,
+      "loss": 0.9681,
+      "step": 89
+    },
+    {
+      "epoch": 0.21479713603818615,
+      "grad_norm": 0.4531870186328888,
+      "learning_rate": 6.030737921409169e-06,
+      "loss": 0.8122,
+      "step": 90
     }
   ],
   "logging_steps": 1,
@@ -618,7 +688,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.548356111302656e+16,
+  "total_flos": 1.741900625215488e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null