Training in progress, step 2000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b3aefb0fbd710b7cd994f4c43892e2b7be9c484498cffcf26bcbe066348a36f
 size 121537408

 version https://git-lfs.github.com/spec/v1
+oid sha256:20491829e0be4b1b4e8e0e277797b9bf1c8ad29983a72101f2fc3f64084fa5b8
 size 121537408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c798f26631e369bb9d847535b11306542209fa62620793e0cea66711e56a76b
 size 61998229

 version https://git-lfs.github.com/spec/v1
+oid sha256:310a9f3c78b1bf175abf83bdf7f5bcbe5a72b117dc5643746a9e552cbaf5b1dc
 size 61998229

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c800b778fa7e115e4c34de8529902de8b61c9a1b4bab3eb8295d06dafff030e
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bf60e7c9d9858c9c78135386b6f7a9a021fd02a4d9f1a54b5e62667812d6cc3
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:37d835d6083fdb1e5238226cd4cbca157d080c178a650e6d8ecd9ca4cd32b543
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9190399999999999,
   "eval_steps": 200,
-  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -218,6 +218,76 @@
       "learning_rate": 1.8855743544078363e-05,
       "loss": 1.2614,
       "step": 1500
     }
   ],
   "logging_steps": 50,
@@ -237,7 +307,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.790044790658048e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.55808,
   "eval_steps": 200,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.8855743544078363e-05,
       "loss": 1.2614,
       "step": 1500
+    },
+    {
+      "epoch": 1.98304,
+      "grad_norm": 3.592540979385376,
+      "learning_rate": 1.77426536064114e-05,
+      "loss": 1.2626,
+      "step": 1550
+    },
+    {
+      "epoch": 2.04608,
+      "grad_norm": 3.5584168434143066,
+      "learning_rate": 1.6629563668744434e-05,
+      "loss": 1.2448,
+      "step": 1600
+    },
+    {
+      "epoch": 2.11008,
+      "grad_norm": 3.483705759048462,
+      "learning_rate": 1.5516473731077473e-05,
+      "loss": 1.2606,
+      "step": 1650
+    },
+    {
+      "epoch": 2.17408,
+      "grad_norm": 3.3508358001708984,
+      "learning_rate": 1.4403383793410507e-05,
+      "loss": 1.2423,
+      "step": 1700
+    },
+    {
+      "epoch": 2.23808,
+      "grad_norm": 3.4544994831085205,
+      "learning_rate": 1.3290293855743544e-05,
+      "loss": 1.2494,
+      "step": 1750
+    },
+    {
+      "epoch": 2.30208,
+      "grad_norm": 3.5390095710754395,
+      "learning_rate": 1.2177203918076581e-05,
+      "loss": 1.2459,
+      "step": 1800
+    },
+    {
+      "epoch": 2.36608,
+      "grad_norm": 3.303083658218384,
+      "learning_rate": 1.1064113980409617e-05,
+      "loss": 1.2496,
+      "step": 1850
+    },
+    {
+      "epoch": 2.4300800000000002,
+      "grad_norm": 3.295470714569092,
+      "learning_rate": 9.951024042742654e-06,
+      "loss": 1.2363,
+      "step": 1900
+    },
+    {
+      "epoch": 2.49408,
+      "grad_norm": 3.5985326766967773,
+      "learning_rate": 8.837934105075691e-06,
+      "loss": 1.2258,
+      "step": 1950
+    },
+    {
+      "epoch": 2.55808,
+      "grad_norm": 3.7451696395874023,
+      "learning_rate": 7.724844167408728e-06,
+      "loss": 1.2202,
+      "step": 2000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 2.3883581752743936e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null