Training in progress, step 180, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +53 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a6d8f0f796537366c6e78ca004befff9f9c27672a628bae1e611c8bc0f94c8c
 size 3380768360

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd22194ae981192953c10bd8c7d7c229d363fe3051c02792c9530080c309db2d
 size 3380768360

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffd75d21ac4ac0a8645a72715f91e4d5f09c05dd5a2548ed04a8b49d623fc3a5
 size 1757899449

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd7667b7685377e93cb674f532d76551a3a23b1d3a5e2dd6690c6cc456e5c3b4
 size 1757899449

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5dc9af15ae765cffc21eeb6ddbc68a2629e47a5fc5164b3c35695e55c025ec4
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:7278f9a4041731694f91598435752e0692e41d16df60add795880e049f862551
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:213d03f95061a3291403e8d5572036299f2f6f739be51135e2941aff4f3ccff7
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce07496a37a4ed6b1a548b1e73eeccecc1cf6f60eafddb58dcb81201d88a83bc
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.5644599303135889,
   "eval_steps": 30,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -265,6 +265,56 @@
       "eval_samples_per_second": 0.291,
       "eval_steps_per_second": 0.073,
       "step": 150
     }
   ],
   "logging_steps": 5,
@@ -284,7 +334,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.840854969157304e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.8780487804878048,
   "eval_steps": 30,
+  "global_step": 180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 0.291,
       "eval_steps_per_second": 0.073,
       "step": 150
+    },
+    {
+      "epoch": 1.6167247386759582,
+      "grad_norm": 0.07571443915367126,
+      "learning_rate": 9.949945276530781e-06,
+      "loss": 0.205,
+      "step": 155
+    },
+    {
+      "epoch": 1.6689895470383276,
+      "grad_norm": 0.08948186039924622,
+      "learning_rate": 7.5677871252624485e-06,
+      "loss": 0.2501,
+      "step": 160
+    },
+    {
+      "epoch": 1.721254355400697,
+      "grad_norm": 0.185760036110878,
+      "learning_rate": 5.488077459582425e-06,
+      "loss": 0.3175,
+      "step": 165
+    },
+    {
+      "epoch": 1.773519163763066,
+      "grad_norm": 0.055869363248348236,
+      "learning_rate": 3.7256400418220262e-06,
+      "loss": 0.1723,
+      "step": 170
+    },
+    {
+      "epoch": 1.8257839721254356,
+      "grad_norm": 0.0660533756017685,
+      "learning_rate": 2.2930371799975594e-06,
+      "loss": 0.1959,
+      "step": 175
+    },
+    {
+      "epoch": 1.8780487804878048,
+      "grad_norm": 0.07585973292589188,
+      "learning_rate": 1.2004801861442371e-06,
+      "loss": 0.2145,
+      "step": 180
+    },
+    {
+      "epoch": 1.8780487804878048,
+      "eval_loss": 0.23282098770141602,
+      "eval_runtime": 1752.5559,
+      "eval_samples_per_second": 0.291,
+      "eval_steps_per_second": 0.073,
+      "step": 180
     }
   ],
   "logging_steps": 5,
       "attributes": {}
     }
   },
+  "total_flos": 1.1783682305701724e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null