Training in progress, step 30, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:492482922737f774b1c554ae6b0df707f45da2ec1fea952f8d3cdc663975eaf9
 size 101752088

 version https://git-lfs.github.com/spec/v1
+oid sha256:b768e3a4d249cd9bb3425b75641f3e3d66c2119e027046042a354d0900705a9c
 size 101752088

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c7677d437b82578dbb78df01b1f16713302d84d2d5beb0bcf6fc95926b4f2e6
 size 52046596

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8bc270d065791693bfe545dd959cf593a80072a2fe6e4cb2f28f6520970ef91
 size 52046596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49762064c622a61f788bab27148eaa124166994071a98ad304aeda08b72320f4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4c6a3932b0c6757b2a554606edacf63dde2370212156fc61645da06ea61feaa
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79fd9663c22308e7cda458f2f27a3161480f323121be11ab10f5e1ea3f30fc6d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:624cde959d3a917007c76687e7ed04f5f5ce5a570abfa20dd466a4e55f6684fa
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0025174649128327773,
   "eval_steps": 25,
-  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -155,6 +155,84 @@
       "learning_rate": 0.00019396926207859084,
       "loss": 0.4888,
       "step": 20
     }
   ],
   "logging_steps": 1,
@@ -174,7 +252,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.31707680325632e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.003776197369249166,
   "eval_steps": 25,
+  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019396926207859084,
       "loss": 0.4888,
       "step": 20
+    },
+    {
+      "epoch": 0.002643338158474416,
+      "grad_norm": 6.595178127288818,
+      "learning_rate": 0.00019271838545667876,
+      "loss": 0.6385,
+      "step": 21
+    },
+    {
+      "epoch": 0.002769211404116055,
+      "grad_norm": 3.2132580280303955,
+      "learning_rate": 0.0001913545457642601,
+      "loss": 0.518,
+      "step": 22
+    },
+    {
+      "epoch": 0.002895084649757694,
+      "grad_norm": 16.919002532958984,
+      "learning_rate": 0.0001898794046299167,
+      "loss": 0.3769,
+      "step": 23
+    },
+    {
+      "epoch": 0.0030209578953993327,
+      "grad_norm": 0.020209377631545067,
+      "learning_rate": 0.00018829475928589271,
+      "loss": 0.0002,
+      "step": 24
+    },
+    {
+      "epoch": 0.0031468311410409715,
+      "grad_norm": 0.8074254989624023,
+      "learning_rate": 0.00018660254037844388,
+      "loss": 0.0241,
+      "step": 25
+    },
+    {
+      "epoch": 0.0031468311410409715,
+      "eval_loss": NaN,
+      "eval_runtime": 3493.5914,
+      "eval_samples_per_second": 0.957,
+      "eval_steps_per_second": 0.479,
+      "step": 25
+    },
+    {
+      "epoch": 0.003272704386682611,
+      "grad_norm": 19.329341888427734,
+      "learning_rate": 0.0001848048096156426,
+      "loss": 0.7896,
+      "step": 26
+    },
+    {
+      "epoch": 0.0033985776323242497,
+      "grad_norm": 8.426005363464355,
+      "learning_rate": 0.00018290375725550417,
+      "loss": 0.4921,
+      "step": 27
+    },
+    {
+      "epoch": 0.0035244508779658885,
+      "grad_norm": 6.269211292266846,
+      "learning_rate": 0.00018090169943749476,
+      "loss": 0.5567,
+      "step": 28
+    },
+    {
+      "epoch": 0.0036503241236075274,
+      "grad_norm": 4.1337480545043945,
+      "learning_rate": 0.00017880107536067218,
+      "loss": 0.0482,
+      "step": 29
+    },
+    {
+      "epoch": 0.003776197369249166,
+      "grad_norm": 0.4672463834285736,
+      "learning_rate": 0.0001766044443118978,
+      "loss": 0.0424,
+      "step": 30
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.97561520488448e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null