Training in progress, step 2476, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e011c421a60c7810ee24b763468071afa26d0387457e13695871b896b3574643
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e296b9bb163171f7d3292e7f975cd8851abb7f06f131d59f3fc27ddc0971e89
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2eb75a806bcf4890f062200854fe7f6686e84cf4c12a91ba62120df4c2eb5adf
 size 75455810

 version https://git-lfs.github.com/spec/v1
+oid sha256:2409c85204b25b0c68db7e75cff61155bb985b9ed5e78015f6ae41839de13b08
 size 75455810

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d921c4c665dbdab2f06d441e60a0ada3a95bd5abf389b9ce1b6155ead82a6b9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:477febd7eab5cb9a16378af748559d02a17dca77e420484aefc4f371d4457c5a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbe24ee8253c280cdee86e896039139243254be40cb03c9cd81a3d19b30dd4a2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c588c84c1abcb13cdaadd37cc81933f6bbcbd611f3b25ba28f6d4d519a813632
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9578364404432342,
   "eval_steps": 500,
-  "global_step": 2472,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -17311,6 +17311,34 @@
       "learning_rate": 4.4965673749054474e-08,
       "loss": 1.3548,
       "step": 2472
     }
   ],
   "logging_steps": 1.0,
@@ -17330,7 +17358,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.444950205418074e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9593863375960551,
   "eval_steps": 500,
+  "global_step": 2476,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.4965673749054474e-08,
       "loss": 1.3548,
       "step": 2472
+    },
+    {
+      "epoch": 0.9582239147314394,
+      "grad_norm": 0.17760907113552094,
+      "learning_rate": 4.4138052797422225e-08,
+      "loss": 1.3602,
+      "step": 2473
+    },
+    {
+      "epoch": 0.9586113890196446,
+      "grad_norm": 0.1899784654378891,
+      "learning_rate": 4.331808542797855e-08,
+      "loss": 1.3683,
+      "step": 2474
+    },
+    {
+      "epoch": 0.9589988633078498,
+      "grad_norm": 0.18728189170360565,
+      "learning_rate": 4.2505772907038836e-08,
+      "loss": 1.3418,
+      "step": 2475
+    },
+    {
+      "epoch": 0.9593863375960551,
+      "grad_norm": 0.18516357243061066,
+      "learning_rate": 4.170111648909736e-08,
+      "loss": 1.3903,
+      "step": 2476
     }
   ],
   "logging_steps": 1.0,
       "attributes": {}
     }
   },
+  "total_flos": 2.448841409909757e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null