Training in progress, epoch 2

Browse files

Files changed (5) hide show

model.safetensors +1 -1
tmp-checkpoint-350/model.safetensors +1 -1
tmp-checkpoint-350/optimizer.pt +1 -1
tmp-checkpoint-350/rng_state.pth +1 -1
tmp-checkpoint-350/trainer_state.json +18 -18

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a0439899010bb3a86a7f68722d69040ee0edd4cd396a46c51755f94f06e78f4
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ffe3afa822d315a1900625a7578ec708aa4cdda2c4865921057dec1afe89fe2
 size 267832560

tmp-checkpoint-350/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc6628694c950dd5fbe46f68ce2987c750081a50bc283657d3212f07f03f972f
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ffe3afa822d315a1900625a7578ec708aa4cdda2c4865921057dec1afe89fe2
 size 267832560

tmp-checkpoint-350/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5877be18420a112359a1a007e8e3b106c6f005a1f1e9c87fd1b5df566d875441
 size 535727290

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c18f5e9ccd427ec5bda64d8cbd7a0ccadfe49285f3b28f5d4b60f391d7d0e45
 size 535727290

tmp-checkpoint-350/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe0e7135622ace68d7968efe929220efcc643079f367f734515eda6abfb77956
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5225d9c40c3c71998b118bcf9e76d16d005cdec607bb7aa3c0a29d56cf212f89
 size 14244

tmp-checkpoint-350/trainer_state.json CHANGED Viewed

@@ -11,63 +11,63 @@
     {
       "epoch": 0.29,
       "learning_rate": 1.0000000000000002e-06,
-      "loss": 0.5477,
       "step": 50
     },
     {
       "epoch": 0.57,
       "learning_rate": 2.0000000000000003e-06,
-      "loss": 0.5383,
       "step": 100
     },
     {
       "epoch": 0.86,
       "learning_rate": 3e-06,
-      "loss": 0.5464,
       "step": 150
     },
     {
       "epoch": 1.0,
       "eval_accuracy": 0.7685714285714286,
       "eval_f1": 0.8691437802907915,
-      "eval_loss": 0.5194858312606812,
-      "eval_runtime": 10.6792,
-      "eval_samples_per_second": 65.548,
-      "eval_steps_per_second": 4.12,
       "step": 175
     },
     {
       "epoch": 1.14,
       "learning_rate": 4.000000000000001e-06,
-      "loss": 0.5248,
       "step": 200
     },
     {
       "epoch": 1.43,
       "learning_rate": 5e-06,
-      "loss": 0.5475,
       "step": 250
     },
     {
       "epoch": 1.71,
       "learning_rate": 6e-06,
-      "loss": 0.5114,
       "step": 300
     },
     {
       "epoch": 2.0,
       "learning_rate": 7e-06,
-      "loss": 0.5552,
       "step": 350
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.7685714285714286,
-      "eval_f1": 0.8691437802907915,
-      "eval_loss": 0.525272011756897,
-      "eval_runtime": 15.9825,
-      "eval_samples_per_second": 43.798,
-      "eval_steps_per_second": 2.753,
       "step": 350
     }
   ],
@@ -76,7 +76,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 826596567613440.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

     {
       "epoch": 0.29,
       "learning_rate": 1.0000000000000002e-06,
+      "loss": 0.5249,
       "step": 50
     },
     {
       "epoch": 0.57,
       "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.5264,
       "step": 100
     },
     {
       "epoch": 0.86,
       "learning_rate": 3e-06,
+      "loss": 0.521,
       "step": 150
     },
     {
       "epoch": 1.0,
       "eval_accuracy": 0.7685714285714286,
       "eval_f1": 0.8691437802907915,
+      "eval_loss": 0.5161310434341431,
+      "eval_runtime": 11.1188,
+      "eval_samples_per_second": 62.957,
+      "eval_steps_per_second": 3.957,
       "step": 175
     },
     {
       "epoch": 1.14,
       "learning_rate": 4.000000000000001e-06,
+      "loss": 0.5021,
       "step": 200
     },
     {
       "epoch": 1.43,
       "learning_rate": 5e-06,
+      "loss": 0.5169,
       "step": 250
     },
     {
       "epoch": 1.71,
       "learning_rate": 6e-06,
+      "loss": 0.5231,
       "step": 300
     },
     {
       "epoch": 2.0,
       "learning_rate": 7e-06,
+      "loss": 0.5115,
       "step": 350
     },
     {
       "epoch": 2.0,
+      "eval_accuracy": 0.7757142857142857,
+      "eval_f1": 0.8726682887266829,
+      "eval_loss": 0.5628861784934998,
+      "eval_runtime": 17.6371,
+      "eval_samples_per_second": 39.689,
+      "eval_steps_per_second": 2.495,
       "step": 350
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
+  "total_flos": 741817432473600.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null