Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b12d3b911120541ebe7aa102393e84fbcbac0e37c7e886c42b6b3718f63ce541
 size 389074464

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f1ca4c9ca800ab7cef62b43022799eaf238902020ff82e01ceb4c79b33b7eaa
 size 389074464

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85815faeca4b36be8ad3a82198f60066b3373e9fd0aa39046068ec1c3ca9ec2d
-size 198011252

 version https://git-lfs.github.com/spec/v1
+oid sha256:7213eadcda674412fec55daf671e252891bf56017b1a8df4f491f3d8812c0ed8
+size 198011700

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a5069d9b58274816b67711402d949cef80a188cefe18c81de22655f6b0081a2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:df5dd822aa84213b6c6ce0291c75117cea29332f2771c334aa15ad7608903c0b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51dce63f1f986f2a2c8c65cc81461308842cff62f1990d3ffd57c28c8cfce8b4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8a78bab0e108d3c186898a780257121eba92c7434d580326e33b855003a3757
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5527585744857788,
-  "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 0.047014574518100614,
   "eval_steps": 50,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -230,6 +230,49 @@
       "eval_samples_per_second": 20.198,
       "eval_steps_per_second": 5.052,
       "step": 250
     }
   ],
   "logging_steps": 10,
@@ -258,7 +301,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.7160536693080064e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5459651947021484,
+  "best_model_checkpoint": "miner_id_24/checkpoint-300",
+  "epoch": 0.056417489421720736,
   "eval_steps": 50,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 20.198,
       "eval_steps_per_second": 5.052,
       "step": 250
+    },
+    {
+      "epoch": 0.048895157498824636,
+      "grad_norm": 0.7539621591567993,
+      "learning_rate": 0.00011708001710637128,
+      "loss": 0.6915,
+      "step": 260
+    },
+    {
+      "epoch": 0.05077574047954866,
+      "grad_norm": 0.7228267788887024,
+      "learning_rate": 0.00010969934665046512,
+      "loss": 0.5286,
+      "step": 270
+    },
+    {
+      "epoch": 0.052656323460272686,
+      "grad_norm": 0.774669885635376,
+      "learning_rate": 0.00010230065334953492,
+      "loss": 0.5363,
+      "step": 280
+    },
+    {
+      "epoch": 0.05453690644099671,
+      "grad_norm": 0.7888806462287903,
+      "learning_rate": 9.491998289362875e-05,
+      "loss": 0.5055,
+      "step": 290
+    },
+    {
+      "epoch": 0.056417489421720736,
+      "grad_norm": 1.0556362867355347,
+      "learning_rate": 8.759329316730539e-05,
+      "loss": 0.499,
+      "step": 300
+    },
+    {
+      "epoch": 0.056417489421720736,
+      "eval_loss": 0.5459651947021484,
+      "eval_runtime": 110.2865,
+      "eval_samples_per_second": 20.302,
+      "eval_steps_per_second": 5.078,
+      "step": 300
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.2535313083531264e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null