Training in progress, step 284, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +60 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47d0a48cb21ecf7db40a461679c66c506d6cfce3cd3eb8bf6e5d0f056870dcad
 size 1521616

 version https://git-lfs.github.com/spec/v1
+oid sha256:26226a0ed5fc078123a27e5d0e3a363288fca9118e63f2f7f87d0122233668ee
 size 1521616

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87ce9374edcd3515eb5bae4c9f5b33ce8a590226d39f6b9a7aa63808bc1b8f87
-size 1923578

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0909eeb4115aadf68e80281a69764465be936d01c1d760552f9a95d387f12b9
+size 1923706

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0e78a99081077a5b76d0235937a3b74eb8046bd77c8cc2ff3a6d6cab0d236cc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e04b0f2df816607a4c8831ab4eb641e4ebe05967ddc2acfa4a47402d04dc09e3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7dbf4abce9ef0842fe48244ef30ddf3b876559fbed9651b267b9cce55ab7a6d9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e782346571a35cc87bfc48f2a321ddc2384cd7948f88ca7d7f2b32f169c07e6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 6.470786094665527,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 2.1164021164021163,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -171,6 +171,62 @@
       "eval_samples_per_second": 223.132,
       "eval_steps_per_second": 55.783,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -194,12 +250,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 117420732973056.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 6.470786094665527,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 3.005291005291005,
   "eval_steps": 100,
+  "global_step": 284,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 223.132,
       "eval_steps_per_second": 55.783,
       "step": 200
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 1.4395525455474854,
+      "learning_rate": 3.3886016253128326e-05,
+      "loss": 6.2536,
+      "step": 210
+    },
+    {
+      "epoch": 2.328042328042328,
+      "grad_norm": 1.4456923007965088,
+      "learning_rate": 2.5736686013646228e-05,
+      "loss": 6.6578,
+      "step": 220
+    },
+    {
+      "epoch": 2.433862433862434,
+      "grad_norm": 1.5295335054397583,
+      "learning_rate": 1.8562563466230576e-05,
+      "loss": 6.5134,
+      "step": 230
+    },
+    {
+      "epoch": 2.5396825396825395,
+      "grad_norm": 1.9139333963394165,
+      "learning_rate": 1.2457857435084408e-05,
+      "loss": 6.5716,
+      "step": 240
+    },
+    {
+      "epoch": 2.6455026455026456,
+      "grad_norm": 1.5985289812088013,
+      "learning_rate": 7.502733428044683e-06,
+      "loss": 6.5857,
+      "step": 250
+    },
+    {
+      "epoch": 2.751322751322751,
+      "grad_norm": 1.7326596975326538,
+      "learning_rate": 3.7622609227231818e-06,
+      "loss": 6.4011,
+      "step": 260
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 1.2264372110366821,
+      "learning_rate": 1.2855588900269056e-06,
+      "loss": 6.6658,
+      "step": 270
+    },
+    {
+      "epoch": 2.962962962962963,
+      "grad_norm": 1.4076132774353027,
+      "learning_rate": 1.0515077583498344e-07,
+      "loss": 6.3814,
+      "step": 280
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 166705538924544.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null