Training in progress, step 75, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +179 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59c4c05e5e3cb61a90bdbe376893631ff9b8cbfd8b9b4bedc9ac7530dfbe0424
 size 100690288

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5ecc50627cc77392872a84b376379dc560a5df887496051d111c54b00b31dd6
 size 100690288

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd754297635d1977aab2e1e385298b526f37f57c0743868247bffb7fc6c869fc
 size 51344890

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f14630fb28e1b5777364785c5667f77ea38864f28ef2592bb1de20ae4cda296
 size 51344890

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36b610fb21265dc0edb1dcaa81260a2a39f7a9fce784c2fe9dd7d5294b8b5b51
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9adb9e2a16a0f0dbafc69665500a503dec73cd414d2b4ced1960e6c4098ad9df
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83c409614fe7c4294c30c517ac4a1097721f51ec5d3456b5de1ee4e4e2aed9ba
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:212837ccb433e5430b061dc107b19dc09e932e6cfb62a751187d0903b7b0d94e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.8454655408859253,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.2336448598130841,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,181 @@
       "eval_samples_per_second": 14.909,
       "eval_steps_per_second": 3.743,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +571,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.2973587459735552e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.8454655408859253,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.35046728971962615,
   "eval_steps": 50,
+  "global_step": 75,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.909,
       "eval_steps_per_second": 3.743,
       "step": 50
+    },
+    {
+      "epoch": 0.2383177570093458,
+      "grad_norm": 10.01890754699707,
+      "learning_rate": 4.007047666771274e-05,
+      "loss": 28.1479,
+      "step": 51
+    },
+    {
+      "epoch": 0.24299065420560748,
+      "grad_norm": 10.493062973022461,
+      "learning_rate": 3.728908329032567e-05,
+      "loss": 26.7191,
+      "step": 52
+    },
+    {
+      "epoch": 0.24766355140186916,
+      "grad_norm": 11.016109466552734,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 29.1286,
+      "step": 53
+    },
+    {
+      "epoch": 0.2523364485981308,
+      "grad_norm": 10.54951000213623,
+      "learning_rate": 3.1859614732467954e-05,
+      "loss": 28.8225,
+      "step": 54
+    },
+    {
+      "epoch": 0.2570093457943925,
+      "grad_norm": 10.172712326049805,
+      "learning_rate": 2.9229249349905684e-05,
+      "loss": 32.9307,
+      "step": 55
+    },
+    {
+      "epoch": 0.2616822429906542,
+      "grad_norm": 13.219228744506836,
+      "learning_rate": 2.6666633838716314e-05,
+      "loss": 28.8002,
+      "step": 56
+    },
+    {
+      "epoch": 0.26635514018691586,
+      "grad_norm": 9.766500473022461,
+      "learning_rate": 2.418012691805191e-05,
+      "loss": 27.8569,
+      "step": 57
+    },
+    {
+      "epoch": 0.27102803738317754,
+      "grad_norm": 9.871047973632812,
+      "learning_rate": 2.1777839056661554e-05,
+      "loss": 34.0156,
+      "step": 58
+    },
+    {
+      "epoch": 0.2757009345794392,
+      "grad_norm": 11.04659366607666,
+      "learning_rate": 1.946760601822809e-05,
+      "loss": 30.437,
+      "step": 59
+    },
+    {
+      "epoch": 0.2803738317757009,
+      "grad_norm": 32.65912628173828,
+      "learning_rate": 1.725696330273575e-05,
+      "loss": 28.4228,
+      "step": 60
+    },
+    {
+      "epoch": 0.2850467289719626,
+      "grad_norm": 13.58231258392334,
+      "learning_rate": 1.5153121567235335e-05,
+      "loss": 32.4821,
+      "step": 61
+    },
+    {
+      "epoch": 0.2897196261682243,
+      "grad_norm": 10.420000076293945,
+      "learning_rate": 1.3162943106179749e-05,
+      "loss": 27.4436,
+      "step": 62
+    },
+    {
+      "epoch": 0.29439252336448596,
+      "grad_norm": 10.50948715209961,
+      "learning_rate": 1.1292919468045877e-05,
+      "loss": 27.5072,
+      "step": 63
+    },
+    {
+      "epoch": 0.29906542056074764,
+      "grad_norm": 10.826915740966797,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 30.5895,
+      "step": 64
+    },
+    {
+      "epoch": 0.3037383177570093,
+      "grad_norm": 10.006555557250977,
+      "learning_rate": 7.937323358440935e-06,
+      "loss": 32.246,
+      "step": 65
+    },
+    {
+      "epoch": 0.308411214953271,
+      "grad_norm": 10.863489151000977,
+      "learning_rate": 6.462696144011149e-06,
+      "loss": 27.2672,
+      "step": 66
+    },
+    {
+      "epoch": 0.3130841121495327,
+      "grad_norm": 10.34022331237793,
+      "learning_rate": 5.13007856543209e-06,
+      "loss": 29.9912,
+      "step": 67
+    },
+    {
+      "epoch": 0.3177570093457944,
+      "grad_norm": 11.068024635314941,
+      "learning_rate": 3.9438173442575e-06,
+      "loss": 34.9261,
+      "step": 68
+    },
+    {
+      "epoch": 0.32242990654205606,
+      "grad_norm": 10.7153902053833,
+      "learning_rate": 2.9077818180237693e-06,
+      "loss": 31.9632,
+      "step": 69
+    },
+    {
+      "epoch": 0.32710280373831774,
+      "grad_norm": 11.57532787322998,
+      "learning_rate": 2.0253513192751373e-06,
+      "loss": 34.1264,
+      "step": 70
+    },
+    {
+      "epoch": 0.3317757009345794,
+      "grad_norm": 10.075494766235352,
+      "learning_rate": 1.2994041528833266e-06,
+      "loss": 31.5173,
+      "step": 71
+    },
+    {
+      "epoch": 0.3364485981308411,
+      "grad_norm": 10.921475410461426,
+      "learning_rate": 7.323082076153509e-07,
+      "loss": 29.5536,
+      "step": 72
+    },
+    {
+      "epoch": 0.3411214953271028,
+      "grad_norm": 9.99160099029541,
+      "learning_rate": 3.2591323257248893e-07,
+      "loss": 30.6071,
+      "step": 73
+    },
+    {
+      "epoch": 0.34579439252336447,
+      "grad_norm": 9.956338882446289,
+      "learning_rate": 8.15448036932176e-08,
+      "loss": 28.1912,
+      "step": 74
+    },
+    {
+      "epoch": 0.35046728971962615,
+      "grad_norm": 10.755599975585938,
+      "learning_rate": 0.0,
+      "loss": 28.4376,
+      "step": 75
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.9444284927246336e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null