Training in progress, step 130, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +84 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7890d150186359623165f222ca4830c2a039fc03af0d56fe99e682d415fc31f6
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1f94ce60ad18907bfe378be4ba63c3cb07211d25772e7578153e59c360d0334
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d30705974fe66df92448604887f4ff2019bdf74272d27933dc0a7e38b3354a13
 size 46057082

 version https://git-lfs.github.com/spec/v1
+oid sha256:af22c1f6025a1e35c54cfaffb9aa264061cac162f2b52caade7115ff260d713e
 size 46057082

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98bbd4b4faf598bcf20005208e29928176693d773d6281a511c45efeae497be2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad4c4d2d769c6f52183fdfe62140ef02b36aa1e936b1d8050f51672d3d58fb1e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e549a35cd7e532c378c88126565a201f68fd1d73868bbbba082980ce1de2c27
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3511d75105f53c278279e3dade6f856082c8693b0424c0bf567bdcf23028dd2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6011497974395752,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.0338863395693611,
   "eval_steps": 10,
-  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -951,6 +951,84 @@
       "eval_samples_per_second": 5.596,
       "eval_steps_per_second": 5.596,
       "step": 120
     }
   ],
   "logging_steps": 1,
@@ -965,7 +1043,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -979,7 +1057,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.174564302225408e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.5968632698059082,
+  "best_model_checkpoint": "miner_id_24/checkpoint-130",
+  "epoch": 0.03671020120014119,
   "eval_steps": 10,
+  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.596,
       "eval_steps_per_second": 5.596,
       "step": 120
+    },
+    {
+      "epoch": 0.03416872573243911,
+      "grad_norm": 1.4283939599990845,
+      "learning_rate": 0.00017572742764761055,
+      "loss": 1.4789,
+      "step": 121
+    },
+    {
+      "epoch": 0.03445111189551712,
+      "grad_norm": 1.3361456394195557,
+      "learning_rate": 0.00017530714660036112,
+      "loss": 0.784,
+      "step": 122
+    },
+    {
+      "epoch": 0.03473349805859513,
+      "grad_norm": 1.0861424207687378,
+      "learning_rate": 0.00017488376997127283,
+      "loss": 2.2809,
+      "step": 123
+    },
+    {
+      "epoch": 0.035015884221673135,
+      "grad_norm": 4.459283351898193,
+      "learning_rate": 0.0001744573151637007,
+      "loss": 1.4483,
+      "step": 124
+    },
+    {
+      "epoch": 0.03529827038475115,
+      "grad_norm": 1.324436902999878,
+      "learning_rate": 0.00017402779970753155,
+      "loss": 2.6136,
+      "step": 125
+    },
+    {
+      "epoch": 0.03558065654782916,
+      "grad_norm": 3.7964041233062744,
+      "learning_rate": 0.0001735952412584635,
+      "loss": 1.092,
+      "step": 126
+    },
+    {
+      "epoch": 0.035863042710907164,
+      "grad_norm": 2.560436725616455,
+      "learning_rate": 0.00017315965759728014,
+      "loss": 1.4307,
+      "step": 127
+    },
+    {
+      "epoch": 0.036145428873985176,
+      "grad_norm": 1.473990797996521,
+      "learning_rate": 0.00017272106662911973,
+      "loss": 1.1344,
+      "step": 128
+    },
+    {
+      "epoch": 0.03642781503706318,
+      "grad_norm": 3.3736298084259033,
+      "learning_rate": 0.00017227948638273916,
+      "loss": 1.5746,
+      "step": 129
+    },
+    {
+      "epoch": 0.03671020120014119,
+      "grad_norm": 1.5858126878738403,
+      "learning_rate": 0.00017183493500977278,
+      "loss": 1.3798,
+      "step": 130
+    },
+    {
+      "epoch": 0.03671020120014119,
+      "eval_loss": 1.5968632698059082,
+      "eval_runtime": 133.2364,
+      "eval_samples_per_second": 5.599,
+      "eval_steps_per_second": 5.599,
+      "step": 130
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.272444660744192e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null