Training in progress, step 90, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +284 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e08ab7b791ebbbb4e871d195a6fc4ee2df134ef19992061b4b1256e9b152e0f
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:c660385d6439f2cd1581de9800d768a0a284dd447b152e5b6977566ce9e48ab7
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c806f6bc29b6d07ab79a957cb9df8dfe0e63a0aaa9324c0acd806074dbcff10
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e241ec47be04250fdac190488e678f4df59217a13e8339ad6068a6100f3b16d
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e03bbb1dc0b204afe696c49141d1f7715a5640b470267f05b3268a13da657cb9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:736357e081c134dab8a78c3d9516592c805f8c9e3877b443e8a1c64b81e67753
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e218532adb96c5c297ad4a44b4d0ab3e8c0f451fb19877122c6a6558e70ebb59
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1969755bc21fc7060aa3a0aac2955c1e49faa350215ac29549a54ef247148d87
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.6604854464530945,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.5571030640668524,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,286 @@
       "eval_samples_per_second": 8.725,
       "eval_steps_per_second": 2.181,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +676,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.54256789372928e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.6604854464530945,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 1.0027855153203342,
   "eval_steps": 50,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.725,
       "eval_steps_per_second": 2.181,
       "step": 50
+    },
+    {
+      "epoch": 0.5682451253481894,
+      "grad_norm": 0.8395673632621765,
+      "learning_rate": 4.8037009212046586e-05,
+      "loss": 0.5995,
+      "step": 51
+    },
+    {
+      "epoch": 0.5793871866295265,
+      "grad_norm": 1.080180287361145,
+      "learning_rate": 4.607704521360776e-05,
+      "loss": 0.687,
+      "step": 52
+    },
+    {
+      "epoch": 0.5905292479108635,
+      "grad_norm": 1.0408283472061157,
+      "learning_rate": 4.412313012710813e-05,
+      "loss": 0.6857,
+      "step": 53
+    },
+    {
+      "epoch": 0.6016713091922006,
+      "grad_norm": 1.0837056636810303,
+      "learning_rate": 4.2178276747988446e-05,
+      "loss": 0.7375,
+      "step": 54
+    },
+    {
+      "epoch": 0.6128133704735376,
+      "grad_norm": 1.1068450212478638,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.6629,
+      "step": 55
+    },
+    {
+      "epoch": 0.6239554317548747,
+      "grad_norm": 0.9491481184959412,
+      "learning_rate": 3.832773180720475e-05,
+      "loss": 0.6415,
+      "step": 56
+    },
+    {
+      "epoch": 0.6350974930362117,
+      "grad_norm": 0.9771397709846497,
+      "learning_rate": 3.642797750674629e-05,
+      "loss": 0.6092,
+      "step": 57
+    },
+    {
+      "epoch": 0.6462395543175488,
+      "grad_norm": 0.9483528733253479,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 0.6702,
+      "step": 58
+    },
+    {
+      "epoch": 0.6573816155988857,
+      "grad_norm": 1.1656486988067627,
+      "learning_rate": 3.2694147146125345e-05,
+      "loss": 0.7169,
+      "step": 59
+    },
+    {
+      "epoch": 0.6685236768802229,
+      "grad_norm": 1.362808346748352,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.7768,
+      "step": 60
+    },
+    {
+      "epoch": 0.6796657381615598,
+      "grad_norm": 1.020258903503418,
+      "learning_rate": 2.9067013123128613e-05,
+      "loss": 0.6284,
+      "step": 61
+    },
+    {
+      "epoch": 0.6908077994428969,
+      "grad_norm": 1.527144432067871,
+      "learning_rate": 2.7300475013022663e-05,
+      "loss": 0.5853,
+      "step": 62
+    },
+    {
+      "epoch": 0.7019498607242339,
+      "grad_norm": 1.3415321111679077,
+      "learning_rate": 2.556893792515227e-05,
+      "loss": 0.5872,
+      "step": 63
+    },
+    {
+      "epoch": 0.713091922005571,
+      "grad_norm": 1.19660484790802,
+      "learning_rate": 2.3875071764202563e-05,
+      "loss": 0.5428,
+      "step": 64
+    },
+    {
+      "epoch": 0.724233983286908,
+      "grad_norm": 1.7170783281326294,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.604,
+      "step": 65
+    },
+    {
+      "epoch": 0.7353760445682451,
+      "grad_norm": 1.8448792695999146,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 0.6284,
+      "step": 66
+    },
+    {
+      "epoch": 0.7465181058495822,
+      "grad_norm": 0.6753265261650085,
+      "learning_rate": 1.9045302534508297e-05,
+      "loss": 0.6319,
+      "step": 67
+    },
+    {
+      "epoch": 0.7576601671309192,
+      "grad_norm": 0.8194894790649414,
+      "learning_rate": 1.7527597583490822e-05,
+      "loss": 0.5969,
+      "step": 68
+    },
+    {
+      "epoch": 0.7688022284122563,
+      "grad_norm": 0.8043373227119446,
+      "learning_rate": 1.605996272335291e-05,
+      "loss": 0.6543,
+      "step": 69
+    },
+    {
+      "epoch": 0.7799442896935933,
+      "grad_norm": 0.9129596948623657,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.7098,
+      "step": 70
+    },
+    {
+      "epoch": 0.7910863509749304,
+      "grad_norm": 0.8895006775856018,
+      "learning_rate": 1.3283874528215733e-05,
+      "loss": 0.5837,
+      "step": 71
+    },
+    {
+      "epoch": 0.8022284122562674,
+      "grad_norm": 0.7797386646270752,
+      "learning_rate": 1.1979701719998453e-05,
+      "loss": 0.6462,
+      "step": 72
+    },
+    {
+      "epoch": 0.8133704735376045,
+      "grad_norm": 1.0365535020828247,
+      "learning_rate": 1.0734153455962765e-05,
+      "loss": 0.7263,
+      "step": 73
+    },
+    {
+      "epoch": 0.8245125348189415,
+      "grad_norm": 0.8929746747016907,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.5768,
+      "step": 74
+    },
+    {
+      "epoch": 0.8356545961002786,
+      "grad_norm": 0.9315441250801086,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.6044,
+      "step": 75
+    },
+    {
+      "epoch": 0.8467966573816156,
+      "grad_norm": 0.9474170804023743,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 0.6207,
+      "step": 76
+    },
+    {
+      "epoch": 0.8579387186629527,
+      "grad_norm": 1.0279390811920166,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 0.6206,
+      "step": 77
+    },
+    {
+      "epoch": 0.8690807799442897,
+      "grad_norm": 1.0129948854446411,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.6151,
+      "step": 78
+    },
+    {
+      "epoch": 0.8802228412256268,
+      "grad_norm": 0.9171512126922607,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 0.6025,
+      "step": 79
+    },
+    {
+      "epoch": 0.8913649025069638,
+      "grad_norm": 1.0913035869598389,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.7344,
+      "step": 80
+    },
+    {
+      "epoch": 0.9025069637883009,
+      "grad_norm": 1.4194581508636475,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 0.7239,
+      "step": 81
+    },
+    {
+      "epoch": 0.9136490250696379,
+      "grad_norm": 1.1939295530319214,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.636,
+      "step": 82
+    },
+    {
+      "epoch": 0.924791086350975,
+      "grad_norm": 1.0078566074371338,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 0.5,
+      "step": 83
+    },
+    {
+      "epoch": 0.935933147632312,
+      "grad_norm": 1.0714085102081299,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 0.4576,
+      "step": 84
+    },
+    {
+      "epoch": 0.947075208913649,
+      "grad_norm": 1.0616954565048218,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.4891,
+      "step": 85
+    },
+    {
+      "epoch": 0.958217270194986,
+      "grad_norm": 1.1367872953414917,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.589,
+      "step": 86
+    },
+    {
+      "epoch": 0.9693593314763231,
+      "grad_norm": 1.2933335304260254,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 0.5224,
+      "step": 87
+    },
+    {
+      "epoch": 0.9805013927576601,
+      "grad_norm": 1.6042691469192505,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 0.5322,
+      "step": 88
+    },
+    {
+      "epoch": 0.9916434540389972,
+      "grad_norm": 0.7805180549621582,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 0.6384,
+      "step": 89
+    },
+    {
+      "epoch": 1.0027855153203342,
+      "grad_norm": 1.1597189903259277,
+      "learning_rate": 0.0,
+      "loss": 0.7541,
+      "step": 90
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.3576622208712704e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null