Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:570161dd5954d73aadd88fabc3b5f7c4729f189e5f13423c8a4d3776f917447c
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:98deeb214a1731ecbba934fa91c1ac1341442c22680719cc6304cd65063841d6
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f1c21051c8e00e7067d66912653198d08ca57dd14908f82a01475f1390a9b54
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:1df828a632409a9dc7b0bbe73d9fbdcec428d540d06d768ecd7aeac2d516cfdb
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d66deda20d48ad650cb01d8ba2a7e2551efa84eed11e9d0d825a7f6a7290e0e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:522aae7e0367c41d9193df7531da85bad478577f061dcd4d40a1c895cc85aad8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3925478458404541,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.09468202619536058,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 11.117,
       "eval_steps_per_second": 5.558,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.3933441700318413e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.3890053629875183,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.1262427015938141,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.117,
       "eval_steps_per_second": 5.558,
       "step": 150
+    },
+    {
+      "epoch": 0.09531323970332965,
+      "grad_norm": 1.0027241706848145,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 0.9268,
+      "step": 151
+    },
+    {
+      "epoch": 0.09594445321129873,
+      "grad_norm": 1.1282353401184082,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 1.1528,
+      "step": 152
+    },
+    {
+      "epoch": 0.09657566671926779,
+      "grad_norm": 1.147811770439148,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 1.2677,
+      "step": 153
+    },
+    {
+      "epoch": 0.09720688022723686,
+      "grad_norm": 1.2426568269729614,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 1.4516,
+      "step": 154
+    },
+    {
+      "epoch": 0.09783809373520594,
+      "grad_norm": 1.2873423099517822,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 1.3455,
+      "step": 155
+    },
+    {
+      "epoch": 0.098469307243175,
+      "grad_norm": 1.295615553855896,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 1.4676,
+      "step": 156
+    },
+    {
+      "epoch": 0.09910052075114407,
+      "grad_norm": 1.2483463287353516,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 1.2907,
+      "step": 157
+    },
+    {
+      "epoch": 0.09973173425911315,
+      "grad_norm": 1.3809590339660645,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 1.489,
+      "step": 158
+    },
+    {
+      "epoch": 0.10036294776708221,
+      "grad_norm": 1.3790940046310425,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 1.4244,
+      "step": 159
+    },
+    {
+      "epoch": 0.10099416127505129,
+      "grad_norm": 1.3485262393951416,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 1.3816,
+      "step": 160
+    },
+    {
+      "epoch": 0.10162537478302036,
+      "grad_norm": 1.447893738746643,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 1.6498,
+      "step": 161
+    },
+    {
+      "epoch": 0.10225658829098942,
+      "grad_norm": 1.3701245784759521,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 1.5385,
+      "step": 162
+    },
+    {
+      "epoch": 0.1028878017989585,
+      "grad_norm": 1.4609767198562622,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 1.5178,
+      "step": 163
+    },
+    {
+      "epoch": 0.10351901530692757,
+      "grad_norm": 1.452957272529602,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.7092,
+      "step": 164
+    },
+    {
+      "epoch": 0.10415022881489663,
+      "grad_norm": 1.5146371126174927,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 1.4947,
+      "step": 165
+    },
+    {
+      "epoch": 0.10478144232286571,
+      "grad_norm": 1.5953258275985718,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 1.5606,
+      "step": 166
+    },
+    {
+      "epoch": 0.10541265583083478,
+      "grad_norm": 1.5583423376083374,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 1.63,
+      "step": 167
+    },
+    {
+      "epoch": 0.10604386933880385,
+      "grad_norm": 1.5322779417037964,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 1.6487,
+      "step": 168
+    },
+    {
+      "epoch": 0.10667508284677292,
+      "grad_norm": 1.5332443714141846,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 1.5777,
+      "step": 169
+    },
+    {
+      "epoch": 0.107306296354742,
+      "grad_norm": 1.5660288333892822,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.7087,
+      "step": 170
+    },
+    {
+      "epoch": 0.10793750986271106,
+      "grad_norm": 1.5223867893218994,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 1.5593,
+      "step": 171
+    },
+    {
+      "epoch": 0.10856872337068013,
+      "grad_norm": 1.7410922050476074,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 1.512,
+      "step": 172
+    },
+    {
+      "epoch": 0.1091999368786492,
+      "grad_norm": 1.534555196762085,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 1.4891,
+      "step": 173
+    },
+    {
+      "epoch": 0.10983115038661827,
+      "grad_norm": 1.5364614725112915,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 1.7064,
+      "step": 174
+    },
+    {
+      "epoch": 0.11046236389458734,
+      "grad_norm": 1.4982576370239258,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 1.5146,
+      "step": 175
+    },
+    {
+      "epoch": 0.11109357740255642,
+      "grad_norm": 1.5748322010040283,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.6166,
+      "step": 176
+    },
+    {
+      "epoch": 0.11172479091052548,
+      "grad_norm": 1.5555611848831177,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 1.545,
+      "step": 177
+    },
+    {
+      "epoch": 0.11235600441849455,
+      "grad_norm": 1.5673760175704956,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 1.599,
+      "step": 178
+    },
+    {
+      "epoch": 0.11298721792646363,
+      "grad_norm": 1.5919015407562256,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 1.7003,
+      "step": 179
+    },
+    {
+      "epoch": 0.11361843143443269,
+      "grad_norm": 1.6712528467178345,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 1.6724,
+      "step": 180
+    },
+    {
+      "epoch": 0.11424964494240177,
+      "grad_norm": 1.5928508043289185,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 1.5945,
+      "step": 181
+    },
+    {
+      "epoch": 0.11488085845037084,
+      "grad_norm": 1.6457548141479492,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.624,
+      "step": 182
+    },
+    {
+      "epoch": 0.11551207195833992,
+      "grad_norm": 1.6404932737350464,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 1.5381,
+      "step": 183
+    },
+    {
+      "epoch": 0.11614328546630898,
+      "grad_norm": 1.762967586517334,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 1.6746,
+      "step": 184
+    },
+    {
+      "epoch": 0.11677449897427805,
+      "grad_norm": 1.674850344657898,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.8446,
+      "step": 185
+    },
+    {
+      "epoch": 0.11740571248224713,
+      "grad_norm": 1.6967201232910156,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 1.5615,
+      "step": 186
+    },
+    {
+      "epoch": 0.11803692599021619,
+      "grad_norm": 1.7771048545837402,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 1.6186,
+      "step": 187
+    },
+    {
+      "epoch": 0.11866813949818526,
+      "grad_norm": 1.873599886894226,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.8486,
+      "step": 188
+    },
+    {
+      "epoch": 0.11929935300615434,
+      "grad_norm": 1.7888107299804688,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 1.6824,
+      "step": 189
+    },
+    {
+      "epoch": 0.1199305665141234,
+      "grad_norm": 1.8312265872955322,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 1.6272,
+      "step": 190
+    },
+    {
+      "epoch": 0.12056178002209247,
+      "grad_norm": 1.7326016426086426,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 1.65,
+      "step": 191
+    },
+    {
+      "epoch": 0.12119299353006155,
+      "grad_norm": 1.9180359840393066,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 1.8604,
+      "step": 192
+    },
+    {
+      "epoch": 0.12182420703803061,
+      "grad_norm": 1.9231981039047241,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 1.7107,
+      "step": 193
+    },
+    {
+      "epoch": 0.12245542054599969,
+      "grad_norm": 1.9787672758102417,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.6555,
+      "step": 194
+    },
+    {
+      "epoch": 0.12308663405396876,
+      "grad_norm": 1.8951798677444458,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 1.5909,
+      "step": 195
+    },
+    {
+      "epoch": 0.12371784756193782,
+      "grad_norm": 1.8766393661499023,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 1.6283,
+      "step": 196
+    },
+    {
+      "epoch": 0.1243490610699069,
+      "grad_norm": 2.040215253829956,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 1.5961,
+      "step": 197
+    },
+    {
+      "epoch": 0.12498027457787597,
+      "grad_norm": 2.012611150741577,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 1.423,
+      "step": 198
+    },
+    {
+      "epoch": 0.12561148808584505,
+      "grad_norm": 2.092088460922241,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 1.343,
+      "step": 199
+    },
+    {
+      "epoch": 0.1262427015938141,
+      "grad_norm": 2.52824068069458,
+      "learning_rate": 0.0,
+      "loss": 1.5699,
+      "step": 200
+    },
+    {
+      "epoch": 0.1262427015938141,
+      "eval_loss": 0.3890053629875183,
+      "eval_runtime": 240.4119,
+      "eval_samples_per_second": 11.098,
+      "eval_steps_per_second": 5.549,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.194703055064924e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null