Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69fc19a0abfe22abdfdcbfe4f47397d09e330ef9772baa1659eeb195a7585643
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:6987ed4d94c034a69fcce223f7a4aa796905cf0f1dfa7349c46a8b7062eb2f30
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a351eae7f107e38de0955c82ff60366add7598376305cc720e7666897c9cf8d3
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9c2203af6e979c6bcf3bae2c50e0dfd6ce92e1f33658ffe09ad5f4964cf9a48
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b34e5ef57fde7d8bd234862a3761c7712c2514b2e9efc82e6319158eea01153
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc009a7cef8eadf54bbfafd4f6dc10043ee1321fba380ceeb1936d4a203a3473
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.1478922367095947,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.7151370679380215,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 14.068,
       "eval_steps_per_second": 3.537,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.0298364776087552e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.1066830158233643,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.9535160905840286,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.068,
       "eval_steps_per_second": 3.537,
       "step": 150
+    },
+    {
+      "epoch": 0.7199046483909416,
+      "grad_norm": 0.5570347309112549,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 2.2434,
+      "step": 151
+    },
+    {
+      "epoch": 0.7246722288438617,
+      "grad_norm": 0.5925551056861877,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 1.9218,
+      "step": 152
+    },
+    {
+      "epoch": 0.7294398092967819,
+      "grad_norm": 0.7982468008995056,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 1.9788,
+      "step": 153
+    },
+    {
+      "epoch": 0.734207389749702,
+      "grad_norm": 0.8105083107948303,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 1.915,
+      "step": 154
+    },
+    {
+      "epoch": 0.7389749702026222,
+      "grad_norm": 0.8352587223052979,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 2.2492,
+      "step": 155
+    },
+    {
+      "epoch": 0.7437425506555423,
+      "grad_norm": 0.674999475479126,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 2.3595,
+      "step": 156
+    },
+    {
+      "epoch": 0.7485101311084624,
+      "grad_norm": 0.7149543166160583,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 2.1465,
+      "step": 157
+    },
+    {
+      "epoch": 0.7532777115613826,
+      "grad_norm": 0.723102331161499,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 2.1712,
+      "step": 158
+    },
+    {
+      "epoch": 0.7580452920143027,
+      "grad_norm": 0.6700135469436646,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 2.2641,
+      "step": 159
+    },
+    {
+      "epoch": 0.7628128724672228,
+      "grad_norm": 0.6606097221374512,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 2.0487,
+      "step": 160
+    },
+    {
+      "epoch": 0.767580452920143,
+      "grad_norm": 0.7337142825126648,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 2.1306,
+      "step": 161
+    },
+    {
+      "epoch": 0.7723480333730631,
+      "grad_norm": 0.6384457349777222,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 2.0698,
+      "step": 162
+    },
+    {
+      "epoch": 0.7771156138259833,
+      "grad_norm": 0.6712755560874939,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 2.0717,
+      "step": 163
+    },
+    {
+      "epoch": 0.7818831942789034,
+      "grad_norm": 0.7360457181930542,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 2.1774,
+      "step": 164
+    },
+    {
+      "epoch": 0.7866507747318237,
+      "grad_norm": 0.724876344203949,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 2.0904,
+      "step": 165
+    },
+    {
+      "epoch": 0.7914183551847438,
+      "grad_norm": 0.8232304453849792,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 2.2429,
+      "step": 166
+    },
+    {
+      "epoch": 0.7961859356376639,
+      "grad_norm": 0.8048411011695862,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 2.0947,
+      "step": 167
+    },
+    {
+      "epoch": 0.8009535160905841,
+      "grad_norm": 0.7875757217407227,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 1.9854,
+      "step": 168
+    },
+    {
+      "epoch": 0.8057210965435042,
+      "grad_norm": 0.8814699649810791,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 2.0022,
+      "step": 169
+    },
+    {
+      "epoch": 0.8104886769964244,
+      "grad_norm": 0.8261225819587708,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 2.1227,
+      "step": 170
+    },
+    {
+      "epoch": 0.8152562574493445,
+      "grad_norm": 0.8244904279708862,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 2.1583,
+      "step": 171
+    },
+    {
+      "epoch": 0.8200238379022646,
+      "grad_norm": 0.8068302869796753,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 2.0933,
+      "step": 172
+    },
+    {
+      "epoch": 0.8247914183551848,
+      "grad_norm": 0.8379300236701965,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 2.152,
+      "step": 173
+    },
+    {
+      "epoch": 0.8295589988081049,
+      "grad_norm": 0.9288865923881531,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 2.0066,
+      "step": 174
+    },
+    {
+      "epoch": 0.834326579261025,
+      "grad_norm": 0.9022939205169678,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 2.1387,
+      "step": 175
+    },
+    {
+      "epoch": 0.8390941597139452,
+      "grad_norm": 0.9126566052436829,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 2.0102,
+      "step": 176
+    },
+    {
+      "epoch": 0.8438617401668653,
+      "grad_norm": 0.9721250534057617,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 2.2065,
+      "step": 177
+    },
+    {
+      "epoch": 0.8486293206197855,
+      "grad_norm": 0.9469491243362427,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 2.09,
+      "step": 178
+    },
+    {
+      "epoch": 0.8533969010727056,
+      "grad_norm": 0.9847070574760437,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 2.0802,
+      "step": 179
+    },
+    {
+      "epoch": 0.8581644815256257,
+      "grad_norm": 1.0105953216552734,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 1.9934,
+      "step": 180
+    },
+    {
+      "epoch": 0.8629320619785459,
+      "grad_norm": 1.0612765550613403,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.7581,
+      "step": 181
+    },
+    {
+      "epoch": 0.867699642431466,
+      "grad_norm": 1.0604398250579834,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 2.1245,
+      "step": 182
+    },
+    {
+      "epoch": 0.8724672228843862,
+      "grad_norm": 1.0303086042404175,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 2.1671,
+      "step": 183
+    },
+    {
+      "epoch": 0.8772348033373063,
+      "grad_norm": 1.0367300510406494,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 1.9276,
+      "step": 184
+    },
+    {
+      "epoch": 0.8820023837902264,
+      "grad_norm": 1.1234383583068848,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.9505,
+      "step": 185
+    },
+    {
+      "epoch": 0.8867699642431466,
+      "grad_norm": 1.2010043859481812,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 2.1285,
+      "step": 186
+    },
+    {
+      "epoch": 0.8915375446960667,
+      "grad_norm": 1.1489535570144653,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 1.9923,
+      "step": 187
+    },
+    {
+      "epoch": 0.8963051251489869,
+      "grad_norm": 1.2861065864562988,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 2.1916,
+      "step": 188
+    },
+    {
+      "epoch": 0.901072705601907,
+      "grad_norm": 1.2785236835479736,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 2.185,
+      "step": 189
+    },
+    {
+      "epoch": 0.9058402860548271,
+      "grad_norm": 1.339624285697937,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 2.3496,
+      "step": 190
+    },
+    {
+      "epoch": 0.9106078665077473,
+      "grad_norm": 1.5450823307037354,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 2.4015,
+      "step": 191
+    },
+    {
+      "epoch": 0.9153754469606674,
+      "grad_norm": 1.3474223613739014,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 2.2637,
+      "step": 192
+    },
+    {
+      "epoch": 0.9201430274135876,
+      "grad_norm": 1.6195087432861328,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 2.4341,
+      "step": 193
+    },
+    {
+      "epoch": 0.9249106078665077,
+      "grad_norm": 1.5226205587387085,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 2.4441,
+      "step": 194
+    },
+    {
+      "epoch": 0.929678188319428,
+      "grad_norm": 1.7517061233520508,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 2.517,
+      "step": 195
+    },
+    {
+      "epoch": 0.9344457687723481,
+      "grad_norm": 1.7285345792770386,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 2.4959,
+      "step": 196
+    },
+    {
+      "epoch": 0.9392133492252682,
+      "grad_norm": 1.9032076597213745,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 2.4887,
+      "step": 197
+    },
+    {
+      "epoch": 0.9439809296781884,
+      "grad_norm": 2.0340332984924316,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 2.1746,
+      "step": 198
+    },
+    {
+      "epoch": 0.9487485101311085,
+      "grad_norm": 2.3427014350891113,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 2.1673,
+      "step": 199
+    },
+    {
+      "epoch": 0.9535160905840286,
+      "grad_norm": 3.5722577571868896,
+      "learning_rate": 0.0,
+      "loss": 2.2554,
+      "step": 200
+    },
+    {
+      "epoch": 0.9535160905840286,
+      "eval_loss": 2.1066830158233643,
+      "eval_runtime": 25.1636,
+      "eval_samples_per_second": 14.068,
+      "eval_steps_per_second": 3.537,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.7075670040169677e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null