Training in progress, step 187, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +263 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9ffe62f1849b80958b99e6b4e8c2fcb4897cee71468be689d0d6373b56c8012
 size 800116456

 version https://git-lfs.github.com/spec/v1
+oid sha256:8129a125b7d224c5a4dfd0b138e2a475efc421c9c18ee8977d89da91427e7ccd
 size 800116456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d692e75e9df5a5c99c5484f09862d7a411c20f904624e25e8cfb2fe87d4467a
 size 406743412

 version https://git-lfs.github.com/spec/v1
+oid sha256:19601cf15b5a60ab982a2450c152fece1efb1b67f2b54bd8c9a408cb98e87fd7
 size 406743412

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c9012c3fe222b5695a316b250842451e195a7008f8f7f717d19dfbfb595fa56
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5ee87d49defcad0f47dc1a4a09069d6c2c6fe7169522597d866d4f1680aac6a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9fe8004c8f6d6407f8607c14c640df815a3d2f0ac0214bc2a59f44ed0abb635
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:668ec8597c3f420a63a11711389ba814235340c6b18085fdcc8191e9cdf0a8c6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.6144959330558777,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 2.4096385542168672,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,265 @@
       "eval_samples_per_second": 6.221,
       "eval_steps_per_second": 1.6,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1371,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.16321607800193e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.6144959330558777,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 3.004016064257028,
   "eval_steps": 50,
+  "global_step": 187,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.221,
       "eval_steps_per_second": 1.6,
       "step": 150
+    },
+    {
+      "epoch": 2.42570281124498,
+      "grad_norm": 0.7063714861869812,
+      "learning_rate": 9.864418103451828e-06,
+      "loss": 0.2084,
+      "step": 151
+    },
+    {
+      "epoch": 2.4417670682730925,
+      "grad_norm": 0.6370130777359009,
+      "learning_rate": 9.34151817719166e-06,
+      "loss": 0.2145,
+      "step": 152
+    },
+    {
+      "epoch": 2.4578313253012047,
+      "grad_norm": 0.692208468914032,
+      "learning_rate": 8.831426594527975e-06,
+      "loss": 0.2234,
+      "step": 153
+    },
+    {
+      "epoch": 2.4738955823293174,
+      "grad_norm": 0.6455745100975037,
+      "learning_rate": 8.334304045874247e-06,
+      "loss": 0.1762,
+      "step": 154
+    },
+    {
+      "epoch": 2.4899598393574296,
+      "grad_norm": 0.5893248319625854,
+      "learning_rate": 7.850307136104247e-06,
+      "loss": 0.3005,
+      "step": 155
+    },
+    {
+      "epoch": 2.5060240963855422,
+      "grad_norm": 0.4999232888221741,
+      "learning_rate": 7.379588335217874e-06,
+      "loss": 0.4372,
+      "step": 156
+    },
+    {
+      "epoch": 2.522088353413655,
+      "grad_norm": 0.5085187554359436,
+      "learning_rate": 6.922295930309691e-06,
+      "loss": 0.4771,
+      "step": 157
+    },
+    {
+      "epoch": 2.538152610441767,
+      "grad_norm": 0.5058614015579224,
+      "learning_rate": 6.478573978855146e-06,
+      "loss": 0.4607,
+      "step": 158
+    },
+    {
+      "epoch": 2.5542168674698793,
+      "grad_norm": 0.5453181862831116,
+      "learning_rate": 6.048562263329138e-06,
+      "loss": 0.4285,
+      "step": 159
+    },
+    {
+      "epoch": 2.570281124497992,
+      "grad_norm": 0.5697405338287354,
+      "learning_rate": 5.6323962471714286e-06,
+      "loss": 0.3803,
+      "step": 160
+    },
+    {
+      "epoch": 2.5863453815261046,
+      "grad_norm": 0.6553964018821716,
+      "learning_rate": 5.23020703211255e-06,
+      "loss": 0.3647,
+      "step": 161
+    },
+    {
+      "epoch": 2.602409638554217,
+      "grad_norm": 0.6540874242782593,
+      "learning_rate": 4.842121316873821e-06,
+      "loss": 0.3588,
+      "step": 162
+    },
+    {
+      "epoch": 2.6184738955823295,
+      "grad_norm": 0.7053024768829346,
+      "learning_rate": 4.468261357254339e-06,
+      "loss": 0.3134,
+      "step": 163
+    },
+    {
+      "epoch": 2.6345381526104417,
+      "grad_norm": 0.7029718160629272,
+      "learning_rate": 4.108744927617669e-06,
+      "loss": 0.244,
+      "step": 164
+    },
+    {
+      "epoch": 2.6506024096385543,
+      "grad_norm": 0.8717929720878601,
+      "learning_rate": 3.763685283790208e-06,
+      "loss": 0.2039,
+      "step": 165
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.684095561504364,
+      "learning_rate": 3.4331911273830784e-06,
+      "loss": 0.2209,
+      "step": 166
+    },
+    {
+      "epoch": 2.682730923694779,
+      "grad_norm": 0.7062694430351257,
+      "learning_rate": 3.117366571548608e-06,
+      "loss": 0.247,
+      "step": 167
+    },
+    {
+      "epoch": 2.6987951807228914,
+      "grad_norm": 0.63615483045578,
+      "learning_rate": 2.816311108182368e-06,
+      "loss": 0.1926,
+      "step": 168
+    },
+    {
+      "epoch": 2.714859437751004,
+      "grad_norm": 0.6758939027786255,
+      "learning_rate": 2.530119576580936e-06,
+      "loss": 0.1595,
+      "step": 169
+    },
+    {
+      "epoch": 2.7309236947791167,
+      "grad_norm": 0.589474081993103,
+      "learning_rate": 2.258882133565404e-06,
+      "loss": 0.3024,
+      "step": 170
+    },
+    {
+      "epoch": 2.746987951807229,
+      "grad_norm": 0.5665660500526428,
+      "learning_rate": 2.0026842250799038e-06,
+      "loss": 0.5262,
+      "step": 171
+    },
+    {
+      "epoch": 2.7630522088353415,
+      "grad_norm": 0.5001568794250488,
+      "learning_rate": 1.7616065592742038e-06,
+      "loss": 0.4539,
+      "step": 172
+    },
+    {
+      "epoch": 2.7791164658634537,
+      "grad_norm": 0.5900115370750427,
+      "learning_rate": 1.5357250810788314e-06,
+      "loss": 0.4777,
+      "step": 173
+    },
+    {
+      "epoch": 2.7951807228915664,
+      "grad_norm": 0.64364093542099,
+      "learning_rate": 1.3251109482806666e-06,
+      "loss": 0.4843,
+      "step": 174
+    },
+    {
+      "epoch": 2.8112449799196786,
+      "grad_norm": 0.6132477521896362,
+      "learning_rate": 1.1298305091066664e-06,
+      "loss": 0.3922,
+      "step": 175
+    },
+    {
+      "epoch": 2.8273092369477912,
+      "grad_norm": 0.6978268027305603,
+      "learning_rate": 9.499452813226284e-07,
+      "loss": 0.3957,
+      "step": 176
+    },
+    {
+      "epoch": 2.8433734939759034,
+      "grad_norm": 0.6823281645774841,
+      "learning_rate": 7.855119328537109e-07,
+      "loss": 0.2961,
+      "step": 177
+    },
+    {
+      "epoch": 2.859437751004016,
+      "grad_norm": 0.7309689521789551,
+      "learning_rate": 6.365822639327723e-07,
+      "loss": 0.2964,
+      "step": 178
+    },
+    {
+      "epoch": 2.8755020080321287,
+      "grad_norm": 0.6952879428863525,
+      "learning_rate": 5.032031907821089e-07,
+      "loss": 0.24,
+      "step": 179
+    },
+    {
+      "epoch": 2.891566265060241,
+      "grad_norm": 0.6516870260238647,
+      "learning_rate": 3.854167308337708e-07,
+      "loss": 0.1984,
+      "step": 180
+    },
+    {
+      "epoch": 2.907630522088353,
+      "grad_norm": 0.7209360003471375,
+      "learning_rate": 2.8325998949314536e-07,
+      "loss": 0.223,
+      "step": 181
+    },
+    {
+      "epoch": 2.923694779116466,
+      "grad_norm": 0.7331954836845398,
+      "learning_rate": 1.9676514844987337e-07,
+      "loss": 0.277,
+      "step": 182
+    },
+    {
+      "epoch": 2.9397590361445785,
+      "grad_norm": 0.572056233882904,
+      "learning_rate": 1.2595945553992573e-07,
+      "loss": 0.1437,
+      "step": 183
+    },
+    {
+      "epoch": 2.9558232931726907,
+      "grad_norm": 0.7486905455589294,
+      "learning_rate": 7.086521616190279e-08,
+      "loss": 0.2169,
+      "step": 184
+    },
+    {
+      "epoch": 2.9718875502008033,
+      "grad_norm": 0.7117255926132202,
+      "learning_rate": 3.149978625032191e-08,
+      "loss": 0.3794,
+      "step": 185
+    },
+    {
+      "epoch": 2.9879518072289155,
+      "grad_norm": 0.6114065051078796,
+      "learning_rate": 7.875566808107637e-09,
+      "loss": 0.2117,
+      "step": 186
+    },
+    {
+      "epoch": 3.004016064257028,
+      "grad_norm": 2.410719156265259,
+      "learning_rate": 0.0,
+      "loss": 0.4233,
+      "step": 187
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.146618326889267e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null