dada22231 commited on
Commit
5773989
·
verified ·
1 Parent(s): bbfc486

Training in progress, step 33, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:923b73a88308bb0d1d1cb0c219b95ae0f2c9d701ac17626333ea058ada85f809
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65afd1fbfb289acdfc2dad9ab51171e3c47ae51c1f7fa56be3775c40bf49e722
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80058062562e6d82f489cfdbf83de2f34d2b98eded5d6dfbb3e3d0ef19d0d2a4
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3658547d28c3a6e869f7ff40c443d6693285979de668f3d88204f1ba3dfbcd7
3
  size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4370452686fcd9a18215a07354b2da2386fa3cd7e6ce298b5bf17d885387d3c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e65457c496b52a3dfee048d2351edf3bdd77f1d0d91a73d32b6ffece8c2629df
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2fd01b0471130b62e04ab9901bd5133d77aa02cdf83327c22671b8f2a3c5218
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc5b7017b2d9c7d92c1bcbd6ba1e8a45be0084fd0db5062263165298ec8d1792
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b78aa7a3cff5624d66e1cb35cdf67a0e0823349725146cedaf218a1f0342ff32
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7651419a43670ff1865f0cf3e1cdc4d1de812555470f86ee8be7fca6e8cb35e8
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f166aba3cda0798d535aba25a21add4274fa102347f26d09727980e297e8967
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b35caf84524dc8db4b2a3d75b4084ca310f0916a4c8150eff2d9512bcc8b12
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3277e7386e7d1f090188f038a1b7e93c95ae7289f4d9bd037cfc517b4000a96
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241e4bf4fc78d16a947d399dce6bc1d0f0ed33816fbcbf4d95fdfe0f51a1eea8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6004818081855774,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
- "epoch": 2.3255813953488373,
5
  "eval_steps": 25,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,62 @@
198
  "eval_samples_per_second": 14.266,
199
  "eval_steps_per_second": 3.709,
200
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 1,
@@ -221,12 +277,12 @@
221
  "should_evaluate": false,
222
  "should_log": false,
223
  "should_save": true,
224
- "should_training_stop": false
225
  },
226
  "attributes": {}
227
  }
228
  },
229
- "total_flos": 2.994299446839214e+17,
230
  "train_batch_size": 1,
231
  "trial_name": null,
232
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6004818081855774,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
+ "epoch": 3.0697674418604652,
5
  "eval_steps": 25,
6
+ "global_step": 33,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "eval_samples_per_second": 14.266,
199
  "eval_steps_per_second": 3.709,
200
  "step": 25
201
+ },
202
+ {
203
+ "epoch": 2.4186046511627906,
204
+ "grad_norm": 0.2399173527956009,
205
+ "learning_rate": 2.0855884478824412e-05,
206
+ "loss": 0.4546,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 2.511627906976744,
211
+ "grad_norm": 0.21842080354690552,
212
+ "learning_rate": 1.806564514567258e-05,
213
+ "loss": 0.4159,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 2.604651162790698,
218
+ "grad_norm": 0.09714235365390778,
219
+ "learning_rate": 1.5654402273493805e-05,
220
+ "loss": 0.3683,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 2.697674418604651,
225
+ "grad_norm": 0.1338994950056076,
226
+ "learning_rate": 1.3646898477089626e-05,
227
+ "loss": 0.405,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 2.7906976744186047,
232
+ "grad_norm": 0.18371757864952087,
233
+ "learning_rate": 1.2063733461997805e-05,
234
+ "loss": 0.4067,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 2.883720930232558,
239
+ "grad_norm": 0.12735600769519806,
240
+ "learning_rate": 1.092115264363775e-05,
241
+ "loss": 0.4753,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 2.9767441860465116,
246
+ "grad_norm": 0.1807902604341507,
247
+ "learning_rate": 1.023088044736472e-05,
248
+ "loss": 0.4981,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 3.0697674418604652,
253
+ "grad_norm": 0.3723318576812744,
254
+ "learning_rate": 1e-05,
255
+ "loss": 0.4004,
256
+ "step": 33
257
  }
258
  ],
259
  "logging_steps": 1,
 
277
  "should_evaluate": false,
278
  "should_log": false,
279
  "should_save": true,
280
+ "should_training_stop": true
281
  },
282
  "attributes": {}
283
  }
284
  },
285
+ "total_flos": 3.953855388419359e+17,
286
  "train_batch_size": 1,
287
  "trial_name": null,
288
  "trial_params": null