vdos commited on
Commit
9d71fb4
1 Parent(s): 35d24fc

Training in progress, step 37, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9015c86dc25befeb0d3169bebf1301e01f29c674a0a9e83bbb833c86915100c
3
  size 37352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18b643a0d885230f5a0437db3e4af4a1fe36b5d89a67c8b170ba333a64fe01e4
3
  size 37352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93fafa34a5f4b89022303d36440045581e58ce710c360feae1b59fe992181252
3
  size 83444
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feafc958b07dacce078c9826ab8f00b78eb58f48b057a3a515086f080c74daf1
3
  size 83444
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c73512b05b9beb970d9c230af4efa7a8dc3b4fd36ad026ca0f590f2e8737d48
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b96f0eb56ba8557a197667600806dea2be8234032945e9d08e5e3794c926c3ee
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef0d2ea6335bd3947ae84603b4094a4b89eb75a80756a6b1fb0ef75533f6f325
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1525a0af52304123e8a1fffde73b195054e95c85af6a6d5d4de550411aeddf0e
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bd13ff2740c031b96a5874af3878e8fb3d0a65e4d673f4fc602556ae4260824
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45db4df4a0e8a74454a6fb2495e35a8da476b43a053eb44427c0697806b6d4b4
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55f0ef04c1cec259f55fb71c771fbbae41c159be02436d43fb898a4a590cf404
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65e7f837d9c225387f6d912efdd7a06c07d64c89e7579546b8f97fcd4f7ba942
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fec53e945c45054b00fddcb1b630559fc3ac4abaf959b53db326cce0461de3a8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07dad002b9eddae7ca1e092b675b66e89a8b668e40c1ddc7dfb47cd4a5daaef6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 12.4584321975708,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
- "epoch": 2.0725388601036268,
5
  "eval_steps": 25,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,90 @@
198
  "eval_samples_per_second": 380.993,
199
  "eval_steps_per_second": 51.109,
200
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 1,
@@ -221,12 +305,12 @@
221
  "should_evaluate": false,
222
  "should_log": false,
223
  "should_save": true,
224
- "should_training_stop": false
225
  },
226
  "attributes": {}
227
  }
228
  },
229
- "total_flos": 421527552000.0,
230
  "train_batch_size": 2,
231
  "trial_name": null,
232
  "trial_params": null
 
1
  {
2
  "best_metric": 12.4584321975708,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
+ "epoch": 3.0673575129533677,
5
  "eval_steps": 25,
6
+ "global_step": 37,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "eval_samples_per_second": 380.993,
199
  "eval_steps_per_second": 51.109,
200
  "step": 25
201
+ },
202
+ {
203
+ "epoch": 2.155440414507772,
204
+ "grad_norm": 0.023453839123249054,
205
+ "learning_rate": 2.245515092739488e-05,
206
+ "loss": 12.3639,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 2.238341968911917,
211
+ "grad_norm": 0.02777229994535446,
212
+ "learning_rate": 1.8825509907063327e-05,
213
+ "loss": 12.6757,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 2.321243523316062,
218
+ "grad_norm": 0.02316541038453579,
219
+ "learning_rate": 1.544686755065677e-05,
220
+ "loss": 12.2681,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 2.4041450777202074,
225
+ "grad_norm": 0.02422947622835636,
226
+ "learning_rate": 1.2346426699819458e-05,
227
+ "loss": 12.4471,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 2.4870466321243523,
232
+ "grad_norm": 0.02776755392551422,
233
+ "learning_rate": 9.549150281252633e-06,
234
+ "loss": 12.5286,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 2.5699481865284977,
239
+ "grad_norm": 0.022363506257534027,
240
+ "learning_rate": 7.077560319906695e-06,
241
+ "loss": 12.452,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 2.6528497409326426,
246
+ "grad_norm": 0.024167869240045547,
247
+ "learning_rate": 4.951556604879048e-06,
248
+ "loss": 12.3364,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 2.7357512953367875,
253
+ "grad_norm": 0.029816903173923492,
254
+ "learning_rate": 3.18825646801314e-06,
255
+ "loss": 12.6708,
256
+ "step": 33
257
+ },
258
+ {
259
+ "epoch": 2.818652849740933,
260
+ "grad_norm": 0.022897807881236076,
261
+ "learning_rate": 1.8018569652073381e-06,
262
+ "loss": 12.2847,
263
+ "step": 34
264
+ },
265
+ {
266
+ "epoch": 2.901554404145078,
267
+ "grad_norm": 0.022587254643440247,
268
+ "learning_rate": 8.035205700685167e-07,
269
+ "loss": 12.4786,
270
+ "step": 35
271
+ },
272
+ {
273
+ "epoch": 2.9844559585492227,
274
+ "grad_norm": 0.028138399124145508,
275
+ "learning_rate": 2.012853002380466e-07,
276
+ "loss": 12.6596,
277
+ "step": 36
278
+ },
279
+ {
280
+ "epoch": 3.0673575129533677,
281
+ "grad_norm": 0.04486376419663429,
282
+ "learning_rate": 0.0,
283
+ "loss": 24.0609,
284
+ "step": 37
285
  }
286
  ],
287
  "logging_steps": 1,
 
305
  "should_evaluate": false,
306
  "should_log": false,
307
  "should_save": true,
308
+ "should_training_stop": true
309
  },
310
  "attributes": {}
311
  }
312
  },
313
+ "total_flos": 623860776960.0,
314
  "train_batch_size": 2,
315
  "trial_name": null,
316
  "trial_params": null