leixa commited on
Commit
ebace33
·
verified ·
1 Parent(s): a98a0c2

Training in progress, step 126, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff8ab265d07c6eecf718607b5120d82a9dd48aa873031f384d6025f0af085749
3
  size 191968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e861928e1b2ca38a4a6746866d7d8f7b7ca001e137c5b880cdc7ca389cae56a0
3
  size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6309403c2680202e09f8fd0c2a08f99dfe3acbfe19d3d0778c0665f7f005aed6
3
  size 253144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d926fce7eee99bf36a4de34f618c75332ae83e4fbcc18a94351c99ca4b7722e1
3
  size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5b1aae0830d1010a88ca597c0f1ed03901b00f32510cacc8649169e36f65fbe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5043a9d527c42b332561475083f4469c87b7663281bbf8ae1e7c09b5adbc61a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ecf5d3685ba2ba738f94f0ff54d87b59bd9a5e0c4c32dcfd75219f10311d69a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9350fe25e75c3aec8e0f08fabcc3ea69e0fa51f62eea810c9a733906e4363f8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2719033232628398,
5
  "eval_steps": 21,
6
- "global_step": 105,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -300,6 +300,63 @@
300
  "eval_samples_per_second": 515.557,
301
  "eval_steps_per_second": 66.286,
302
  "step": 105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  }
304
  ],
305
  "logging_steps": 3,
@@ -319,7 +376,7 @@
319
  "attributes": {}
320
  }
321
  },
322
- "total_flos": 11712819363840.0,
323
  "train_batch_size": 8,
324
  "trial_name": null,
325
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.525679758308157,
5
  "eval_steps": 21,
6
+ "global_step": 126,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
300
  "eval_samples_per_second": 515.557,
301
  "eval_steps_per_second": 66.286,
302
  "step": 105
303
+ },
304
+ {
305
+ "epoch": 1.308157099697885,
306
+ "grad_norm": 0.21600359678268433,
307
+ "learning_rate": 6.394324377647028e-05,
308
+ "loss": 10.1603,
309
+ "step": 108
310
+ },
311
+ {
312
+ "epoch": 1.3444108761329305,
313
+ "grad_norm": 0.24075965583324432,
314
+ "learning_rate": 6.203940082845144e-05,
315
+ "loss": 10.0864,
316
+ "step": 111
317
+ },
318
+ {
319
+ "epoch": 1.3806646525679758,
320
+ "grad_norm": 0.25287488102912903,
321
+ "learning_rate": 6.011683834586473e-05,
322
+ "loss": 10.6661,
323
+ "step": 114
324
+ },
325
+ {
326
+ "epoch": 1.4169184290030212,
327
+ "grad_norm": 0.2387695461511612,
328
+ "learning_rate": 5.8178545636514145e-05,
329
+ "loss": 9.6976,
330
+ "step": 117
331
+ },
332
+ {
333
+ "epoch": 1.4531722054380665,
334
+ "grad_norm": 0.21192365884780884,
335
+ "learning_rate": 5.622753646644102e-05,
336
+ "loss": 10.451,
337
+ "step": 120
338
+ },
339
+ {
340
+ "epoch": 1.4894259818731117,
341
+ "grad_norm": 0.18546977639198303,
342
+ "learning_rate": 5.426684437395196e-05,
343
+ "loss": 10.2875,
344
+ "step": 123
345
+ },
346
+ {
347
+ "epoch": 1.525679758308157,
348
+ "grad_norm": 0.2497938573360443,
349
+ "learning_rate": 5.229951795290353e-05,
350
+ "loss": 10.3627,
351
+ "step": 126
352
+ },
353
+ {
354
+ "epoch": 1.525679758308157,
355
+ "eval_loss": 10.205331802368164,
356
+ "eval_runtime": 0.2653,
357
+ "eval_samples_per_second": 527.718,
358
+ "eval_steps_per_second": 67.849,
359
+ "step": 126
360
  }
361
  ],
362
  "logging_steps": 3,
 
376
  "attributes": {}
377
  }
378
  },
379
+ "total_flos": 14055383236608.0,
380
  "train_batch_size": 8,
381
  "trial_name": null,
382
  "trial_params": null