ashanhr commited on
Commit
e05c6ae
1 Parent(s): 514a3b6

Training in progress, step 21700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd295564c886cab31b38fdc522144c8184d26012f8017f463f8b19068b69ce54
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c8f7133601f6241adb6bd6bf01a75d224f71ba6cb54aeb09296f655d47f3a1c
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10129ed8317e1eb717c2b209ed001fa2e165c880f232b8ab7fc6026685e36a65
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:173421e8ecf6978117ae5b199e2a50f45c7c092152472c947077b42e03c89cca
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d5ca3cb0ab23b9c64be740c79b07d64816cd7d9490aeeae08ccc4a3f3b37ddc
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241e1f0ab55a70d32c9f789ad9ea31e0da83a3e1004167f836a2db341c5607c0
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a20e1a39d049694d044e652201b8a810f3562871d67cc396aaad5e0bbc2f803
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1d0bd8c568f0c227459d6e419ca5b7a73769c308ffd09703cd18e64758377c
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48225e4ec8a66fcccd258aec5acd97fada08046f07bbc5a0c629141f0b9c9d7d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be01cce06a3b36303f6edddcac607adba712c87a93866bac4ad77aaf758f37b2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.008629762155335,
5
  "eval_steps": 100,
6
- "global_step": 21400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3431,6 +3431,54 @@
3431
  "eval_samples_per_second": 26.074,
3432
  "eval_steps_per_second": 3.26,
3433
  "step": 21400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3434
  }
3435
  ],
3436
  "logging_steps": 100,
@@ -3438,7 +3486,7 @@
3438
  "num_input_tokens_seen": 0,
3439
  "num_train_epochs": 30,
3440
  "save_steps": 100,
3441
- "total_flos": 2.3455923104904626e+20,
3442
  "train_batch_size": 8,
3443
  "trial_name": null,
3444
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.134918964428541,
5
  "eval_steps": 100,
6
+ "global_step": 21700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3431
  "eval_samples_per_second": 26.074,
3432
  "eval_steps_per_second": 3.26,
3433
  "step": 21400
3434
+ },
3435
+ {
3436
+ "epoch": 9.05,
3437
+ "grad_norm": 2.522552251815796,
3438
+ "learning_rate": 3.5168197879858664e-05,
3439
+ "loss": 1.2512,
3440
+ "step": 21500
3441
+ },
3442
+ {
3443
+ "epoch": 9.05,
3444
+ "eval_cer": 0.44793418325446793,
3445
+ "eval_loss": 2.5103232860565186,
3446
+ "eval_runtime": 384.7009,
3447
+ "eval_samples_per_second": 24.637,
3448
+ "eval_steps_per_second": 3.08,
3449
+ "step": 21500
3450
+ },
3451
+ {
3452
+ "epoch": 9.09,
3453
+ "grad_norm": 17.708566665649414,
3454
+ "learning_rate": 3.5097526501766785e-05,
3455
+ "loss": 1.2248,
3456
+ "step": 21600
3457
+ },
3458
+ {
3459
+ "epoch": 9.09,
3460
+ "eval_cer": 0.4432878064995503,
3461
+ "eval_loss": 2.5621256828308105,
3462
+ "eval_runtime": 370.664,
3463
+ "eval_samples_per_second": 25.57,
3464
+ "eval_steps_per_second": 3.197,
3465
+ "step": 21600
3466
+ },
3467
+ {
3468
+ "epoch": 9.13,
3469
+ "grad_norm": 2.740123987197876,
3470
+ "learning_rate": 3.502685512367491e-05,
3471
+ "loss": 1.3298,
3472
+ "step": 21700
3473
+ },
3474
+ {
3475
+ "epoch": 9.13,
3476
+ "eval_cer": 0.441982617027101,
3477
+ "eval_loss": 2.49664568901062,
3478
+ "eval_runtime": 372.6231,
3479
+ "eval_samples_per_second": 25.436,
3480
+ "eval_steps_per_second": 3.18,
3481
+ "step": 21700
3482
  }
3483
  ],
3484
  "logging_steps": 100,
 
3486
  "num_input_tokens_seen": 0,
3487
  "num_train_epochs": 30,
3488
  "save_steps": 100,
3489
+ "total_flos": 2.3783863994215714e+20,
3490
  "train_batch_size": 8,
3491
  "trial_name": null,
3492
  "trial_params": null