ashanhr commited on
Commit
2a63b53
1 Parent(s): f1dab1b

Training in progress, step 47900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c737cef3e8496bd78bd7ec0023f68befe3d9a9c05598534030ccd26c85918ac6
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1adfcc7c1f34a33f4a2bfbac3eb295bc3f331ec01a66d851a4cfb6f83c1aff40
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2723715a10a82482c591b82cb51edd59abae6401d5ba96ea07090b181b9d643
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed007356b71ab84b0e8369afe1f731d7b87590e01e741341a30fabb57ea24819
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b283c3ae87191de5fca07697a3974a9d234e69848cf8838bc28bee17cf75ce3
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9f402496bd178d4de64e033a566a7386cd249090a2d60c9e41debb84daa8866
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:392684c134f5cf29a9c14bb0a154c1d7d4d520741978198c633be27f537faa31
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f110e151f1624ce1e56568b61663aad2f2940177db6d8936e6a6eed4b8851d5b
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9916ddee38224af3ba1b5cf7e13befa56143fa30905487537b8489305fe5fcae
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23f550947596db3e46a433785e4ed8ccd479a0748ceb41ef26d7a8a3b5971eb4
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.079983161439696,
5
  "eval_steps": 100,
6
- "global_step": 47700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7639,6 +7639,38 @@
7639
  "eval_samples_per_second": 24.789,
7640
  "eval_steps_per_second": 3.099,
7641
  "step": 47700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7642
  }
7643
  ],
7644
  "logging_steps": 100,
@@ -7646,7 +7678,7 @@
7646
  "num_input_tokens_seen": 0,
7647
  "num_train_epochs": 30,
7648
  "save_steps": 100,
7649
- "total_flos": 5.227217240325222e+20,
7650
  "train_batch_size": 8,
7651
  "trial_name": null,
7652
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.164175962955166,
5
  "eval_steps": 100,
6
+ "global_step": 47900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7639
  "eval_samples_per_second": 24.789,
7640
  "eval_steps_per_second": 3.099,
7641
  "step": 47700
7642
+ },
7643
+ {
7644
+ "epoch": 20.12,
7645
+ "grad_norm": 2.015397071838379,
7646
+ "learning_rate": 1.6587985865724383e-05,
7647
+ "loss": 0.4211,
7648
+ "step": 47800
7649
+ },
7650
+ {
7651
+ "epoch": 20.12,
7652
+ "eval_cer": 0.3458312150482969,
7653
+ "eval_loss": 2.9953062534332275,
7654
+ "eval_runtime": 408.1484,
7655
+ "eval_samples_per_second": 23.222,
7656
+ "eval_steps_per_second": 2.903,
7657
+ "step": 47800
7658
+ },
7659
+ {
7660
+ "epoch": 20.16,
7661
+ "grad_norm": 7.887564182281494,
7662
+ "learning_rate": 1.651731448763251e-05,
7663
+ "loss": 0.415,
7664
+ "step": 47900
7665
+ },
7666
+ {
7667
+ "epoch": 20.16,
7668
+ "eval_cer": 0.3473783778499081,
7669
+ "eval_loss": 3.28759765625,
7670
+ "eval_runtime": 390.4889,
7671
+ "eval_samples_per_second": 24.272,
7672
+ "eval_steps_per_second": 3.035,
7673
+ "step": 47900
7674
  }
7675
  ],
7676
  "logging_steps": 100,
 
7678
  "num_input_tokens_seen": 0,
7679
  "num_train_epochs": 30,
7680
  "save_steps": 100,
7681
+ "total_flos": 5.249064179783837e+20,
7682
  "train_batch_size": 8,
7683
  "trial_name": null,
7684
  "trial_params": null