ashanhr commited on
Commit
a0e1538
1 Parent(s): cf2f12f

Training in progress, step 36900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2e7d744e605fdeb47c357c118df95f001014156b76f00db93fae8270b36d782
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0646fcf5cf97fb5503e2fbbc9e98c3960fec3e656e2c994965054eac8467df2b
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a0e0ccb6fbb91ed7c1319992741aa3473b384fd1962f0a2bdef33fd8f0424a5
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c691890dc2b6f3211ced073de26ccb0d7c08362c3a77fadc55959c2aea6190dd
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2329d5a7fedc8f15c09d69541a87944d288c3f9c2684c5d81d7725abd374732
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65fd8a1c0a883c5d4e5251be70dcd09108e90ba7f22053441648adf6f980589f
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:792ef05d47d8fddd57baa2e5e4c79c5f1008526305f0fc51b7e4b0ff90c07793
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d17156c6a1f8e32f040e09547b8bc921e30af508cd3be63ca008523dbabbb3
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28baba878695b471c12cd9b5814de5f1bb170226354cdd018df9f14201aec249
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc136ee3addae9eeb904fb18be82d28a7b235388655ca82f3c8bea405623e110
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.449379078088823,
5
  "eval_steps": 100,
6
- "global_step": 36700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5879,6 +5879,38 @@
5879
  "eval_samples_per_second": 25.396,
5880
  "eval_steps_per_second": 3.175,
5881
  "step": 36700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5882
  }
5883
  ],
5884
  "logging_steps": 100,
@@ -5886,7 +5918,7 @@
5886
  "num_input_tokens_seen": 0,
5887
  "num_train_epochs": 30,
5888
  "save_steps": 100,
5889
- "total_flos": 4.022627751818035e+20,
5890
  "train_batch_size": 8,
5891
  "trial_name": null,
5892
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.533571879604294,
5
  "eval_steps": 100,
6
+ "global_step": 36900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5879
  "eval_samples_per_second": 25.396,
5880
  "eval_steps_per_second": 3.175,
5881
  "step": 36700
5882
+ },
5883
+ {
5884
+ "epoch": 15.49,
5885
+ "grad_norm": 1.7568877935409546,
5886
+ "learning_rate": 2.4359010600706712e-05,
5887
+ "loss": 0.8502,
5888
+ "step": 36800
5889
+ },
5890
+ {
5891
+ "epoch": 15.49,
5892
+ "eval_cer": 0.3838430253021,
5893
+ "eval_loss": 2.477693796157837,
5894
+ "eval_runtime": 393.1117,
5895
+ "eval_samples_per_second": 24.11,
5896
+ "eval_steps_per_second": 3.014,
5897
+ "step": 36800
5898
+ },
5899
+ {
5900
+ "epoch": 15.53,
5901
+ "grad_norm": 1.5188319683074951,
5902
+ "learning_rate": 2.4288339222614844e-05,
5903
+ "loss": 1.1122,
5904
+ "step": 36900
5905
+ },
5906
+ {
5907
+ "epoch": 15.53,
5908
+ "eval_cer": 0.3783705173829729,
5909
+ "eval_loss": 2.0704104900360107,
5910
+ "eval_runtime": 376.2486,
5911
+ "eval_samples_per_second": 25.191,
5912
+ "eval_steps_per_second": 3.15,
5913
+ "step": 36900
5914
  }
5915
  ],
5916
  "logging_steps": 100,
 
5918
  "num_input_tokens_seen": 0,
5919
  "num_train_epochs": 30,
5920
  "save_steps": 100,
5921
+ "total_flos": 4.044598170257006e+20,
5922
  "train_batch_size": 8,
5923
  "trial_name": null,
5924
  "trial_params": null