ashanhr committed on
Commit
09376f6
1 Parent(s): e802b8d

Training in progress, step 24500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d1d86a67812950b8066ed1bd4785f9e5f74864e9e185c3c7d65a737e73412eb
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:562e4a9f15032cff37840fc99c24b36c2938f6824c4ca58590bc282b9eceefae
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9bb1ef4ed06294b4512beffd9b8417f18d9d6c0bf30582a61d1b2418d2d7ea6
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efa372ea2585805112312160143a4a2b5f3a5057ba30e3e0eeff9dd31782e88f
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2964d4840b876fd5ea402f55cbb3d3ecf1ccd015054120c9986b8308a80fe3b6
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cf928a2adfc5e5cc511e1fa21a7d847528c68010f1f44667b1fe6fd9def055
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29deb45594f2ffe1b33e01ccb1b744191d09d4b58ab92a56ff666176b2e013c6
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eff48e968f7fe23fe47e4973cf98e3ccc5305e7ef8b406ef287a82f2825196c0
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed638b60d93524052d154552e2b26184c00d0eedea3ca18e5c2db2c0ab950927
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:006849dff6df46bbddfe5a9b04e68547e9c816ca6762fb11abd57f00f2fd9546
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.145232582614186,
5
  "eval_steps": 100,
6
- "global_step": 24100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3863,6 +3863,70 @@
3863
  "eval_samples_per_second": 25.751,
3864
  "eval_steps_per_second": 3.22,
3865
  "step": 24100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3866
  }
3867
  ],
3868
  "logging_steps": 100,
@@ -3870,7 +3934,7 @@
3870
  "num_input_tokens_seen": 0,
3871
  "num_train_epochs": 30,
3872
  "save_steps": 100,
3873
- "total_flos": 2.6405292408352522e+20,
3874
  "train_batch_size": 8,
3875
  "trial_name": null,
3876
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.313618185645128,
5
  "eval_steps": 100,
6
+ "global_step": 24500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3863
  "eval_samples_per_second": 25.751,
3864
  "eval_steps_per_second": 3.22,
3865
  "step": 24100
3866
+ },
3867
+ {
3868
+ "epoch": 10.19,
3869
+ "grad_norm": 2.6913535594940186,
3870
+ "learning_rate": 3.3260777385159016e-05,
3871
+ "loss": 1.2071,
3872
+ "step": 24200
3873
+ },
3874
+ {
3875
+ "epoch": 10.19,
3876
+ "eval_cer": 0.43098382933792184,
3877
+ "eval_loss": 1.9375296831130981,
3878
+ "eval_runtime": 388.5754,
3879
+ "eval_samples_per_second": 24.392,
3880
+ "eval_steps_per_second": 3.05,
3881
+ "step": 24200
3882
+ },
3883
+ {
3884
+ "epoch": 10.23,
3885
+ "grad_norm": 2.8861985206604004,
3886
+ "learning_rate": 3.319010600706714e-05,
3887
+ "loss": 1.2745,
3888
+ "step": 24300
3889
+ },
3890
+ {
3891
+ "epoch": 10.23,
3892
+ "eval_cer": 0.43681563098822884,
3893
+ "eval_loss": 1.8604010343551636,
3894
+ "eval_runtime": 365.4924,
3895
+ "eval_samples_per_second": 25.932,
3896
+ "eval_steps_per_second": 3.242,
3897
+ "step": 24300
3898
+ },
3899
+ {
3900
+ "epoch": 10.27,
3901
+ "grad_norm": 3.54598069190979,
3902
+ "learning_rate": 3.3119434628975265e-05,
3903
+ "loss": 1.3486,
3904
+ "step": 24400
3905
+ },
3906
+ {
3907
+ "epoch": 10.27,
3908
+ "eval_cer": 0.4357133080442689,
3909
+ "eval_loss": 1.3347864151000977,
3910
+ "eval_runtime": 380.5736,
3911
+ "eval_samples_per_second": 24.905,
3912
+ "eval_steps_per_second": 3.114,
3913
+ "step": 24400
3914
+ },
3915
+ {
3916
+ "epoch": 10.31,
3917
+ "grad_norm": 2.1198437213897705,
3918
+ "learning_rate": 3.304876325088339e-05,
3919
+ "loss": 1.1866,
3920
+ "step": 24500
3921
+ },
3922
+ {
3923
+ "epoch": 10.31,
3924
+ "eval_cer": 0.4299792733956435,
3925
+ "eval_loss": 1.308254361152649,
3926
+ "eval_runtime": 368.9525,
3927
+ "eval_samples_per_second": 25.689,
3928
+ "eval_steps_per_second": 3.212,
3929
+ "step": 24500
3930
  }
3931
  ],
3932
  "logging_steps": 100,
 
3934
  "num_input_tokens_seen": 0,
3935
  "num_train_epochs": 30,
3936
  "save_steps": 100,
3937
+ "total_flos": 2.6839589636223064e+20,
3938
  "train_batch_size": 8,
3939
  "trial_name": null,
3940
  "trial_params": null