ashanhr commited on
Commit
d7d4c78
1 Parent(s): b03f04e

Training in progress, step 57300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e346f6031cce705b7a51a894f6f6378890db177912b7be7998e56d05733b2858
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b70489ef8d247cef87879363895d4f10172c2f01a2dce1018521c3e1196062
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c1028245122ba4f0c3074dff84fb7696439ffd310961493c9af148d1751f0d0
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f074e5548cab7f7d8619266bf2b437b7ac6af4dd2c84d102157ca2b21d476a6
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1480bf7de8a85342639e631f5104ad44bcd60ccdd626b0e1def4e6b1302d49f4
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c9164876ea35a671565a42caafd2e90750339ac21a3f33050ee6f24b574012d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2354b0cf33dcdb8cc3fac9417682e1384f8242d5c4698533d6cd32a821a72d6
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3072c8a89084a4b231262fb6e7855152c0d0f0e572ea1bc0f437092386b0b94a
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4c15ce344ebb0358d0e2efa0d9ff824c7520b3efdb90c37d82fe6283bcb7100
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26d96a9177e995c4a7993aea1285f8bcbf19f7b0074431df56e02f6ba6af8002
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 24.03704483266681,
5
  "eval_steps": 100,
6
- "global_step": 57100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9143,6 +9143,38 @@
9143
  "eval_samples_per_second": 23.982,
9144
  "eval_steps_per_second": 2.998,
9145
  "step": 57100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9146
  }
9147
  ],
9148
  "logging_steps": 100,
@@ -9150,7 +9182,7 @@
9150
  "num_input_tokens_seen": 0,
9151
  "num_train_epochs": 30,
9152
  "save_steps": 100,
9153
- "total_flos": 6.257522860637754e+20,
9154
  "train_batch_size": 8,
9155
  "trial_name": null,
9156
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 24.12123763418228,
5
  "eval_steps": 100,
6
+ "global_step": 57300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9143
  "eval_samples_per_second": 23.982,
9144
  "eval_steps_per_second": 2.998,
9145
  "step": 57100
9146
+ },
9147
+ {
9148
+ "epoch": 24.08,
9149
+ "grad_norm": 5.454607009887695,
9150
+ "learning_rate": 9.946289752650177e-06,
9151
+ "loss": 0.2339,
9152
+ "step": 57200
9153
+ },
9154
+ {
9155
+ "epoch": 24.08,
9156
+ "eval_cer": 0.3325251261194322,
9157
+ "eval_loss": 2.6627321243286133,
9158
+ "eval_runtime": 422.5781,
9159
+ "eval_samples_per_second": 22.429,
9160
+ "eval_steps_per_second": 2.804,
9161
+ "step": 57200
9162
+ },
9163
+ {
9164
+ "epoch": 24.12,
9165
+ "grad_norm": 4.305576801300049,
9166
+ "learning_rate": 9.875618374558305e-06,
9167
+ "loss": 0.2229,
9168
+ "step": 57300
9169
+ },
9170
+ {
9171
+ "epoch": 24.12,
9172
+ "eval_cer": 0.33326815533221227,
9173
+ "eval_loss": 2.737283945083618,
9174
+ "eval_runtime": 400.1034,
9175
+ "eval_samples_per_second": 23.689,
9176
+ "eval_steps_per_second": 2.962,
9177
+ "step": 57300
9178
  }
9179
  ],
9180
  "logging_steps": 100,
 
9182
  "num_input_tokens_seen": 0,
9183
  "num_train_epochs": 30,
9184
  "save_steps": 100,
9185
+ "total_flos": 6.279302169302917e+20,
9186
  "train_batch_size": 8,
9187
  "trial_name": null,
9188
  "trial_params": null