ashanhr commited on
Commit
679047d
1 Parent(s): 50be3ad

Training in progress, step 33500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10d5b6ee3ec7774796203e9857adedf4a34ad1a8f9d65eac92d02013104b7486
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebbf03b6a39b6b310c792fd4ce9a56b4feab22bc4d8aae7c67e0b23549ec3844
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7eed855e5c83384fc6750fe371ea0613d62eeceb95e59f1e6fb79a4aa75fac5
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5322792e8472158c629b41c8ed853c14c036430212aa2395b1962b71dd6688c
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:298d5e2bc403ff74d204ac4ad46dbab183f8ce444ab4c0c42dc465d17ad6d84f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41f6c73349fb0a4922b7c514625c8f319c04d8a3281a94dc80e47d5905a270b7
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3119fb2b34229087dc7d4734b99ce7345817f422d2b8091f039bff04ac0992
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b7a405079c06c2e1179546e5ade586e1754239d626d4047688ddad0abeac41
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2db2a9d436fb980c51b3b887bf8c6c11afcad56f430dc37015dfbad372f5df29
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c05bab47528958ca030cc3c2eb992ca93a53fcef5ee7de67d8809ec8b54d00
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.018101452325826,
5
  "eval_steps": 100,
6
- "global_step": 33300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5335,6 +5335,38 @@
5335
  "eval_samples_per_second": 25.558,
5336
  "eval_steps_per_second": 3.195,
5337
  "step": 33300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5338
  }
5339
  ],
5340
  "logging_steps": 100,
@@ -5342,7 +5374,7 @@
5342
  "num_input_tokens_seen": 0,
5343
  "num_train_epochs": 30,
5344
  "save_steps": 100,
5345
- "total_flos": 3.6491196354345704e+20,
5346
  "train_batch_size": 8,
5347
  "trial_name": null,
5348
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.102294253841297,
5
  "eval_steps": 100,
6
+ "global_step": 33500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5335
  "eval_samples_per_second": 25.558,
5336
  "eval_steps_per_second": 3.195,
5337
  "step": 33300
5338
+ },
5339
+ {
5340
+ "epoch": 14.06,
5341
+ "grad_norm": 2.916482925415039,
5342
+ "learning_rate": 2.6761130742049474e-05,
5343
+ "loss": 0.9488,
5344
+ "step": 33400
5345
+ },
5346
+ {
5347
+ "epoch": 14.06,
5348
+ "eval_cer": 0.39579504125767473,
5349
+ "eval_loss": 2.687380075454712,
5350
+ "eval_runtime": 390.5164,
5351
+ "eval_samples_per_second": 24.27,
5352
+ "eval_steps_per_second": 3.034,
5353
+ "step": 33400
5354
+ },
5355
+ {
5356
+ "epoch": 14.1,
5357
+ "grad_norm": 5.404353618621826,
5358
+ "learning_rate": 2.66904593639576e-05,
5359
+ "loss": 0.9346,
5360
+ "step": 33500
5361
+ },
5362
+ {
5363
+ "epoch": 14.1,
5364
+ "eval_cer": 0.39129775918032145,
5365
+ "eval_loss": 2.3900837898254395,
5366
+ "eval_runtime": 372.0348,
5367
+ "eval_samples_per_second": 25.476,
5368
+ "eval_steps_per_second": 3.185,
5369
+ "step": 33500
5370
  }
5371
  ],
5372
  "logging_steps": 100,
 
5374
  "num_input_tokens_seen": 0,
5375
  "num_train_epochs": 30,
5376
  "save_steps": 100,
5377
+ "total_flos": 3.670837467873283e+20,
5378
  "train_batch_size": 8,
5379
  "trial_name": null,
5380
  "trial_params": null