ashanhr committed on
Commit
47d8fc3
1 Parent(s): d4c2eb6

Training in progress, step 7300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ffe4a4c3e4e20b6f8d7533a207499d54e18533a2ff9988b5cf140b2c17082c0
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:930ff99206b4cefd343133517df19a99faeea6029d6177481ccaec88e3dce2a1
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50723aacfe1d09285d3d20c78e7a1e48064440bdd2388888b659a4f9def33f3c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6c24714dc5073398cfebd333596cf1b0bae2a24c794815e98acfcf729f25e62
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0b9c0f6c20269a5797deab52d27abffe73bdbbaf470e35e43b903f7ebcadebf
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ace4eb638ee35078677814378e460122eee04667323d7ec66aafc41a0bcf3d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8a74dd20115cf8c84a288f3d48680cab469a7e69dc1d1632fbee207bbb0aa74
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abe36de378b59c2f69bfa0fcce3a08cc6146802a272942efe4095319e84db456
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:233cc1a95fc61bdbfaf2f246e87bbffc2fb4b93155b895b23bcd4b806754f9d4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd87e1ec2272e10b79236e6de3ce1bcd6d1b8483ab6bbbf6487a56197cdb7bd1
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9888444537992003,
5
  "eval_steps": 100,
6
- "global_step": 7100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1143,6 +1143,38 @@
1143
  "eval_samples_per_second": 25.848,
1144
  "eval_steps_per_second": 3.232,
1145
  "step": 7100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1146
  }
1147
  ],
1148
  "logging_steps": 100,
@@ -1150,7 +1182,7 @@
1150
  "num_input_tokens_seen": 0,
1151
  "num_train_epochs": 30,
1152
  "save_steps": 100,
1153
- "total_flos": 7.774285790233235e+19,
1154
  "train_batch_size": 8,
1155
  "trial_name": null,
1156
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0730372553146705,
5
  "eval_steps": 100,
6
+ "global_step": 7300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1143
  "eval_samples_per_second": 25.848,
1144
  "eval_steps_per_second": 3.232,
1145
  "step": 7100
1146
+ },
1147
+ {
1148
+ "epoch": 3.03,
1149
+ "grad_norm": 237.01815795898438,
1150
+ "learning_rate": 4.527137809187279e-05,
1151
+ "loss": 3.0545,
1152
+ "step": 7200
1153
+ },
1154
+ {
1155
+ "epoch": 3.03,
1156
+ "eval_cer": 0.5361493488717688,
1157
+ "eval_loss": 2.276334524154663,
1158
+ "eval_runtime": 373.4974,
1159
+ "eval_samples_per_second": 25.376,
1160
+ "eval_steps_per_second": 3.173,
1161
+ "step": 7200
1162
+ },
1163
+ {
1164
+ "epoch": 3.07,
1165
+ "grad_norm": 2.9628891944885254,
1166
+ "learning_rate": 4.520070671378092e-05,
1167
+ "loss": 1.9898,
1168
+ "step": 7300
1169
+ },
1170
+ {
1171
+ "epoch": 3.07,
1172
+ "eval_cer": 0.5218631457510461,
1173
+ "eval_loss": 1.94766104221344,
1174
+ "eval_runtime": 364.5953,
1175
+ "eval_samples_per_second": 25.996,
1176
+ "eval_steps_per_second": 3.25,
1177
+ "step": 7300
1178
  }
1179
  ],
1180
  "logging_steps": 100,
 
1182
  "num_input_tokens_seen": 0,
1183
  "num_train_epochs": 30,
1184
  "save_steps": 100,
1185
+ "total_flos": 8.009534999457974e+19,
1186
  "train_batch_size": 8,
1187
  "trial_name": null,
1188
  "trial_params": null