gagan3012 commited on
Commit
4a47d37
1 Parent(s): 050e251

Training in progress, step 4000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98deb32c79b42cbb7d29bd1d342401d4526eaad7b0335b7895f05b0f8569c981
3
  size 2217170313
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00dbab2c669d1749c1221f16d0db9d773e53d869a6fbdb8e8de4cffede927844
3
  size 2217170313
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6ab2b57357837dcb7a2ac0e9c7f4b5663e60d42f3062fdaec7b31dacc509bbd
3
  size 1110979563
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6126262be252d9a97ffe5ce05868864986d359a880a56b15b73335b718325b32
3
  size 1110979563
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad3fe493c6b75cf37fbf1683049b17cb20d40fca4ab7678c59ce13018603fd83
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c154dc7011c0d78a47e9f03952eaa952ff95903d7e1bbd999c35b6a645c480d
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c8ebafee11ba1a9f8b3828ac7ef3cc74823857c9864133f6b5adf8aed082b63
3
  size 559
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4d531696be1263ac11d0d37e17da075ea84a1f159c73b303b19cb70a3843d8
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f56d95f063d6bd3e6989f74085b8339015df8cf63e497ec203733677b486978e
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ef11ca321da1d407dac8fbef3cc8f319dca5c16f01276dcd7d3b8d5447a0bc
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.3640661938534278,
5
- "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -144,11 +144,149 @@
144
  "eval_samples_per_second": 21.084,
145
  "eval_steps_per_second": 1.32,
146
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "max_steps": 4230,
150
  "num_train_epochs": 5,
151
- "total_flos": 5.790864009434628e+18,
152
  "trial_name": null,
153
  "trial_params": null
154
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.7281323877068555,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
144
  "eval_samples_per_second": 21.084,
145
  "eval_steps_per_second": 1.32,
146
  "step": 2000
147
+ },
148
+ {
149
+ "epoch": 2.48,
150
+ "learning_rate": 2.5283687943262413e-05,
151
+ "loss": 1.1804,
152
+ "step": 2100
153
+ },
154
+ {
155
+ "epoch": 2.6,
156
+ "learning_rate": 2.4101654846335698e-05,
157
+ "loss": 1.0433,
158
+ "step": 2200
159
+ },
160
+ {
161
+ "epoch": 2.72,
162
+ "learning_rate": 2.2919621749408986e-05,
163
+ "loss": 1.0309,
164
+ "step": 2300
165
+ },
166
+ {
167
+ "epoch": 2.84,
168
+ "learning_rate": 2.173758865248227e-05,
169
+ "loss": 0.917,
170
+ "step": 2400
171
+ },
172
+ {
173
+ "epoch": 2.96,
174
+ "learning_rate": 2.0555555555555555e-05,
175
+ "loss": 0.8589,
176
+ "step": 2500
177
+ },
178
+ {
179
+ "epoch": 3.07,
180
+ "learning_rate": 1.9373522458628842e-05,
181
+ "loss": 0.7167,
182
+ "step": 2600
183
+ },
184
+ {
185
+ "epoch": 3.19,
186
+ "learning_rate": 1.819148936170213e-05,
187
+ "loss": 0.6517,
188
+ "step": 2700
189
+ },
190
+ {
191
+ "epoch": 3.31,
192
+ "learning_rate": 1.7009456264775415e-05,
193
+ "loss": 0.6548,
194
+ "step": 2800
195
+ },
196
+ {
197
+ "epoch": 3.43,
198
+ "learning_rate": 1.5827423167848703e-05,
199
+ "loss": 0.5966,
200
+ "step": 2900
201
+ },
202
+ {
203
+ "epoch": 3.55,
204
+ "learning_rate": 1.4645390070921985e-05,
205
+ "loss": 0.5682,
206
+ "step": 3000
207
+ },
208
+ {
209
+ "epoch": 3.55,
210
+ "eval_cer": 0.21450393954011898,
211
+ "eval_loss": 1.9335544109344482,
212
+ "eval_runtime": 33.7287,
213
+ "eval_samples_per_second": 22.266,
214
+ "eval_steps_per_second": 1.393,
215
+ "step": 3000
216
+ },
217
+ {
218
+ "epoch": 3.66,
219
+ "learning_rate": 1.3463356973995273e-05,
220
+ "loss": 0.5376,
221
+ "step": 3100
222
+ },
223
+ {
224
+ "epoch": 3.78,
225
+ "learning_rate": 1.2281323877068558e-05,
226
+ "loss": 0.4875,
227
+ "step": 3200
228
+ },
229
+ {
230
+ "epoch": 3.9,
231
+ "learning_rate": 1.1099290780141844e-05,
232
+ "loss": 0.4462,
233
+ "step": 3300
234
+ },
235
+ {
236
+ "epoch": 4.02,
237
+ "learning_rate": 9.91725768321513e-06,
238
+ "loss": 0.4393,
239
+ "step": 3400
240
+ },
241
+ {
242
+ "epoch": 4.14,
243
+ "learning_rate": 8.735224586288416e-06,
244
+ "loss": 0.365,
245
+ "step": 3500
246
+ },
247
+ {
248
+ "epoch": 4.26,
249
+ "learning_rate": 7.5531914893617024e-06,
250
+ "loss": 0.3727,
251
+ "step": 3600
252
+ },
253
+ {
254
+ "epoch": 4.37,
255
+ "learning_rate": 6.371158392434988e-06,
256
+ "loss": 0.3343,
257
+ "step": 3700
258
+ },
259
+ {
260
+ "epoch": 4.49,
261
+ "learning_rate": 5.189125295508274e-06,
262
+ "loss": 0.3357,
263
+ "step": 3800
264
+ },
265
+ {
266
+ "epoch": 4.61,
267
+ "learning_rate": 4.007092198581561e-06,
268
+ "loss": 0.3244,
269
+ "step": 3900
270
+ },
271
+ {
272
+ "epoch": 4.73,
273
+ "learning_rate": 2.8250591016548463e-06,
274
+ "loss": 0.3038,
275
+ "step": 4000
276
+ },
277
+ {
278
+ "epoch": 4.73,
279
+ "eval_cer": 0.12493970091654606,
280
+ "eval_loss": 1.5810511112213135,
281
+ "eval_runtime": 33.6265,
282
+ "eval_samples_per_second": 22.334,
283
+ "eval_steps_per_second": 1.398,
284
+ "step": 4000
285
  }
286
  ],
287
  "max_steps": 4230,
288
  "num_train_epochs": 5,
289
+ "total_flos": 1.1581728018869256e+19,
290
  "trial_name": null,
291
  "trial_params": null
292
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6ab2b57357837dcb7a2ac0e9c7f4b5663e60d42f3062fdaec7b31dacc509bbd
3
  size 1110979563
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6126262be252d9a97ffe5ce05868864986d359a880a56b15b73335b718325b32
3
  size 1110979563
runs/Apr27_18-49-25_ed0e8029d983/events.out.tfevents.1651085391.ed0e8029d983.38.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d68230e4e12363a24d1d81ab97cc918416ca363ebdfd77cfd8cc0f21d0d1d24
3
- size 84406
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1df8acbb06ed443d1ac5af36c40e9566fba066f3958eedac067c8bea2422da8
3
+ size 88182