gagan3012 committed on
Commit
3f24190
1 Parent(s): 754df53

Training in progress, step 4000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ae955edc6f0f3d4522bd094fce5e3bd63e96d59ab8a37a589cb4270ab56e83d
3
  size 2216876959
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d412e81a61d6d60043deb1f0cee257eb7b78051ac0f5df86eff142ee2f5736
3
  size 2216876959
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:415bb7f0400444aea370a4b4c7492037db38fd90f49e658525a2f3de1c53de78
3
  size 1112207913
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a6b1471b3ef908655ec530f423c5206aac64f29eb05381e44ccf96ab8c46ae
3
  size 1112207913
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08caaceacca85ef32519a7581abfe5f23f29414206c4263faac6888a0071f567
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05787dac35dbdd37b3c745474cd2392f29710943747278ff59522a6996f89d5e
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:451f3ecd5c95243fdd2f69c7959fe4e80f342e2f1f787355e64d386bb5eb6461
3
  size 559
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e46694c7d98c5e4fefed63b336298efa60e63e823245b087b045057fdceba48
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:170a3af3f4139b4d5bab4b0c50be47bf25e44b89146714922596a6b635709b37
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4e799bc93ae8495865ed3fc239390cae62ebcc293d859b2256ac77ecd355e0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.3640661938534278,
5
- "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -144,11 +144,149 @@
144
  "eval_samples_per_second": 5.087,
145
  "eval_steps_per_second": 0.318,
146
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "max_steps": 4230,
150
  "num_train_epochs": 5,
151
- "total_flos": 5.772747680264356e+18,
152
  "trial_name": null,
153
  "trial_params": null
154
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.7281323877068555,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
144
  "eval_samples_per_second": 5.087,
145
  "eval_steps_per_second": 0.318,
146
  "step": 2000
147
+ },
148
+ {
149
+ "epoch": 2.48,
150
+ "learning_rate": 2.5342789598108745e-05,
151
+ "loss": 3.9868,
152
+ "step": 2100
153
+ },
154
+ {
155
+ "epoch": 2.6,
156
+ "learning_rate": 2.4160756501182033e-05,
157
+ "loss": 3.9224,
158
+ "step": 2200
159
+ },
160
+ {
161
+ "epoch": 2.72,
162
+ "learning_rate": 2.297872340425532e-05,
163
+ "loss": 3.8934,
164
+ "step": 2300
165
+ },
166
+ {
167
+ "epoch": 2.84,
168
+ "learning_rate": 2.179669030732861e-05,
169
+ "loss": 3.8838,
170
+ "step": 2400
171
+ },
172
+ {
173
+ "epoch": 2.96,
174
+ "learning_rate": 2.061465721040189e-05,
175
+ "loss": 3.8895,
176
+ "step": 2500
177
+ },
178
+ {
179
+ "epoch": 3.07,
180
+ "learning_rate": 1.9432624113475178e-05,
181
+ "loss": 3.8267,
182
+ "step": 2600
183
+ },
184
+ {
185
+ "epoch": 3.19,
186
+ "learning_rate": 1.8250591016548466e-05,
187
+ "loss": 3.8143,
188
+ "step": 2700
189
+ },
190
+ {
191
+ "epoch": 3.31,
192
+ "learning_rate": 1.706855791962175e-05,
193
+ "loss": 3.7788,
194
+ "step": 2800
195
+ },
196
+ {
197
+ "epoch": 3.43,
198
+ "learning_rate": 1.5886524822695038e-05,
199
+ "loss": 3.7714,
200
+ "step": 2900
201
+ },
202
+ {
203
+ "epoch": 3.55,
204
+ "learning_rate": 1.470449172576832e-05,
205
+ "loss": 3.752,
206
+ "step": 3000
207
+ },
208
+ {
209
+ "epoch": 3.55,
210
+ "eval_cer": 10.867020421289597,
211
+ "eval_loss": 7.850170612335205,
212
+ "eval_runtime": 147.6097,
213
+ "eval_samples_per_second": 5.088,
214
+ "eval_steps_per_second": 0.318,
215
+ "step": 3000
216
+ },
217
+ {
218
+ "epoch": 3.66,
219
+ "learning_rate": 1.3522458628841609e-05,
220
+ "loss": 3.7711,
221
+ "step": 3100
222
+ },
223
+ {
224
+ "epoch": 3.78,
225
+ "learning_rate": 1.2340425531914895e-05,
226
+ "loss": 3.7428,
227
+ "step": 3200
228
+ },
229
+ {
230
+ "epoch": 3.9,
231
+ "learning_rate": 1.115839243498818e-05,
232
+ "loss": 3.6924,
233
+ "step": 3300
234
+ },
235
+ {
236
+ "epoch": 4.02,
237
+ "learning_rate": 9.976359338061467e-06,
238
+ "loss": 3.6697,
239
+ "step": 3400
240
+ },
241
+ {
242
+ "epoch": 4.14,
243
+ "learning_rate": 8.794326241134753e-06,
244
+ "loss": 3.6303,
245
+ "step": 3500
246
+ },
247
+ {
248
+ "epoch": 4.26,
249
+ "learning_rate": 7.612293144208038e-06,
250
+ "loss": 3.641,
251
+ "step": 3600
252
+ },
253
+ {
254
+ "epoch": 4.37,
255
+ "learning_rate": 6.430260047281324e-06,
256
+ "loss": 3.6441,
257
+ "step": 3700
258
+ },
259
+ {
260
+ "epoch": 4.49,
261
+ "learning_rate": 5.24822695035461e-06,
262
+ "loss": 3.6077,
263
+ "step": 3800
264
+ },
265
+ {
266
+ "epoch": 4.61,
267
+ "learning_rate": 4.066193853427896e-06,
268
+ "loss": 3.5844,
269
+ "step": 3900
270
+ },
271
+ {
272
+ "epoch": 4.73,
273
+ "learning_rate": 2.884160756501182e-06,
274
+ "loss": 3.5919,
275
+ "step": 4000
276
+ },
277
+ {
278
+ "epoch": 4.73,
279
+ "eval_cer": 10.43849493487699,
280
+ "eval_loss": 8.424138069152832,
281
+ "eval_runtime": 147.3939,
282
+ "eval_samples_per_second": 5.095,
283
+ "eval_steps_per_second": 0.319,
284
+ "step": 4000
285
  }
286
  ],
287
  "max_steps": 4230,
288
  "num_train_epochs": 5,
289
+ "total_flos": 1.1545495360528712e+19,
290
  "trial_name": null,
291
  "trial_params": null
292
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:415bb7f0400444aea370a4b4c7492037db38fd90f49e658525a2f3de1c53de78
3
  size 1112207913
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a6b1471b3ef908655ec530f423c5206aac64f29eb05381e44ccf96ab8c46ae
3
  size 1112207913
runs/Apr27_08-37-21_cef3a7eee143/events.out.tfevents.1651048661.cef3a7eee143.38.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c7540e57ca7a9cf0701ba602feb467a95ffe73a22b30698d9d43f6644f8a3f0
3
- size 84979
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e71b8f231ee1e34cd0a1e0474e2f3172d0e6a3f9547cfc08c2c6ba86cbbcaf75
3
+ size 88755