marinone94 committed on
Commit
dcbccaa
•
1 Parent(s): 91fb563

Training in progress, step 5000

Browse files
{checkpoint-2000 → checkpoint-5000}/config.json RENAMED
File without changes
{checkpoint-2000 → checkpoint-5000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cbed71746846766d0ee32fd1c3d30a051fe1cd5fa1a364116a8331a718ac106
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fed206cb0b8c9efd68f99b934024f9d58a7904498f14739bda054642344f0c5
3
  size 2490337809
{checkpoint-2000 → checkpoint-5000}/preprocessor_config.json RENAMED
File without changes
{checkpoint-2000 → checkpoint-5000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1b4db9fe6fc412c5bb7bf8cacc52fb1a6a7a9a2c5203f2e3246fbbc44c01f8d
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6cf3410bd0459e255908bdf44aa100963a7896b62986289a4263c9bba0a128
3
  size 1262063089
{checkpoint-2000 → checkpoint-5000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df565685c0e4849507cfdc6dc93987c2f9ca38e629be718a83015272881f9e14
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cf9476b89a6bcc7967770399790226316550d21ef77b90612b8400ced7382ba
3
  size 14567
{checkpoint-2000 → checkpoint-5000}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22e86abaf1861fe41e7a0f8a14cd1180d020d6a1ae32de680f6edcf6afc3ebd8
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:990ce215d68bd722ee630c7dd2ad39b7e343d03331f980e089575d761e120768
3
  size 559
{checkpoint-2000 → checkpoint-5000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8974dc324f1da04e1c2f621f09ee80bb5dcc0fac900f8f887ccf3266bcd8368d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e193efbb6f30702eab41cac4b81e33ac1af4f8da7a89847a8f71202c8ec35e
3
  size 623
{checkpoint-2000 → checkpoint-5000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.83116883116883,
5
- "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -144,11 +144,218 @@
144
  "eval_steps_per_second": 0.782,
145
  "eval_wer": 0.16725371193237237,
146
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "max_steps": 9600,
150
  "num_train_epochs": 100,
151
- "total_flos": 3.1116125970966065e+19,
152
  "trial_name": null,
153
  "trial_params": null
154
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 52.08311688311688,
5
+ "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
144
  "eval_steps_per_second": 0.782,
145
  "eval_wer": 0.16725371193237237,
146
  "step": 2000
147
+ },
148
+ {
149
+ "epoch": 21.87,
150
+ "learning_rate": 7.327148437499999e-05,
151
+ "loss": 1.0485,
152
+ "step": 2100
153
+ },
154
+ {
155
+ "epoch": 22.91,
156
+ "learning_rate": 7.2294921875e-05,
157
+ "loss": 1.0291,
158
+ "step": 2200
159
+ },
160
+ {
161
+ "epoch": 23.96,
162
+ "learning_rate": 7.1318359375e-05,
163
+ "loss": 1.007,
164
+ "step": 2300
165
+ },
166
+ {
167
+ "epoch": 25.0,
168
+ "learning_rate": 7.0341796875e-05,
169
+ "loss": 1.0008,
170
+ "step": 2400
171
+ },
172
+ {
173
+ "epoch": 26.04,
174
+ "learning_rate": 6.9365234375e-05,
175
+ "loss": 0.988,
176
+ "step": 2500
177
+ },
178
+ {
179
+ "epoch": 27.08,
180
+ "learning_rate": 6.8388671875e-05,
181
+ "loss": 0.9766,
182
+ "step": 2600
183
+ },
184
+ {
185
+ "epoch": 28.12,
186
+ "learning_rate": 6.7412109375e-05,
187
+ "loss": 0.9663,
188
+ "step": 2700
189
+ },
190
+ {
191
+ "epoch": 29.17,
192
+ "learning_rate": 6.6435546875e-05,
193
+ "loss": 0.9539,
194
+ "step": 2800
195
+ },
196
+ {
197
+ "epoch": 30.21,
198
+ "learning_rate": 6.545898437499999e-05,
199
+ "loss": 0.9479,
200
+ "step": 2900
201
+ },
202
+ {
203
+ "epoch": 31.25,
204
+ "learning_rate": 6.448242187499999e-05,
205
+ "loss": 0.934,
206
+ "step": 3000
207
+ },
208
+ {
209
+ "epoch": 31.25,
210
+ "eval_loss": 0.1579357087612152,
211
+ "eval_runtime": 198.7733,
212
+ "eval_samples_per_second": 25.381,
213
+ "eval_steps_per_second": 0.795,
214
+ "eval_wer": 0.1388858784003468,
215
+ "step": 3000
216
+ },
217
+ {
218
+ "epoch": 32.29,
219
+ "learning_rate": 6.3505859375e-05,
220
+ "loss": 0.9285,
221
+ "step": 3100
222
+ },
223
+ {
224
+ "epoch": 33.33,
225
+ "learning_rate": 6.252929687499999e-05,
226
+ "loss": 0.9121,
227
+ "step": 3200
228
+ },
229
+ {
230
+ "epoch": 34.37,
231
+ "learning_rate": 6.155273437499999e-05,
232
+ "loss": 0.9016,
233
+ "step": 3300
234
+ },
235
+ {
236
+ "epoch": 35.42,
237
+ "learning_rate": 6.0576171875e-05,
238
+ "loss": 0.9023,
239
+ "step": 3400
240
+ },
241
+ {
242
+ "epoch": 36.46,
243
+ "learning_rate": 5.9599609374999994e-05,
244
+ "loss": 0.9004,
245
+ "step": 3500
246
+ },
247
+ {
248
+ "epoch": 37.5,
249
+ "learning_rate": 5.862304687499999e-05,
250
+ "loss": 0.8844,
251
+ "step": 3600
252
+ },
253
+ {
254
+ "epoch": 38.54,
255
+ "learning_rate": 5.7646484375e-05,
256
+ "loss": 0.8771,
257
+ "step": 3700
258
+ },
259
+ {
260
+ "epoch": 39.58,
261
+ "learning_rate": 5.6669921875e-05,
262
+ "loss": 0.876,
263
+ "step": 3800
264
+ },
265
+ {
266
+ "epoch": 40.62,
267
+ "learning_rate": 5.569335937499999e-05,
268
+ "loss": 0.8708,
269
+ "step": 3900
270
+ },
271
+ {
272
+ "epoch": 41.66,
273
+ "learning_rate": 5.4716796874999997e-05,
274
+ "loss": 0.8691,
275
+ "step": 4000
276
+ },
277
+ {
278
+ "epoch": 41.66,
279
+ "eval_loss": 0.14571049809455872,
280
+ "eval_runtime": 195.237,
281
+ "eval_samples_per_second": 25.84,
282
+ "eval_steps_per_second": 0.809,
283
+ "eval_wer": 0.12899642353961202,
284
+ "step": 4000
285
+ },
286
+ {
287
+ "epoch": 42.71,
288
+ "learning_rate": 5.3740234374999996e-05,
289
+ "loss": 0.8624,
290
+ "step": 4100
291
+ },
292
+ {
293
+ "epoch": 43.75,
294
+ "learning_rate": 5.2763671874999995e-05,
295
+ "loss": 0.8556,
296
+ "step": 4200
297
+ },
298
+ {
299
+ "epoch": 44.79,
300
+ "learning_rate": 5.1787109375e-05,
301
+ "loss": 0.8607,
302
+ "step": 4300
303
+ },
304
+ {
305
+ "epoch": 45.83,
306
+ "learning_rate": 5.0810546875e-05,
307
+ "loss": 0.8536,
308
+ "step": 4400
309
+ },
310
+ {
311
+ "epoch": 46.87,
312
+ "learning_rate": 4.983398437499999e-05,
313
+ "loss": 0.8493,
314
+ "step": 4500
315
+ },
316
+ {
317
+ "epoch": 47.91,
318
+ "learning_rate": 4.8857421875e-05,
319
+ "loss": 0.8456,
320
+ "step": 4600
321
+ },
322
+ {
323
+ "epoch": 48.96,
324
+ "learning_rate": 4.7880859375e-05,
325
+ "loss": 0.8333,
326
+ "step": 4700
327
+ },
328
+ {
329
+ "epoch": 50.0,
330
+ "learning_rate": 4.6904296874999996e-05,
331
+ "loss": 0.8346,
332
+ "step": 4800
333
+ },
334
+ {
335
+ "epoch": 51.04,
336
+ "learning_rate": 4.5927734375e-05,
337
+ "loss": 0.8403,
338
+ "step": 4900
339
+ },
340
+ {
341
+ "epoch": 52.08,
342
+ "learning_rate": 4.4951171874999995e-05,
343
+ "loss": 0.8328,
344
+ "step": 5000
345
+ },
346
+ {
347
+ "epoch": 52.08,
348
+ "eval_loss": 0.14348936080932617,
349
+ "eval_runtime": 197.7739,
350
+ "eval_samples_per_second": 25.509,
351
+ "eval_steps_per_second": 0.799,
352
+ "eval_wer": 0.12054297171344966,
353
+ "step": 5000
354
  }
355
  ],
356
  "max_steps": 9600,
357
  "num_train_epochs": 100,
358
+ "total_flos": 7.771701490085208e+19,
359
  "trial_name": null,
360
  "trial_params": null
361
  }
{checkpoint-2000 → checkpoint-5000}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fab49963e9b9398b213516e5f111a4b3c94b65baca6eae9896d23b2cf617fc05
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6cf3410bd0459e255908bdf44aa100963a7896b62986289a4263c9bba0a128
3
  size 1262063089