fsicoli committed
Commit: 0563836
Parent: 145bfbf

Upload 11 files

config.json CHANGED
@@ -43,7 +43,7 @@
  "num_mel_bins": 128,
  "pad_token_id": 50256,
  "scale_embedding": false,
- "torch_dtype": "float16",
+ "torch_dtype": "float32",
  "transformers_version": "4.37.0.dev0",
  "use_cache": true,
  "use_weighted_layer_sum": false,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3e0b39e4071338e74731a0effa63b1446b76262677e458e296c53e4a9e2d7e17
+ oid sha256:8dfba4cd57cc0c415a0474adc9b1b320d884ad8357c905ef92c1c708eaffd327
  size 4993448880
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2a491fb4ca03fcdcad2b02f474a905c161fe140a898b8c657fd1d4efe846b32f
+ oid sha256:4283dc2b4a753c2638ada4541429e3198bfc64b380a951724208de1fe0aeed9b
  size 1180663192
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee848402158ac34bd77da476b15335a035ef60e8d19affef08148a06eb616aa0
+ oid sha256:cbd5593589333e3322395caa3030e8e5b263075832eb6787d5c626cc780f80d9
  size 12333625252
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:be24b2b8420b2e62233dac0078a0eaef4fb653235f8f3ed7ac8b73067c134bdf
+ oid sha256:22c1cdbc7a9b3a0567fa5693ac92071a7d30d3f3a5c9c2b16a14a14376366643
  size 14054
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1be4d2c14c0b846fa9a00d40df56472a7bedaade8c7be6c4383842a93e02dce1
+ oid sha256:2d95ef8e7babb7d58bfff83c7b6b56b9aba67067e6e698db72cb5ed722b44f24
  size 1064
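
The weight, optimizer, RNG, and scheduler files above are Git LFS pointers (version / oid / size); this commit only swaps their sha256 oids, while the sizes stay identical. A small standard-library sketch for checking a downloaded file against its pointer follows; the file name and expected hash are taken from the first safetensors pointer above, purely for illustration.

```python
# Sketch: verify a downloaded LFS object against the sha256 oid in its pointer file.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    """Stream the file in 1 MiB chunks and return its hex sha256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid from the updated model-00001-of-00002.safetensors pointer above
expected = "8dfba4cd57cc0c415a0474adc9b1b320d884ad8357c905ef92c1c708eaffd327"
print(sha256_of("model-00001-of-00002.safetensors") == expected)
```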
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.25806451612903225,
+ "epoch": 0.5161290322580645,
  "eval_steps": 1000,
- "global_step": 1000,
+ "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -256,6 +256,255 @@
  "eval_steps_per_second": 0.007,
  "eval_wer": 0.10759534583063995,
  "step": 1000
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 9.9375e-07,
+ "loss": 0.144,
+ "step": 1025
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 9.875e-07,
+ "loss": 0.2088,
+ "step": 1050
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 9.8125e-07,
+ "loss": 0.1592,
+ "step": 1075
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 9.75e-07,
+ "loss": 0.1775,
+ "step": 1100
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 9.6875e-07,
+ "loss": 0.1345,
+ "step": 1125
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 9.624999999999999e-07,
+ "loss": 0.1857,
+ "step": 1150
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 9.5625e-07,
+ "loss": 0.1481,
+ "step": 1175
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 9.499999999999999e-07,
+ "loss": 0.1926,
+ "step": 1200
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 9.4375e-07,
+ "loss": 0.1298,
+ "step": 1225
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 9.374999999999999e-07,
+ "loss": 0.193,
+ "step": 1250
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 9.3125e-07,
+ "loss": 0.1336,
+ "step": 1275
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 9.25e-07,
+ "loss": 0.1697,
+ "step": 1300
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 9.187499999999999e-07,
+ "loss": 0.1332,
+ "step": 1325
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 9.124999999999999e-07,
+ "loss": 0.1668,
+ "step": 1350
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 9.0625e-07,
+ "loss": 0.1425,
+ "step": 1375
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 9e-07,
+ "loss": 0.1815,
+ "step": 1400
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 8.9375e-07,
+ "loss": 0.1579,
+ "step": 1425
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 8.874999999999999e-07,
+ "loss": 0.1855,
+ "step": 1450
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 8.812499999999999e-07,
+ "loss": 0.1359,
+ "step": 1475
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 8.75e-07,
+ "loss": 0.1923,
+ "step": 1500
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 8.687499999999999e-07,
+ "loss": 0.126,
+ "step": 1525
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 8.625e-07,
+ "loss": 0.1824,
+ "step": 1550
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 8.5625e-07,
+ "loss": 0.1286,
+ "step": 1575
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 8.499999999999999e-07,
+ "loss": 0.1776,
+ "step": 1600
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 8.4375e-07,
+ "loss": 0.1408,
+ "step": 1625
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 8.375e-07,
+ "loss": 0.1684,
+ "step": 1650
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 8.3125e-07,
+ "loss": 0.1511,
+ "step": 1675
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 8.249999999999999e-07,
+ "loss": 0.1579,
+ "step": 1700
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 8.187499999999999e-07,
+ "loss": 0.1303,
+ "step": 1725
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 8.125e-07,
+ "loss": 0.1663,
+ "step": 1750
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 8.0625e-07,
+ "loss": 0.1466,
+ "step": 1775
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 8e-07,
+ "loss": 0.1738,
+ "step": 1800
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 7.937499999999999e-07,
+ "loss": 0.1324,
+ "step": 1825
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 7.875e-07,
+ "loss": 0.1775,
+ "step": 1850
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 7.812499999999999e-07,
+ "loss": 0.0969,
+ "step": 1875
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 7.75e-07,
+ "loss": 0.172,
+ "step": 1900
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 7.6875e-07,
+ "loss": 0.1633,
+ "step": 1925
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 7.624999999999999e-07,
+ "loss": 0.1695,
+ "step": 1950
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 7.5625e-07,
+ "loss": 0.1374,
+ "step": 1975
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 7.5e-07,
+ "loss": 0.1703,
+ "step": 2000
+ },
+ {
+ "epoch": 0.52,
+ "eval_loss": 0.13918790221214294,
+ "eval_runtime": 176892.8221,
+ "eval_samples_per_second": 0.053,
+ "eval_steps_per_second": 0.007,
+ "eval_wer": 0.10190691661279896,
+ "step": 2000
  }
  ],
  "logging_steps": 25,
@@ -263,7 +512,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
- "total_flos": 2.717998645248e+19,
+ "total_flos": 5.435997290496e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null