fsicoli committed
Commit ebb6357
1 Parent(s): a3b17b7

Upload 11 files

config.json CHANGED
@@ -43,7 +43,7 @@
 "num_mel_bins": 128,
 "pad_token_id": 50256,
 "scale_embedding": false,
- "torch_dtype": "float16",
+ "torch_dtype": "float32",
 "transformers_version": "4.37.0.dev0",
 "use_cache": true,
 "use_weighted_layer_sum": false,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:14da2025731d949006200c34abfb6aa1be4cc2350ecc27dc6af559603a86225d
+ oid sha256:156e0fd71ba8d6d1a308460a813997db7d653c1f820bd2d5995abd49692f14a1
 size 4993448880
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:48b38b2d250388274dbab6a134b7ff4184cb3525599666efa7cdefed5c99d82c
+ oid sha256:c05a1c27b9a9a440db979127057f5f66ec41e211bf68bb2ea3a227c4403c8e60
 size 1180663192
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d8b7a6a42c8f342e4252296a6744e5aeb820e8d8de08a16c9dd10adc396c1293
+ oid sha256:4da10ec2e0bf47d4b44db41bb1bfc93e007a4873e0e2ff5dbc5e667cb0168b4f
 size 12333660476
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:94dabcf3aa6388cb7358095beeacf8c08e10d0c57c65e895222ff8e1688faa47
- size 14308
+ oid sha256:f584fc46b466d88dd39394bd540717b3bc1053ecc01185f34166047c3442833a
+ size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:734304bd0e6dc36b41c4ccbd3af99b577906c8419c323945dce4562d0d94fb39
+ oid sha256:bd2b4288adefddaec779eef5814fc8113ed6f29d36bcf1119544624d9b84c141
 size 1064
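
The five binary files above (the two safetensors shards, optimizer.pt, rng_state.pth and scheduler.pt) are stored through Git LFS, so their diffs only swap the pointer's sha256 oid (and, for rng_state.pth, the size). A minimal sketch of checking a downloaded shard against the new oid, assuming the file has already been fetched into the current directory:

# Sketch only: recompute the sha256 of a downloaded LFS file and compare it
# with the oid recorded in the pointer shown above.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "156e0fd71ba8d6d1a308460a813997db7d653c1f820bd2d5995abd49692f14a1"
assert sha256_of("model-00001-of-00002.safetensors") == expected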
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
- "epoch": 0.25806451612903225,
+ "epoch": 0.5161290322580645,
 "eval_steps": 1000,
- "global_step": 1000,
+ "global_step": 2000,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -256,6 +256,255 @@
 "eval_steps_per_second": 0.006,
 "eval_wer": 0.1124272786037492,
 "step": 1000
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 5.114999999999999e-07,
+ "loss": 0.1301,
+ "step": 1025
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 5.24e-07,
+ "loss": 0.2193,
+ "step": 1050
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 5.365e-07,
+ "loss": 0.1552,
+ "step": 1075
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 5.490000000000001e-07,
+ "loss": 0.1865,
+ "step": 1100
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 5.614999999999999e-07,
+ "loss": 0.1618,
+ "step": 1125
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 5.739999999999999e-07,
+ "loss": 0.2259,
+ "step": 1150
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 5.865e-07,
+ "loss": 0.1418,
+ "step": 1175
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 5.989999999999999e-07,
+ "loss": 0.1916,
+ "step": 1200
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 6.115e-07,
+ "loss": 0.1295,
+ "step": 1225
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 6.24e-07,
+ "loss": 0.2112,
+ "step": 1250
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 6.365e-07,
+ "loss": 0.1231,
+ "step": 1275
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 6.49e-07,
+ "loss": 0.1914,
+ "step": 1300
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 6.614999999999999e-07,
+ "loss": 0.1485,
+ "step": 1325
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 6.74e-07,
+ "loss": 0.1958,
+ "step": 1350
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 6.865e-07,
+ "loss": 0.1452,
+ "step": 1375
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 6.989999999999999e-07,
+ "loss": 0.1624,
+ "step": 1400
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 7.115e-07,
+ "loss": 0.1518,
+ "step": 1425
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 7.24e-07,
+ "loss": 0.1935,
+ "step": 1450
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 7.365e-07,
+ "loss": 0.1138,
+ "step": 1475
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 7.489999999999999e-07,
+ "loss": 0.16,
+ "step": 1500
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 7.614999999999999e-07,
+ "loss": 0.1279,
+ "step": 1525
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 7.74e-07,
+ "loss": 0.1862,
+ "step": 1550
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 7.864999999999999e-07,
+ "loss": 0.1537,
+ "step": 1575
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 7.99e-07,
+ "loss": 0.1915,
+ "step": 1600
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 8.115e-07,
+ "loss": 0.1303,
+ "step": 1625
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 8.24e-07,
+ "loss": 0.1967,
+ "step": 1650
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 8.365e-07,
+ "loss": 0.1418,
+ "step": 1675
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 8.489999999999999e-07,
+ "loss": 0.194,
+ "step": 1700
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 8.615e-07,
+ "loss": 0.1377,
+ "step": 1725
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 8.739999999999999e-07,
+ "loss": 0.1534,
+ "step": 1750
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 8.864999999999999e-07,
+ "loss": 0.1416,
+ "step": 1775
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 8.99e-07,
+ "loss": 0.1869,
+ "step": 1800
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 9.115e-07,
+ "loss": 0.1507,
+ "step": 1825
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 9.24e-07,
+ "loss": 0.1571,
+ "step": 1850
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 9.365e-07,
+ "loss": 0.1348,
+ "step": 1875
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 9.489999999999999e-07,
+ "loss": 0.1981,
+ "step": 1900
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 9.615e-07,
+ "loss": 0.1324,
+ "step": 1925
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 9.74e-07,
+ "loss": 0.1712,
+ "step": 1950
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 9.865e-07,
+ "loss": 0.1169,
+ "step": 1975
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 9.989999999999999e-07,
+ "loss": 0.1654,
+ "step": 2000
+ },
+ {
+ "epoch": 0.52,
+ "eval_loss": 0.1500108540058136,
+ "eval_runtime": 188234.7285,
+ "eval_samples_per_second": 0.05,
+ "eval_steps_per_second": 0.006,
+ "eval_wer": 0.10521978021978022,
+ "step": 2000
 }
 ],
 "logging_steps": 25,
@@ -263,7 +512,7 @@
 "num_input_tokens_seen": 0,
 "num_train_epochs": 2,
 "save_steps": 1000,
- "total_flos": 2.717998645248e+19,
+ "total_flos": 5.435997290496e+19,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null