marinone94 committed on
Commit
e0ccb33
•
1 Parent(s): bc5c400

Training in progress, step 900

Browse files
{checkpoint-600 → checkpoint-900}/config.json RENAMED
File without changes
{checkpoint-600 → checkpoint-900}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87b571a9150b8daa615fd79bd19594823cd3a5916dff123175f2b5acd3aae34f
3
  size 2490346001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59f0b148f8bd2bd87664562f6b57aed69fada1df7fac25bf851efef79073d650
3
  size 2490346001
{checkpoint-600 → checkpoint-900}/preprocessor_config.json RENAMED
File without changes
{checkpoint-600 → checkpoint-900}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8516ba404c3005619dab698e95eba4925ba995d7c93b7c12a9a3ec6e076a633a
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03c4ad43954ff5d68a16a7e40b86626a429d98e2fdeccfd745f3e2d0f37d6ea5
3
  size 1262067185
{checkpoint-600 → checkpoint-900}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4efb8c99e93bf6a9e0eaf10d4bc56f851a7d3bcdc654e07f6bc1aa10347070e6
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a4585f922a033758d0a6a0b0f1a5d69a1e6835c8ebb3ce639c007579d1be7d
3
  size 14567
{checkpoint-600 → checkpoint-900}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd9adbb7bf7a7b94e5da4d383115b816c16f892b14cb7df333d0775ae0f262ae
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2805e3eccf2e46b8f5518fa8d1844fcdcd45d4b27e76ca4417c27b06af79b8d5
3
  size 559
{checkpoint-600 → checkpoint-900}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1234d8c3176806f7e4a5e539c9c83abb170bf548c7ebf7edff51815795c496ed
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d83fa4807715cddf3058690a0c6a411df0e3d3bd6db8670f4c3379a7e7bfd95
3
  size 623
{checkpoint-600 → checkpoint-900}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2768166089965398,
5
- "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -420,11 +420,218 @@
420
  "eval_steps_per_second": 1.121,
421
  "eval_wer": 1.0,
422
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  }
424
  ],
425
  "max_steps": 6501,
426
  "num_train_epochs": 3,
427
- "total_flos": 1.1344329626225234e+19,
428
  "trial_name": null,
429
  "trial_params": null
430
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.41522491349480967,
5
+ "global_step": 900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
420
  "eval_steps_per_second": 1.121,
421
  "eval_wer": 1.0,
422
  "step": 600
423
+ },
424
+ {
425
+ "epoch": 0.28,
426
+ "learning_rate": 6.952511773940345e-05,
427
+ "loss": 2.5368,
428
+ "step": 610
429
+ },
430
+ {
431
+ "epoch": 0.29,
432
+ "learning_rate": 6.940737833594976e-05,
433
+ "loss": 2.5942,
434
+ "step": 620
435
+ },
436
+ {
437
+ "epoch": 0.29,
438
+ "learning_rate": 6.928963893249607e-05,
439
+ "loss": 2.4907,
440
+ "step": 630
441
+ },
442
+ {
443
+ "epoch": 0.3,
444
+ "learning_rate": 6.917189952904238e-05,
445
+ "loss": 2.472,
446
+ "step": 640
447
+ },
448
+ {
449
+ "epoch": 0.3,
450
+ "learning_rate": 6.907770800627943e-05,
451
+ "loss": 2.6826,
452
+ "step": 650
453
+ },
454
+ {
455
+ "epoch": 0.3,
456
+ "learning_rate": 6.895996860282574e-05,
457
+ "loss": 2.3167,
458
+ "step": 660
459
+ },
460
+ {
461
+ "epoch": 0.31,
462
+ "learning_rate": 6.884222919937205e-05,
463
+ "loss": 2.3457,
464
+ "step": 670
465
+ },
466
+ {
467
+ "epoch": 0.31,
468
+ "learning_rate": 6.872448979591836e-05,
469
+ "loss": 2.3532,
470
+ "step": 680
471
+ },
472
+ {
473
+ "epoch": 0.32,
474
+ "learning_rate": 6.860675039246468e-05,
475
+ "loss": 2.4822,
476
+ "step": 690
477
+ },
478
+ {
479
+ "epoch": 0.32,
480
+ "learning_rate": 6.850078492935636e-05,
481
+ "loss": 3.1002,
482
+ "step": 700
483
+ },
484
+ {
485
+ "epoch": 0.32,
486
+ "eval_loss": 2.819200277328491,
487
+ "eval_runtime": 129.5044,
488
+ "eval_samples_per_second": 35.674,
489
+ "eval_steps_per_second": 1.12,
490
+ "eval_wer": 0.9999694058618369,
491
+ "step": 700
492
+ },
493
+ {
494
+ "epoch": 0.33,
495
+ "learning_rate": 6.838304552590267e-05,
496
+ "loss": 2.9139,
497
+ "step": 710
498
+ },
499
+ {
500
+ "epoch": 0.33,
501
+ "learning_rate": 6.826530612244897e-05,
502
+ "loss": 2.9957,
503
+ "step": 720
504
+ },
505
+ {
506
+ "epoch": 0.34,
507
+ "learning_rate": 6.814756671899528e-05,
508
+ "loss": 2.9842,
509
+ "step": 730
510
+ },
511
+ {
512
+ "epoch": 0.34,
513
+ "learning_rate": 6.802982731554159e-05,
514
+ "loss": 3.2804,
515
+ "step": 740
516
+ },
517
+ {
518
+ "epoch": 0.35,
519
+ "learning_rate": 6.792386185243327e-05,
520
+ "loss": 3.4944,
521
+ "step": 750
522
+ },
523
+ {
524
+ "epoch": 0.35,
525
+ "learning_rate": 6.780612244897958e-05,
526
+ "loss": 2.9905,
527
+ "step": 760
528
+ },
529
+ {
530
+ "epoch": 0.36,
531
+ "learning_rate": 6.768838304552589e-05,
532
+ "loss": 2.9692,
533
+ "step": 770
534
+ },
535
+ {
536
+ "epoch": 0.36,
537
+ "learning_rate": 6.757064364207221e-05,
538
+ "loss": 2.9747,
539
+ "step": 780
540
+ },
541
+ {
542
+ "epoch": 0.36,
543
+ "learning_rate": 6.745290423861852e-05,
544
+ "loss": 3.294,
545
+ "step": 790
546
+ },
547
+ {
548
+ "epoch": 0.37,
549
+ "learning_rate": 6.73469387755102e-05,
550
+ "loss": 3.5998,
551
+ "step": 800
552
+ },
553
+ {
554
+ "epoch": 0.37,
555
+ "eval_loss": 3.059178590774536,
556
+ "eval_runtime": 129.2337,
557
+ "eval_samples_per_second": 35.749,
558
+ "eval_steps_per_second": 1.122,
559
+ "eval_wer": 1.0,
560
+ "step": 800
561
+ },
562
+ {
563
+ "epoch": 0.37,
564
+ "learning_rate": 6.72291993720565e-05,
565
+ "loss": 2.9902,
566
+ "step": 810
567
+ },
568
+ {
569
+ "epoch": 0.38,
570
+ "learning_rate": 6.711145996860281e-05,
571
+ "loss": 2.9644,
572
+ "step": 820
573
+ },
574
+ {
575
+ "epoch": 0.38,
576
+ "learning_rate": 6.699372056514913e-05,
577
+ "loss": 2.9529,
578
+ "step": 830
579
+ },
580
+ {
581
+ "epoch": 0.39,
582
+ "learning_rate": 6.687598116169544e-05,
583
+ "loss": 3.2737,
584
+ "step": 840
585
+ },
586
+ {
587
+ "epoch": 0.39,
588
+ "learning_rate": 6.677001569858712e-05,
589
+ "loss": 3.6534,
590
+ "step": 850
591
+ },
592
+ {
593
+ "epoch": 0.4,
594
+ "learning_rate": 6.665227629513343e-05,
595
+ "loss": 2.9664,
596
+ "step": 860
597
+ },
598
+ {
599
+ "epoch": 0.4,
600
+ "learning_rate": 6.653453689167974e-05,
601
+ "loss": 2.9463,
602
+ "step": 870
603
+ },
604
+ {
605
+ "epoch": 0.41,
606
+ "learning_rate": 6.641679748822606e-05,
607
+ "loss": 2.9591,
608
+ "step": 880
609
+ },
610
+ {
611
+ "epoch": 0.41,
612
+ "learning_rate": 6.629905808477237e-05,
613
+ "loss": 3.2701,
614
+ "step": 890
615
+ },
616
+ {
617
+ "epoch": 0.42,
618
+ "learning_rate": 6.618131868131867e-05,
619
+ "loss": 3.3784,
620
+ "step": 900
621
+ },
622
+ {
623
+ "epoch": 0.42,
624
+ "eval_loss": 3.0334482192993164,
625
+ "eval_runtime": 129.072,
626
+ "eval_samples_per_second": 35.794,
627
+ "eval_steps_per_second": 1.123,
628
+ "eval_wer": 1.0,
629
+ "step": 900
630
  }
631
  ],
632
  "max_steps": 6501,
633
  "num_train_epochs": 3,
634
+ "total_flos": 1.7007095739442115e+19,
635
  "trial_name": null,
636
  "trial_params": null
637
  }
{checkpoint-600 → checkpoint-900}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa43bdbed16b5fc10ebee537908dcf5e5f5e67381037463baf9a181a9926057d
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03c4ad43954ff5d68a16a7e40b86626a429d98e2fdeccfd745f3e2d0f37d6ea5
3
  size 1262067185