marinone94 committed on
Commit
d379668
•
1 Parent(s): 46c0759

Training in progress, step 4300

Browse files
{checkpoint-3800 → checkpoint-4200}/config.json RENAMED
File without changes
{checkpoint-3800 → checkpoint-4200}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3916c502476103f3ffc7a1308222895c30676a37dc692fc61cb70066e716d9ac
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ddef0f5049947aa8210403bee49ef1fd880b41459272b4c05c3eb32308f2f44
3
  size 2490337809
{checkpoint-3800 → checkpoint-4200}/preprocessor_config.json RENAMED
File without changes
{checkpoint-3900 → checkpoint-4200}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b689bc4f48a4fe8515030b196fb5bd4de5819b77555cbfd8e1979aa0d04c367
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:588e62e516d26a20bbfce337c36c9f1a83451a67d19e583d1cdceb96aff5859f
3
  size 1262063089
{checkpoint-3900 → checkpoint-4200}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:266a5b6d767d7cc2ccd3807fca6c4cfb051cc7d3796a6836ef75e2cf3c6b3218
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e03d76f10a67412455e92aefe077a6da7230abf78e55d42dec22dfbaea90dbd
3
  size 14567
{checkpoint-3900 → checkpoint-4200}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26f1e0a22621c69063b9f7d6715acac2896bf4745746722f322c93153a0c85b5
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7018904ee53324745014ee00f0192d0f3344069918f4c8dd54467134a03ae355
3
  size 559
{checkpoint-3800 → checkpoint-4200}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:108cf3add24d85591d96de728715165debaf66f2fb85e7a11bb55ca6e478dd61
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb4edd93c6eef5711ff572cfd2e93da18eb0ae8a7738f853f88ca1ecfb5e8b9
3
  size 623
{checkpoint-3900 → checkpoint-4200}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 42.85245901639344,
5
- "global_step": 3900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1527,11 +1527,128 @@
1527
  "eval_steps_per_second": 0.798,
1528
  "eval_wer": 0.12883905762134545,
1529
  "step": 3900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1530
  }
1531
  ],
1532
  "max_steps": 4550,
1533
  "num_train_epochs": 50,
1534
- "total_flos": 6.032084123274907e+19,
1535
  "trial_name": null,
1536
  "trial_params": null
1537
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 46.15300546448088,
5
+ "global_step": 4200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1527
  "eval_steps_per_second": 0.798,
1528
  "eval_wer": 0.12883905762134545,
1529
  "step": 3900
1530
+ },
1531
+ {
1532
+ "epoch": 43.08,
1533
+ "learning_rate": 6.205882352941176e-05,
1534
+ "loss": 0.7691,
1535
+ "step": 3920
1536
+ },
1537
+ {
1538
+ "epoch": 43.3,
1539
+ "learning_rate": 6.0098039215686274e-05,
1540
+ "loss": 0.728,
1541
+ "step": 3940
1542
+ },
1543
+ {
1544
+ "epoch": 43.51,
1545
+ "learning_rate": 5.813725490196078e-05,
1546
+ "loss": 0.7585,
1547
+ "step": 3960
1548
+ },
1549
+ {
1550
+ "epoch": 43.73,
1551
+ "learning_rate": 5.6176470588235296e-05,
1552
+ "loss": 0.7564,
1553
+ "step": 3980
1554
+ },
1555
+ {
1556
+ "epoch": 43.95,
1557
+ "learning_rate": 5.4215686274509804e-05,
1558
+ "loss": 0.7383,
1559
+ "step": 4000
1560
+ },
1561
+ {
1562
+ "epoch": 43.95,
1563
+ "eval_loss": 0.15269021689891815,
1564
+ "eval_runtime": 188.5816,
1565
+ "eval_samples_per_second": 25.681,
1566
+ "eval_steps_per_second": 0.806,
1567
+ "eval_wer": 0.13003122338915696,
1568
+ "step": 4000
1569
+ },
1570
+ {
1571
+ "epoch": 44.17,
1572
+ "learning_rate": 5.225490196078431e-05,
1573
+ "loss": 0.7693,
1574
+ "step": 4020
1575
+ },
1576
+ {
1577
+ "epoch": 44.39,
1578
+ "learning_rate": 5.0294117647058826e-05,
1579
+ "loss": 0.7347,
1580
+ "step": 4040
1581
+ },
1582
+ {
1583
+ "epoch": 44.61,
1584
+ "learning_rate": 4.8333333333333334e-05,
1585
+ "loss": 0.7185,
1586
+ "step": 4060
1587
+ },
1588
+ {
1589
+ "epoch": 44.83,
1590
+ "learning_rate": 4.637254901960784e-05,
1591
+ "loss": 0.7394,
1592
+ "step": 4080
1593
+ },
1594
+ {
1595
+ "epoch": 45.05,
1596
+ "learning_rate": 4.4411764705882356e-05,
1597
+ "loss": 0.7565,
1598
+ "step": 4100
1599
+ },
1600
+ {
1601
+ "epoch": 45.05,
1602
+ "eval_loss": 0.14823457598686218,
1603
+ "eval_runtime": 191.1254,
1604
+ "eval_samples_per_second": 25.339,
1605
+ "eval_steps_per_second": 0.795,
1606
+ "eval_wer": 0.12886744252057905,
1607
+ "step": 4100
1608
+ },
1609
+ {
1610
+ "epoch": 45.27,
1611
+ "learning_rate": 4.2450980392156864e-05,
1612
+ "loss": 0.7535,
1613
+ "step": 4120
1614
+ },
1615
+ {
1616
+ "epoch": 45.49,
1617
+ "learning_rate": 4.049019607843137e-05,
1618
+ "loss": 0.737,
1619
+ "step": 4140
1620
+ },
1621
+ {
1622
+ "epoch": 45.71,
1623
+ "learning_rate": 3.852941176470588e-05,
1624
+ "loss": 0.7501,
1625
+ "step": 4160
1626
+ },
1627
+ {
1628
+ "epoch": 45.93,
1629
+ "learning_rate": 3.6568627450980393e-05,
1630
+ "loss": 0.7285,
1631
+ "step": 4180
1632
+ },
1633
+ {
1634
+ "epoch": 46.15,
1635
+ "learning_rate": 3.46078431372549e-05,
1636
+ "loss": 0.7697,
1637
+ "step": 4200
1638
+ },
1639
+ {
1640
+ "epoch": 46.15,
1641
+ "eval_loss": 0.1494804471731186,
1642
+ "eval_runtime": 190.3868,
1643
+ "eval_samples_per_second": 25.438,
1644
+ "eval_steps_per_second": 0.798,
1645
+ "eval_wer": 0.1271927334657962,
1646
+ "step": 4200
1647
  }
1648
  ],
1649
  "max_steps": 4550,
1650
  "num_train_epochs": 50,
1651
+ "total_flos": 6.496086001622114e+19,
1652
  "trial_name": null,
1653
  "trial_params": null
1654
  }
{checkpoint-3800 → checkpoint-4200}/training_args.bin RENAMED
File without changes
{checkpoint-3900 → checkpoint-4300}/config.json RENAMED
File without changes
{checkpoint-3900 → checkpoint-4300}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:168e59323a9bf241005906b1870612b1b524768b8a27757437012f8cdf781fc3
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58cce60fe69424e2a89d307caa661955fd8777a8855b339f2af2ad4573bb989c
3
  size 2490337809
{checkpoint-3900 → checkpoint-4300}/preprocessor_config.json RENAMED
File without changes
{checkpoint-3800 → checkpoint-4300}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0b145b6b59bd869ef8bc9342a6f9c151b7c459935f0fd7b3877c9afe90a49de
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca11c993941e9cd903a98a8d6a851a925b5bc173fcb10c435092a4ff2bbe4c56
3
  size 1262063089
{checkpoint-3800 → checkpoint-4300}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd0c1e59d01355dc7800f78e2a8c707b3bf501a6ea7d92076b4d2400614623cf
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:412f958acca27a8108970c9e2ebea1c88954db1eb284dd8fe51687e899621b23
3
+ size 14503
{checkpoint-3800 → checkpoint-4300}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29eb923c97bb88614ccb0255ae678634f872dd8aa03ae16319e241eb7a1e8c90
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6924042b31741a307a9c2d3e5d4148105227679127f306862679a760336ca11c
3
  size 559
{checkpoint-3900 → checkpoint-4300}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1bc34f9b39344288eae3f0b593523acd1748174f4d473bee33a329e62da9e8a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9bfd4ddf4228b39539babfcc9c868a929c00cb0946eb4b8ecfaa5fa12431293
3
  size 623
{checkpoint-3800 → checkpoint-4300}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 41.75409836065574,
5
- "global_step": 3800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1488,11 +1488,206 @@
1488
  "eval_steps_per_second": 0.815,
1489
  "eval_wer": 0.13170593244393983,
1490
  "step": 3800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1491
  }
1492
  ],
1493
  "max_steps": 4550,
1494
  "num_train_epochs": 50,
1495
- "total_flos": 5.876357873041098e+19,
1496
  "trial_name": null,
1497
  "trial_params": null
1498
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 47.25136612021858,
5
+ "global_step": 4300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1488
  "eval_steps_per_second": 0.815,
1489
  "eval_wer": 0.13170593244393983,
1490
  "step": 3800
1491
+ },
1492
+ {
1493
+ "epoch": 41.97,
1494
+ "learning_rate": 7.186274509803923e-05,
1495
+ "loss": 0.7765,
1496
+ "step": 3820
1497
+ },
1498
+ {
1499
+ "epoch": 42.2,
1500
+ "learning_rate": 6.990196078431373e-05,
1501
+ "loss": 0.7599,
1502
+ "step": 3840
1503
+ },
1504
+ {
1505
+ "epoch": 42.42,
1506
+ "learning_rate": 6.794117647058824e-05,
1507
+ "loss": 0.7782,
1508
+ "step": 3860
1509
+ },
1510
+ {
1511
+ "epoch": 42.63,
1512
+ "learning_rate": 6.598039215686274e-05,
1513
+ "loss": 0.7395,
1514
+ "step": 3880
1515
+ },
1516
+ {
1517
+ "epoch": 42.85,
1518
+ "learning_rate": 6.401960784313726e-05,
1519
+ "loss": 0.7594,
1520
+ "step": 3900
1521
+ },
1522
+ {
1523
+ "epoch": 42.85,
1524
+ "eval_loss": 0.14983513951301575,
1525
+ "eval_runtime": 190.4439,
1526
+ "eval_samples_per_second": 25.43,
1527
+ "eval_steps_per_second": 0.798,
1528
+ "eval_wer": 0.12883905762134545,
1529
+ "step": 3900
1530
+ },
1531
+ {
1532
+ "epoch": 43.08,
1533
+ "learning_rate": 6.205882352941176e-05,
1534
+ "loss": 0.7691,
1535
+ "step": 3920
1536
+ },
1537
+ {
1538
+ "epoch": 43.3,
1539
+ "learning_rate": 6.0098039215686274e-05,
1540
+ "loss": 0.728,
1541
+ "step": 3940
1542
+ },
1543
+ {
1544
+ "epoch": 43.51,
1545
+ "learning_rate": 5.813725490196078e-05,
1546
+ "loss": 0.7585,
1547
+ "step": 3960
1548
+ },
1549
+ {
1550
+ "epoch": 43.73,
1551
+ "learning_rate": 5.6176470588235296e-05,
1552
+ "loss": 0.7564,
1553
+ "step": 3980
1554
+ },
1555
+ {
1556
+ "epoch": 43.95,
1557
+ "learning_rate": 5.4215686274509804e-05,
1558
+ "loss": 0.7383,
1559
+ "step": 4000
1560
+ },
1561
+ {
1562
+ "epoch": 43.95,
1563
+ "eval_loss": 0.15269021689891815,
1564
+ "eval_runtime": 188.5816,
1565
+ "eval_samples_per_second": 25.681,
1566
+ "eval_steps_per_second": 0.806,
1567
+ "eval_wer": 0.13003122338915696,
1568
+ "step": 4000
1569
+ },
1570
+ {
1571
+ "epoch": 44.17,
1572
+ "learning_rate": 5.225490196078431e-05,
1573
+ "loss": 0.7693,
1574
+ "step": 4020
1575
+ },
1576
+ {
1577
+ "epoch": 44.39,
1578
+ "learning_rate": 5.0294117647058826e-05,
1579
+ "loss": 0.7347,
1580
+ "step": 4040
1581
+ },
1582
+ {
1583
+ "epoch": 44.61,
1584
+ "learning_rate": 4.8333333333333334e-05,
1585
+ "loss": 0.7185,
1586
+ "step": 4060
1587
+ },
1588
+ {
1589
+ "epoch": 44.83,
1590
+ "learning_rate": 4.637254901960784e-05,
1591
+ "loss": 0.7394,
1592
+ "step": 4080
1593
+ },
1594
+ {
1595
+ "epoch": 45.05,
1596
+ "learning_rate": 4.4411764705882356e-05,
1597
+ "loss": 0.7565,
1598
+ "step": 4100
1599
+ },
1600
+ {
1601
+ "epoch": 45.05,
1602
+ "eval_loss": 0.14823457598686218,
1603
+ "eval_runtime": 191.1254,
1604
+ "eval_samples_per_second": 25.339,
1605
+ "eval_steps_per_second": 0.795,
1606
+ "eval_wer": 0.12886744252057905,
1607
+ "step": 4100
1608
+ },
1609
+ {
1610
+ "epoch": 45.27,
1611
+ "learning_rate": 4.2450980392156864e-05,
1612
+ "loss": 0.7535,
1613
+ "step": 4120
1614
+ },
1615
+ {
1616
+ "epoch": 45.49,
1617
+ "learning_rate": 4.049019607843137e-05,
1618
+ "loss": 0.737,
1619
+ "step": 4140
1620
+ },
1621
+ {
1622
+ "epoch": 45.71,
1623
+ "learning_rate": 3.852941176470588e-05,
1624
+ "loss": 0.7501,
1625
+ "step": 4160
1626
+ },
1627
+ {
1628
+ "epoch": 45.93,
1629
+ "learning_rate": 3.6568627450980393e-05,
1630
+ "loss": 0.7285,
1631
+ "step": 4180
1632
+ },
1633
+ {
1634
+ "epoch": 46.15,
1635
+ "learning_rate": 3.46078431372549e-05,
1636
+ "loss": 0.7697,
1637
+ "step": 4200
1638
+ },
1639
+ {
1640
+ "epoch": 46.15,
1641
+ "eval_loss": 0.1494804471731186,
1642
+ "eval_runtime": 190.3868,
1643
+ "eval_samples_per_second": 25.438,
1644
+ "eval_steps_per_second": 0.798,
1645
+ "eval_wer": 0.1271927334657962,
1646
+ "step": 4200
1647
+ },
1648
+ {
1649
+ "epoch": 46.37,
1650
+ "learning_rate": 3.264705882352941e-05,
1651
+ "loss": 0.7283,
1652
+ "step": 4220
1653
+ },
1654
+ {
1655
+ "epoch": 46.59,
1656
+ "learning_rate": 3.0686274509803923e-05,
1657
+ "loss": 0.7218,
1658
+ "step": 4240
1659
+ },
1660
+ {
1661
+ "epoch": 46.81,
1662
+ "learning_rate": 2.872549019607843e-05,
1663
+ "loss": 0.7341,
1664
+ "step": 4260
1665
+ },
1666
+ {
1667
+ "epoch": 47.03,
1668
+ "learning_rate": 2.6764705882352942e-05,
1669
+ "loss": 0.7293,
1670
+ "step": 4280
1671
+ },
1672
+ {
1673
+ "epoch": 47.25,
1674
+ "learning_rate": 2.480392156862745e-05,
1675
+ "loss": 0.7194,
1676
+ "step": 4300
1677
+ },
1678
+ {
1679
+ "epoch": 47.25,
1680
+ "eval_loss": 0.14928147196769714,
1681
+ "eval_runtime": 189.625,
1682
+ "eval_samples_per_second": 25.54,
1683
+ "eval_steps_per_second": 0.802,
1684
+ "eval_wer": 0.12693726937269373,
1685
+ "step": 4300
1686
  }
1687
  ],
1688
  "max_steps": 4550,
1689
  "num_train_epochs": 50,
1690
+ "total_flos": 6.648181588185401e+19,
1691
  "trial_name": null,
1692
  "trial_params": null
1693
  }
{checkpoint-3900 → checkpoint-4300}/training_args.bin RENAMED
File without changes