marinone94
commited on
Commit
β’
d379668
1
Parent(s):
46c0759
Training in progress, step 4300
Browse files- {checkpoint-3800 β checkpoint-4200}/config.json +0 -0
- {checkpoint-3800 β checkpoint-4200}/optimizer.pt +1 -1
- {checkpoint-3800 β checkpoint-4200}/preprocessor_config.json +0 -0
- {checkpoint-3900 β checkpoint-4200}/pytorch_model.bin +1 -1
- {checkpoint-3900 β checkpoint-4200}/rng_state.pth +1 -1
- {checkpoint-3900 β checkpoint-4200}/scaler.pt +1 -1
- {checkpoint-3800 β checkpoint-4200}/scheduler.pt +1 -1
- {checkpoint-3900 β checkpoint-4200}/trainer_state.json +120 -3
- {checkpoint-3800 β checkpoint-4200}/training_args.bin +0 -0
- {checkpoint-3900 β checkpoint-4300}/config.json +0 -0
- {checkpoint-3900 β checkpoint-4300}/optimizer.pt +1 -1
- {checkpoint-3900 β checkpoint-4300}/preprocessor_config.json +0 -0
- {checkpoint-3800 β checkpoint-4300}/pytorch_model.bin +1 -1
- {checkpoint-3800 β checkpoint-4300}/rng_state.pth +2 -2
- {checkpoint-3800 β checkpoint-4300}/scaler.pt +1 -1
- {checkpoint-3900 β checkpoint-4300}/scheduler.pt +1 -1
- {checkpoint-3800 β checkpoint-4300}/trainer_state.json +198 -3
- {checkpoint-3900 β checkpoint-4300}/training_args.bin +0 -0
{checkpoint-3800 β checkpoint-4200}/config.json
RENAMED
File without changes
|
{checkpoint-3800 β checkpoint-4200}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490337809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ddef0f5049947aa8210403bee49ef1fd880b41459272b4c05c3eb32308f2f44
|
3 |
size 2490337809
|
{checkpoint-3800 β checkpoint-4200}/preprocessor_config.json
RENAMED
File without changes
|
{checkpoint-3900 β checkpoint-4200}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262063089
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:588e62e516d26a20bbfce337c36c9f1a83451a67d19e583d1cdceb96aff5859f
|
3 |
size 1262063089
|
{checkpoint-3900 β checkpoint-4200}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e03d76f10a67412455e92aefe077a6da7230abf78e55d42dec22dfbaea90dbd
|
3 |
size 14567
|
{checkpoint-3900 β checkpoint-4200}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7018904ee53324745014ee00f0192d0f3344069918f4c8dd54467134a03ae355
|
3 |
size 559
|
{checkpoint-3800 β checkpoint-4200}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fb4edd93c6eef5711ff572cfd2e93da18eb0ae8a7738f853f88ca1ecfb5e8b9
|
3 |
size 623
|
{checkpoint-3900 β checkpoint-4200}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1527,11 +1527,128 @@
|
|
1527 |
"eval_steps_per_second": 0.798,
|
1528 |
"eval_wer": 0.12883905762134545,
|
1529 |
"step": 3900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1530 |
}
|
1531 |
],
|
1532 |
"max_steps": 4550,
|
1533 |
"num_train_epochs": 50,
|
1534 |
-
"total_flos": 6.
|
1535 |
"trial_name": null,
|
1536 |
"trial_params": null
|
1537 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 46.15300546448088,
|
5 |
+
"global_step": 4200,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1527 |
"eval_steps_per_second": 0.798,
|
1528 |
"eval_wer": 0.12883905762134545,
|
1529 |
"step": 3900
|
1530 |
+
},
|
1531 |
+
{
|
1532 |
+
"epoch": 43.08,
|
1533 |
+
"learning_rate": 6.205882352941176e-05,
|
1534 |
+
"loss": 0.7691,
|
1535 |
+
"step": 3920
|
1536 |
+
},
|
1537 |
+
{
|
1538 |
+
"epoch": 43.3,
|
1539 |
+
"learning_rate": 6.0098039215686274e-05,
|
1540 |
+
"loss": 0.728,
|
1541 |
+
"step": 3940
|
1542 |
+
},
|
1543 |
+
{
|
1544 |
+
"epoch": 43.51,
|
1545 |
+
"learning_rate": 5.813725490196078e-05,
|
1546 |
+
"loss": 0.7585,
|
1547 |
+
"step": 3960
|
1548 |
+
},
|
1549 |
+
{
|
1550 |
+
"epoch": 43.73,
|
1551 |
+
"learning_rate": 5.6176470588235296e-05,
|
1552 |
+
"loss": 0.7564,
|
1553 |
+
"step": 3980
|
1554 |
+
},
|
1555 |
+
{
|
1556 |
+
"epoch": 43.95,
|
1557 |
+
"learning_rate": 5.4215686274509804e-05,
|
1558 |
+
"loss": 0.7383,
|
1559 |
+
"step": 4000
|
1560 |
+
},
|
1561 |
+
{
|
1562 |
+
"epoch": 43.95,
|
1563 |
+
"eval_loss": 0.15269021689891815,
|
1564 |
+
"eval_runtime": 188.5816,
|
1565 |
+
"eval_samples_per_second": 25.681,
|
1566 |
+
"eval_steps_per_second": 0.806,
|
1567 |
+
"eval_wer": 0.13003122338915696,
|
1568 |
+
"step": 4000
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 44.17,
|
1572 |
+
"learning_rate": 5.225490196078431e-05,
|
1573 |
+
"loss": 0.7693,
|
1574 |
+
"step": 4020
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 44.39,
|
1578 |
+
"learning_rate": 5.0294117647058826e-05,
|
1579 |
+
"loss": 0.7347,
|
1580 |
+
"step": 4040
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 44.61,
|
1584 |
+
"learning_rate": 4.8333333333333334e-05,
|
1585 |
+
"loss": 0.7185,
|
1586 |
+
"step": 4060
|
1587 |
+
},
|
1588 |
+
{
|
1589 |
+
"epoch": 44.83,
|
1590 |
+
"learning_rate": 4.637254901960784e-05,
|
1591 |
+
"loss": 0.7394,
|
1592 |
+
"step": 4080
|
1593 |
+
},
|
1594 |
+
{
|
1595 |
+
"epoch": 45.05,
|
1596 |
+
"learning_rate": 4.4411764705882356e-05,
|
1597 |
+
"loss": 0.7565,
|
1598 |
+
"step": 4100
|
1599 |
+
},
|
1600 |
+
{
|
1601 |
+
"epoch": 45.05,
|
1602 |
+
"eval_loss": 0.14823457598686218,
|
1603 |
+
"eval_runtime": 191.1254,
|
1604 |
+
"eval_samples_per_second": 25.339,
|
1605 |
+
"eval_steps_per_second": 0.795,
|
1606 |
+
"eval_wer": 0.12886744252057905,
|
1607 |
+
"step": 4100
|
1608 |
+
},
|
1609 |
+
{
|
1610 |
+
"epoch": 45.27,
|
1611 |
+
"learning_rate": 4.2450980392156864e-05,
|
1612 |
+
"loss": 0.7535,
|
1613 |
+
"step": 4120
|
1614 |
+
},
|
1615 |
+
{
|
1616 |
+
"epoch": 45.49,
|
1617 |
+
"learning_rate": 4.049019607843137e-05,
|
1618 |
+
"loss": 0.737,
|
1619 |
+
"step": 4140
|
1620 |
+
},
|
1621 |
+
{
|
1622 |
+
"epoch": 45.71,
|
1623 |
+
"learning_rate": 3.852941176470588e-05,
|
1624 |
+
"loss": 0.7501,
|
1625 |
+
"step": 4160
|
1626 |
+
},
|
1627 |
+
{
|
1628 |
+
"epoch": 45.93,
|
1629 |
+
"learning_rate": 3.6568627450980393e-05,
|
1630 |
+
"loss": 0.7285,
|
1631 |
+
"step": 4180
|
1632 |
+
},
|
1633 |
+
{
|
1634 |
+
"epoch": 46.15,
|
1635 |
+
"learning_rate": 3.46078431372549e-05,
|
1636 |
+
"loss": 0.7697,
|
1637 |
+
"step": 4200
|
1638 |
+
},
|
1639 |
+
{
|
1640 |
+
"epoch": 46.15,
|
1641 |
+
"eval_loss": 0.1494804471731186,
|
1642 |
+
"eval_runtime": 190.3868,
|
1643 |
+
"eval_samples_per_second": 25.438,
|
1644 |
+
"eval_steps_per_second": 0.798,
|
1645 |
+
"eval_wer": 0.1271927334657962,
|
1646 |
+
"step": 4200
|
1647 |
}
|
1648 |
],
|
1649 |
"max_steps": 4550,
|
1650 |
"num_train_epochs": 50,
|
1651 |
+
"total_flos": 6.496086001622114e+19,
|
1652 |
"trial_name": null,
|
1653 |
"trial_params": null
|
1654 |
}
|
{checkpoint-3800 β checkpoint-4200}/training_args.bin
RENAMED
File without changes
|
{checkpoint-3900 β checkpoint-4300}/config.json
RENAMED
File without changes
|
{checkpoint-3900 β checkpoint-4300}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490337809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58cce60fe69424e2a89d307caa661955fd8777a8855b339f2af2ad4573bb989c
|
3 |
size 2490337809
|
{checkpoint-3900 β checkpoint-4300}/preprocessor_config.json
RENAMED
File without changes
|
{checkpoint-3800 β checkpoint-4300}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262063089
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca11c993941e9cd903a98a8d6a851a925b5bc173fcb10c435092a4ff2bbe4c56
|
3 |
size 1262063089
|
{checkpoint-3800 β checkpoint-4300}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:412f958acca27a8108970c9e2ebea1c88954db1eb284dd8fe51687e899621b23
|
3 |
+
size 14503
|
{checkpoint-3800 β checkpoint-4300}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6924042b31741a307a9c2d3e5d4148105227679127f306862679a760336ca11c
|
3 |
size 559
|
{checkpoint-3900 β checkpoint-4300}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9bfd4ddf4228b39539babfcc9c868a929c00cb0946eb4b8ecfaa5fa12431293
|
3 |
size 623
|
{checkpoint-3800 β checkpoint-4300}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1488,11 +1488,206 @@
|
|
1488 |
"eval_steps_per_second": 0.815,
|
1489 |
"eval_wer": 0.13170593244393983,
|
1490 |
"step": 3800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1491 |
}
|
1492 |
],
|
1493 |
"max_steps": 4550,
|
1494 |
"num_train_epochs": 50,
|
1495 |
-
"total_flos":
|
1496 |
"trial_name": null,
|
1497 |
"trial_params": null
|
1498 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 47.25136612021858,
|
5 |
+
"global_step": 4300,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1488 |
"eval_steps_per_second": 0.815,
|
1489 |
"eval_wer": 0.13170593244393983,
|
1490 |
"step": 3800
|
1491 |
+
},
|
1492 |
+
{
|
1493 |
+
"epoch": 41.97,
|
1494 |
+
"learning_rate": 7.186274509803923e-05,
|
1495 |
+
"loss": 0.7765,
|
1496 |
+
"step": 3820
|
1497 |
+
},
|
1498 |
+
{
|
1499 |
+
"epoch": 42.2,
|
1500 |
+
"learning_rate": 6.990196078431373e-05,
|
1501 |
+
"loss": 0.7599,
|
1502 |
+
"step": 3840
|
1503 |
+
},
|
1504 |
+
{
|
1505 |
+
"epoch": 42.42,
|
1506 |
+
"learning_rate": 6.794117647058824e-05,
|
1507 |
+
"loss": 0.7782,
|
1508 |
+
"step": 3860
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 42.63,
|
1512 |
+
"learning_rate": 6.598039215686274e-05,
|
1513 |
+
"loss": 0.7395,
|
1514 |
+
"step": 3880
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 42.85,
|
1518 |
+
"learning_rate": 6.401960784313726e-05,
|
1519 |
+
"loss": 0.7594,
|
1520 |
+
"step": 3900
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 42.85,
|
1524 |
+
"eval_loss": 0.14983513951301575,
|
1525 |
+
"eval_runtime": 190.4439,
|
1526 |
+
"eval_samples_per_second": 25.43,
|
1527 |
+
"eval_steps_per_second": 0.798,
|
1528 |
+
"eval_wer": 0.12883905762134545,
|
1529 |
+
"step": 3900
|
1530 |
+
},
|
1531 |
+
{
|
1532 |
+
"epoch": 43.08,
|
1533 |
+
"learning_rate": 6.205882352941176e-05,
|
1534 |
+
"loss": 0.7691,
|
1535 |
+
"step": 3920
|
1536 |
+
},
|
1537 |
+
{
|
1538 |
+
"epoch": 43.3,
|
1539 |
+
"learning_rate": 6.0098039215686274e-05,
|
1540 |
+
"loss": 0.728,
|
1541 |
+
"step": 3940
|
1542 |
+
},
|
1543 |
+
{
|
1544 |
+
"epoch": 43.51,
|
1545 |
+
"learning_rate": 5.813725490196078e-05,
|
1546 |
+
"loss": 0.7585,
|
1547 |
+
"step": 3960
|
1548 |
+
},
|
1549 |
+
{
|
1550 |
+
"epoch": 43.73,
|
1551 |
+
"learning_rate": 5.6176470588235296e-05,
|
1552 |
+
"loss": 0.7564,
|
1553 |
+
"step": 3980
|
1554 |
+
},
|
1555 |
+
{
|
1556 |
+
"epoch": 43.95,
|
1557 |
+
"learning_rate": 5.4215686274509804e-05,
|
1558 |
+
"loss": 0.7383,
|
1559 |
+
"step": 4000
|
1560 |
+
},
|
1561 |
+
{
|
1562 |
+
"epoch": 43.95,
|
1563 |
+
"eval_loss": 0.15269021689891815,
|
1564 |
+
"eval_runtime": 188.5816,
|
1565 |
+
"eval_samples_per_second": 25.681,
|
1566 |
+
"eval_steps_per_second": 0.806,
|
1567 |
+
"eval_wer": 0.13003122338915696,
|
1568 |
+
"step": 4000
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 44.17,
|
1572 |
+
"learning_rate": 5.225490196078431e-05,
|
1573 |
+
"loss": 0.7693,
|
1574 |
+
"step": 4020
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 44.39,
|
1578 |
+
"learning_rate": 5.0294117647058826e-05,
|
1579 |
+
"loss": 0.7347,
|
1580 |
+
"step": 4040
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 44.61,
|
1584 |
+
"learning_rate": 4.8333333333333334e-05,
|
1585 |
+
"loss": 0.7185,
|
1586 |
+
"step": 4060
|
1587 |
+
},
|
1588 |
+
{
|
1589 |
+
"epoch": 44.83,
|
1590 |
+
"learning_rate": 4.637254901960784e-05,
|
1591 |
+
"loss": 0.7394,
|
1592 |
+
"step": 4080
|
1593 |
+
},
|
1594 |
+
{
|
1595 |
+
"epoch": 45.05,
|
1596 |
+
"learning_rate": 4.4411764705882356e-05,
|
1597 |
+
"loss": 0.7565,
|
1598 |
+
"step": 4100
|
1599 |
+
},
|
1600 |
+
{
|
1601 |
+
"epoch": 45.05,
|
1602 |
+
"eval_loss": 0.14823457598686218,
|
1603 |
+
"eval_runtime": 191.1254,
|
1604 |
+
"eval_samples_per_second": 25.339,
|
1605 |
+
"eval_steps_per_second": 0.795,
|
1606 |
+
"eval_wer": 0.12886744252057905,
|
1607 |
+
"step": 4100
|
1608 |
+
},
|
1609 |
+
{
|
1610 |
+
"epoch": 45.27,
|
1611 |
+
"learning_rate": 4.2450980392156864e-05,
|
1612 |
+
"loss": 0.7535,
|
1613 |
+
"step": 4120
|
1614 |
+
},
|
1615 |
+
{
|
1616 |
+
"epoch": 45.49,
|
1617 |
+
"learning_rate": 4.049019607843137e-05,
|
1618 |
+
"loss": 0.737,
|
1619 |
+
"step": 4140
|
1620 |
+
},
|
1621 |
+
{
|
1622 |
+
"epoch": 45.71,
|
1623 |
+
"learning_rate": 3.852941176470588e-05,
|
1624 |
+
"loss": 0.7501,
|
1625 |
+
"step": 4160
|
1626 |
+
},
|
1627 |
+
{
|
1628 |
+
"epoch": 45.93,
|
1629 |
+
"learning_rate": 3.6568627450980393e-05,
|
1630 |
+
"loss": 0.7285,
|
1631 |
+
"step": 4180
|
1632 |
+
},
|
1633 |
+
{
|
1634 |
+
"epoch": 46.15,
|
1635 |
+
"learning_rate": 3.46078431372549e-05,
|
1636 |
+
"loss": 0.7697,
|
1637 |
+
"step": 4200
|
1638 |
+
},
|
1639 |
+
{
|
1640 |
+
"epoch": 46.15,
|
1641 |
+
"eval_loss": 0.1494804471731186,
|
1642 |
+
"eval_runtime": 190.3868,
|
1643 |
+
"eval_samples_per_second": 25.438,
|
1644 |
+
"eval_steps_per_second": 0.798,
|
1645 |
+
"eval_wer": 0.1271927334657962,
|
1646 |
+
"step": 4200
|
1647 |
+
},
|
1648 |
+
{
|
1649 |
+
"epoch": 46.37,
|
1650 |
+
"learning_rate": 3.264705882352941e-05,
|
1651 |
+
"loss": 0.7283,
|
1652 |
+
"step": 4220
|
1653 |
+
},
|
1654 |
+
{
|
1655 |
+
"epoch": 46.59,
|
1656 |
+
"learning_rate": 3.0686274509803923e-05,
|
1657 |
+
"loss": 0.7218,
|
1658 |
+
"step": 4240
|
1659 |
+
},
|
1660 |
+
{
|
1661 |
+
"epoch": 46.81,
|
1662 |
+
"learning_rate": 2.872549019607843e-05,
|
1663 |
+
"loss": 0.7341,
|
1664 |
+
"step": 4260
|
1665 |
+
},
|
1666 |
+
{
|
1667 |
+
"epoch": 47.03,
|
1668 |
+
"learning_rate": 2.6764705882352942e-05,
|
1669 |
+
"loss": 0.7293,
|
1670 |
+
"step": 4280
|
1671 |
+
},
|
1672 |
+
{
|
1673 |
+
"epoch": 47.25,
|
1674 |
+
"learning_rate": 2.480392156862745e-05,
|
1675 |
+
"loss": 0.7194,
|
1676 |
+
"step": 4300
|
1677 |
+
},
|
1678 |
+
{
|
1679 |
+
"epoch": 47.25,
|
1680 |
+
"eval_loss": 0.14928147196769714,
|
1681 |
+
"eval_runtime": 189.625,
|
1682 |
+
"eval_samples_per_second": 25.54,
|
1683 |
+
"eval_steps_per_second": 0.802,
|
1684 |
+
"eval_wer": 0.12693726937269373,
|
1685 |
+
"step": 4300
|
1686 |
}
|
1687 |
],
|
1688 |
"max_steps": 4550,
|
1689 |
"num_train_epochs": 50,
|
1690 |
+
"total_flos": 6.648181588185401e+19,
|
1691 |
"trial_name": null,
|
1692 |
"trial_params": null
|
1693 |
}
|
{checkpoint-3900 β checkpoint-4300}/training_args.bin
RENAMED
File without changes
|