Training in progress, step 145000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27aa7f64a5668997df91945c7aa7132d08b2680aaa25cff1f1eb6757be6962d1
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e29202b72e61f42c98bc25f38f94ee98f77b85c4c4bbb0df8266a8f3f8636208
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bebfad7a42fafd0d6329fb78b3b96185038870b68e01ba3866bf4bcbf7fadf51
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d162d01ddde70767c9bd5e336a2499ccf3d2396e7d0b7bd1aba9c1d926c83412
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1534c814ec2aa6d32a209f8a0be527741a8fd36e30d675e3804369bc95727fb
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c493cb7808c8c84c1bd1c4ad27d29f6c3927fd2c0837f8164a91bfa81d20be30
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:075e346807569ede9b90b05bbb65e7966f610fc696d43bda85981de735e64f8e
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b40afc71d1da5a92824e2304336140e2b33816aaeeea8304b470df2b5a3f51c5
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1686,6 +1686,66 @@
|
|
1686 |
"learning_rate": 0.00012463865432770735,
|
1687 |
"loss": 0.3455,
|
1688 |
"step": 140000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1689 |
}
|
1690 |
],
|
1691 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.468081122713849,
|
5 |
+
"global_step": 145000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1686 |
"learning_rate": 0.00012463865432770735,
|
1687 |
"loss": 0.3455,
|
1688 |
"step": 140000
|
1689 |
+
},
|
1690 |
+
{
|
1691 |
+
"epoch": 2.39,
|
1692 |
+
"learning_rate": 0.00012446903894448242,
|
1693 |
+
"loss": 0.345,
|
1694 |
+
"step": 140500
|
1695 |
+
},
|
1696 |
+
{
|
1697 |
+
"epoch": 2.4,
|
1698 |
+
"learning_rate": 0.00012429898466979599,
|
1699 |
+
"loss": 0.3447,
|
1700 |
+
"step": 141000
|
1701 |
+
},
|
1702 |
+
{
|
1703 |
+
"epoch": 2.41,
|
1704 |
+
"learning_rate": 0.0001241284931820151,
|
1705 |
+
"loss": 0.3445,
|
1706 |
+
"step": 141500
|
1707 |
+
},
|
1708 |
+
{
|
1709 |
+
"epoch": 2.42,
|
1710 |
+
"learning_rate": 0.00012395756616382189,
|
1711 |
+
"loss": 0.3446,
|
1712 |
+
"step": 142000
|
1713 |
+
},
|
1714 |
+
{
|
1715 |
+
"epoch": 2.43,
|
1716 |
+
"learning_rate": 0.0001237865484557726,
|
1717 |
+
"loss": 0.3444,
|
1718 |
+
"step": 142500
|
1719 |
+
},
|
1720 |
+
{
|
1721 |
+
"epoch": 2.43,
|
1722 |
+
"learning_rate": 0.00012361475630459222,
|
1723 |
+
"loss": 0.3441,
|
1724 |
+
"step": 143000
|
1725 |
+
},
|
1726 |
+
{
|
1727 |
+
"epoch": 2.44,
|
1728 |
+
"learning_rate": 0.0001234428785670676,
|
1729 |
+
"loss": 0.3443,
|
1730 |
+
"step": 143500
|
1731 |
+
},
|
1732 |
+
{
|
1733 |
+
"epoch": 2.45,
|
1734 |
+
"learning_rate": 0.00012327022805140274,
|
1735 |
+
"loss": 0.3438,
|
1736 |
+
"step": 144000
|
1737 |
+
},
|
1738 |
+
{
|
1739 |
+
"epoch": 2.46,
|
1740 |
+
"learning_rate": 0.00012309715047605592,
|
1741 |
+
"loss": 0.3437,
|
1742 |
+
"step": 144500
|
1743 |
+
},
|
1744 |
+
{
|
1745 |
+
"epoch": 2.47,
|
1746 |
+
"learning_rate": 0.00012292364754923295,
|
1747 |
+
"loss": 0.3444,
|
1748 |
+
"step": 145000
|
1749 |
}
|
1750 |
],
|
1751 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e29202b72e61f42c98bc25f38f94ee98f77b85c4c4bbb0df8266a8f3f8636208
|
3 |
size 201355195
|