Training in progress, step 140000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:246979d5fb8eafccea5b5b91baae8bc0bcfd128b46162e9026ec7bcabe9d443b
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:add991c0656ba83939eaaaa5f8472350d3e0f2500ee6f7a6cf4cbc6fed800e2a
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb807b3bd2b0081f655c2878080eee0250b1f4da08f23efe5c475f80f0a6d077
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:817351e8cd65944628650439198ba63b920c38ccf45a9d7f01ba40b94c4adc2b
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ae332eaeccc83788bfc926c63f6dad62054cb8aeb99782ab70d3f40e234e1d0
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d13c03d6e862e36539ae520905e6386d61dfa184e5f6b5682d9a5a5d25d5411e
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a6cf35e4618cace687ad98a9d09ef37a3a00cc3a2f5e3fd4cc50724db355378
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:447e23aed23375686188e382af4e706666a1c19c5099e0889d79d882f5a031a1
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1626,6 +1626,66 @@
|
|
1626 |
"learning_rate": 0.00012630964337607617,
|
1627 |
"loss": 0.3459,
|
1628 |
"step": 135000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1629 |
}
|
1630 |
],
|
1631 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.382975464038604,
|
5 |
+
"global_step": 140000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1626 |
"learning_rate": 0.00012630964337607617,
|
1627 |
"loss": 0.3459,
|
1628 |
"step": 135000
|
1629 |
+
},
|
1630 |
+
{
|
1631 |
+
"epoch": 2.31,
|
1632 |
+
"learning_rate": 0.00012614483716050272,
|
1633 |
+
"loss": 0.3456,
|
1634 |
+
"step": 135500
|
1635 |
+
},
|
1636 |
+
{
|
1637 |
+
"epoch": 2.31,
|
1638 |
+
"learning_rate": 0.0001259792456975559,
|
1639 |
+
"loss": 0.3459,
|
1640 |
+
"step": 136000
|
1641 |
+
},
|
1642 |
+
{
|
1643 |
+
"epoch": 2.32,
|
1644 |
+
"learning_rate": 0.00012581320043801664,
|
1645 |
+
"loss": 0.3456,
|
1646 |
+
"step": 136500
|
1647 |
+
},
|
1648 |
+
{
|
1649 |
+
"epoch": 2.33,
|
1650 |
+
"learning_rate": 0.00012564670302068462,
|
1651 |
+
"loss": 0.3457,
|
1652 |
+
"step": 137000
|
1653 |
+
},
|
1654 |
+
{
|
1655 |
+
"epoch": 2.34,
|
1656 |
+
"learning_rate": 0.00012547975508882204,
|
1657 |
+
"loss": 0.3452,
|
1658 |
+
"step": 137500
|
1659 |
+
},
|
1660 |
+
{
|
1661 |
+
"epoch": 2.35,
|
1662 |
+
"learning_rate": 0.00012531235829013772,
|
1663 |
+
"loss": 0.3452,
|
1664 |
+
"step": 138000
|
1665 |
+
},
|
1666 |
+
{
|
1667 |
+
"epoch": 2.36,
|
1668 |
+
"learning_rate": 0.00012514451427677039,
|
1669 |
+
"loss": 0.3451,
|
1670 |
+
"step": 138500
|
1671 |
+
},
|
1672 |
+
{
|
1673 |
+
"epoch": 2.37,
|
1674 |
+
"learning_rate": 0.0001249762247052727,
|
1675 |
+
"loss": 0.3446,
|
1676 |
+
"step": 139000
|
1677 |
+
},
|
1678 |
+
{
|
1679 |
+
"epoch": 2.37,
|
1680 |
+
"learning_rate": 0.00012480749123659483,
|
1681 |
+
"loss": 0.3445,
|
1682 |
+
"step": 139500
|
1683 |
+
},
|
1684 |
+
{
|
1685 |
+
"epoch": 2.38,
|
1686 |
+
"learning_rate": 0.00012463865432770735,
|
1687 |
+
"loss": 0.3455,
|
1688 |
+
"step": 140000
|
1689 |
}
|
1690 |
],
|
1691 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:add991c0656ba83939eaaaa5f8472350d3e0f2500ee6f7a6cf4cbc6fed800e2a
|
3 |
size 201355195
|