Training in progress, step 140000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88502487669ed48c5b81d5668e89156ca00d459ffec5303442cc864943528e45
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df9892519dbd6116b68b1382f6bef13a8948e72a1f7cdfd140a44813466337bd
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:518d0c98c26e535dd659b44ad27605870644d0174fe82b5b955b8c75913da3f5
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dbd3b927f188de94763ae6dc5d2ab91c1f2f3e8c7e56ca840acc9f783d0c6d4
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75dda0801167aeaa5255929f5576e748485109db796bcd6a0701c192fcac2bdc
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b3fe0f7ed2afce033c8d24222ffcba0f571c8b93d6124174a974b413ce6e4cd
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43c0a8099e9a7e853a472bfc2b42aadd31fdf32da1dafad4f35b19f8984441e1
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a764d29a3460312afa48d75bfde217931b9b606545018d900461c5290efd3e7
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1626,6 +1626,66 @@
|
|
1626 |
"learning_rate": 0.00012630898373767156,
|
1627 |
"loss": 0.342,
|
1628 |
"step": 135000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1629 |
}
|
1630 |
],
|
1631 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.382978723404255,
|
5 |
+
"global_step": 140000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1626 |
"learning_rate": 0.00012630898373767156,
|
1627 |
"loss": 0.342,
|
1628 |
"step": 135000
|
1629 |
+
},
|
1630 |
+
{
|
1631 |
+
"epoch": 2.31,
|
1632 |
+
"learning_rate": 0.00012614417570078125,
|
1633 |
+
"loss": 0.3419,
|
1634 |
+
"step": 135500
|
1635 |
+
},
|
1636 |
+
{
|
1637 |
+
"epoch": 2.31,
|
1638 |
+
"learning_rate": 0.00012597858241938955,
|
1639 |
+
"loss": 0.3418,
|
1640 |
+
"step": 136000
|
1641 |
+
},
|
1642 |
+
{
|
1643 |
+
"epoch": 2.32,
|
1644 |
+
"learning_rate": 0.00012581253534795165,
|
1645 |
+
"loss": 0.3414,
|
1646 |
+
"step": 136500
|
1647 |
+
},
|
1648 |
+
{
|
1649 |
+
"epoch": 2.33,
|
1650 |
+
"learning_rate": 0.00012564603612528512,
|
1651 |
+
"loss": 0.3415,
|
1652 |
+
"step": 137000
|
1653 |
+
},
|
1654 |
+
{
|
1655 |
+
"epoch": 2.34,
|
1656 |
+
"learning_rate": 0.0001254790863946701,
|
1657 |
+
"loss": 0.3413,
|
1658 |
+
"step": 137500
|
1659 |
+
},
|
1660 |
+
{
|
1661 |
+
"epoch": 2.35,
|
1662 |
+
"learning_rate": 0.000125311687803833,
|
1663 |
+
"loss": 0.3412,
|
1664 |
+
"step": 138000
|
1665 |
+
},
|
1666 |
+
{
|
1667 |
+
"epoch": 2.36,
|
1668 |
+
"learning_rate": 0.00012514384200493036,
|
1669 |
+
"loss": 0.3414,
|
1670 |
+
"step": 138500
|
1671 |
+
},
|
1672 |
+
{
|
1673 |
+
"epoch": 2.37,
|
1674 |
+
"learning_rate": 0.00012497588768079038,
|
1675 |
+
"loss": 0.3414,
|
1676 |
+
"step": 139000
|
1677 |
+
},
|
1678 |
+
{
|
1679 |
+
"epoch": 2.37,
|
1680 |
+
"learning_rate": 0.00012480715332598534,
|
1681 |
+
"loss": 0.3411,
|
1682 |
+
"step": 139500
|
1683 |
+
},
|
1684 |
+
{
|
1685 |
+
"epoch": 2.38,
|
1686 |
+
"learning_rate": 0.00012463797674266635,
|
1687 |
+
"loss": 0.3409,
|
1688 |
+
"step": 140000
|
1689 |
}
|
1690 |
],
|
1691 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df9892519dbd6116b68b1382f6bef13a8948e72a1f7cdfd140a44813466337bd
|
3 |
size 201355195
|