Training in progress, step 135000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:928c3746f9382b349fc28e80d04b5a20158395105d5217c3005ba37255c3ca77
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5110cd9554e14b21510435e745dab095b01a66799fa13e3282560ca1e6c45499
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0b1514696acbea535d08573a1b2d3b31938bfb72bfeab4eb11f6d2373802be4
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8499b7e9d1a8145db60bfc50cf0edd03d252ca1ec45abe3f044e258613563f47
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61efa3d107ffd4c591af8bc6122fbe036d89c50d1c5b1e286246a90e3c68104b
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5e1eec7327c005bf9146420cdca2e2d7329d76afea935314537cbb31fdce8bb
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ef237e3efe3b0130f216c0e5b4fc7b0739fb6103cc4ce8392328f95e9b9e2c6
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a2aef2a9ec660505c33a59d4501ad8e14372a86baef2bf2887bd89652ca5ea7
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1566,6 +1566,66 @@
|
|
1566 |
"learning_rate": 0.00012793432590746354,
|
1567 |
"loss": 0.3432,
|
1568 |
"step": 130000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1569 |
}
|
1570 |
],
|
1571 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.297872340425532,
|
5 |
+
"global_step": 135000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1566 |
"learning_rate": 0.00012793432590746354,
|
1567 |
"loss": 0.3432,
|
1568 |
"step": 130000
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 2.22,
|
1572 |
+
"learning_rate": 0.00012777382835231195,
|
1573 |
+
"loss": 0.343,
|
1574 |
+
"step": 130500
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 2.23,
|
1578 |
+
"learning_rate": 0.0001276128592887616,
|
1579 |
+
"loss": 0.3429,
|
1580 |
+
"step": 131000
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 2.24,
|
1584 |
+
"learning_rate": 0.00012745174365140025,
|
1585 |
+
"loss": 0.3427,
|
1586 |
+
"step": 131500
|
1587 |
+
},
|
1588 |
+
{
|
1589 |
+
"epoch": 2.25,
|
1590 |
+
"learning_rate": 0.00012728983727684876,
|
1591 |
+
"loss": 0.3426,
|
1592 |
+
"step": 132000
|
1593 |
+
},
|
1594 |
+
{
|
1595 |
+
"epoch": 2.26,
|
1596 |
+
"learning_rate": 0.00012712746417069506,
|
1597 |
+
"loss": 0.3425,
|
1598 |
+
"step": 132500
|
1599 |
+
},
|
1600 |
+
{
|
1601 |
+
"epoch": 2.26,
|
1602 |
+
"learning_rate": 0.00012696462593549614,
|
1603 |
+
"loss": 0.3426,
|
1604 |
+
"step": 133000
|
1605 |
+
},
|
1606 |
+
{
|
1607 |
+
"epoch": 2.27,
|
1608 |
+
"learning_rate": 0.0001268013241783996,
|
1609 |
+
"loss": 0.3421,
|
1610 |
+
"step": 133500
|
1611 |
+
},
|
1612 |
+
{
|
1613 |
+
"epoch": 2.28,
|
1614 |
+
"learning_rate": 0.00012663756051112788,
|
1615 |
+
"loss": 0.3421,
|
1616 |
+
"step": 134000
|
1617 |
+
},
|
1618 |
+
{
|
1619 |
+
"epoch": 2.29,
|
1620 |
+
"learning_rate": 0.00012647333654996226,
|
1621 |
+
"loss": 0.3419,
|
1622 |
+
"step": 134500
|
1623 |
+
},
|
1624 |
+
{
|
1625 |
+
"epoch": 2.3,
|
1626 |
+
"learning_rate": 0.00012630898373767156,
|
1627 |
+
"loss": 0.342,
|
1628 |
+
"step": 135000
|
1629 |
}
|
1630 |
],
|
1631 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5110cd9554e14b21510435e745dab095b01a66799fa13e3282560ca1e6c45499
|
3 |
size 201355195
|