Training in progress, step 135000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21589503afb0fef95e8f3b2bd8ae8067dd6059ea26d4940a5858feccb2dc09b9
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe37f4eadc7811d56cab6644cc6cecca4c4f3f8b8cd0ae53e901ecf848307e9d
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f6e1b9bb7574047d1306ab0dec5378b927030e938d1c9c3c6a6103d2c3fbd9f
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29cad83ae9d13af3dbf9af309eece67d85dc134b641cd23576994ba705b54e28
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff7641e9294a56ec6863ac85382b6fc460e378f80bec45d159a07984a8b1164e
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69163afa634f38dec9dc5ca9165317709354ed8ee036c0e4e4d1a9c7d4f5a860
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:730a32589488f10ec0da4ea011ebc8abc623060b6f6c6b9f2b7973a9807b83df
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b461c0c13e01c35a65562aa813b4c4797d5f391521a87866d0e89d7c2ae072bd
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1566,6 +1566,66 @@
|
|
1566 |
"learning_rate": 0.000127934966948769,
|
1567 |
"loss": 0.3466,
|
1568 |
"step": 130000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1569 |
}
|
1570 |
],
|
1571 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.2978698053633586,
|
5 |
+
"global_step": 135000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1566 |
"learning_rate": 0.000127934966948769,
|
1567 |
"loss": 0.3466,
|
1568 |
"step": 130000
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 2.22,
|
1572 |
+
"learning_rate": 0.00012777447128283487,
|
1573 |
+
"loss": 0.3464,
|
1574 |
+
"step": 130500
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 2.23,
|
1578 |
+
"learning_rate": 0.0001276135041021565,
|
1579 |
+
"loss": 0.3462,
|
1580 |
+
"step": 131000
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 2.24,
|
1584 |
+
"learning_rate": 0.00012745239033755645,
|
1585 |
+
"loss": 0.3467,
|
1586 |
+
"step": 131500
|
1587 |
+
},
|
1588 |
+
{
|
1589 |
+
"epoch": 2.25,
|
1590 |
+
"learning_rate": 0.0001272908101084899,
|
1591 |
+
"loss": 0.3464,
|
1592 |
+
"step": 132000
|
1593 |
+
},
|
1594 |
+
{
|
1595 |
+
"epoch": 2.26,
|
1596 |
+
"learning_rate": 0.0001271284397979516,
|
1597 |
+
"loss": 0.3463,
|
1598 |
+
"step": 132500
|
1599 |
+
},
|
1600 |
+
{
|
1601 |
+
"epoch": 2.26,
|
1602 |
+
"learning_rate": 0.000126965604348739,
|
1603 |
+
"loss": 0.3463,
|
1604 |
+
"step": 133000
|
1605 |
+
},
|
1606 |
+
{
|
1607 |
+
"epoch": 2.27,
|
1608 |
+
"learning_rate": 0.00012680230536797228,
|
1609 |
+
"loss": 0.3461,
|
1610 |
+
"step": 133500
|
1611 |
+
},
|
1612 |
+
{
|
1613 |
+
"epoch": 2.28,
|
1614 |
+
"learning_rate": 0.0001266385444673464,
|
1615 |
+
"loss": 0.3458,
|
1616 |
+
"step": 134000
|
1617 |
+
},
|
1618 |
+
{
|
1619 |
+
"epoch": 2.29,
|
1620 |
+
"learning_rate": 0.00012647432326311537,
|
1621 |
+
"loss": 0.3459,
|
1622 |
+
"step": 134500
|
1623 |
+
},
|
1624 |
+
{
|
1625 |
+
"epoch": 2.3,
|
1626 |
+
"learning_rate": 0.00012630964337607617,
|
1627 |
+
"loss": 0.3459,
|
1628 |
+
"step": 135000
|
1629 |
}
|
1630 |
],
|
1631 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe37f4eadc7811d56cab6644cc6cecca4c4f3f8b8cd0ae53e901ecf848307e9d
|
3 |
size 201355195
|