Training in progress, step 60000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402587859
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d302bf22eca1e4732e76e4759fe19e28495149e2095cdab3dce2c47bf8d2bb46
|
3 |
size 402587859
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d37299971102b1c1b0e646047331964f3eb595cb33cc0fdef07f684e77f9162
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e55e23c4e1f887c9f1345271542c47ce9ac44e2fef2f011e2ff85da53dfa2c6f
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c2fdf27a79988277c8d3169488ede467a177478605d47e7e52b351b8e657b3a
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1bed720d434ccc847c646a079dd4c941bced304e648f5b59b29662d2aeb27b2
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77e625925d7d0ce335c783ed65f9b317cdd3cb968c914553c9a03122a8334de2
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2771a8d3ab3170d491280c6aea7da27d1f1112f2bb51953316682b038024e312
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da0621269a0c8516389df36f0cafeeb0cd188ee5b752e9ef1f1d20da526a2fbe
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -666,6 +666,66 @@
|
|
666 |
"learning_rate": 0.00014586478193255307,
|
667 |
"loss": 0.3617,
|
668 |
"step": 55000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
669 |
}
|
670 |
],
|
671 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0212764146688114,
|
5 |
+
"global_step": 60000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
666 |
"learning_rate": 0.00014586478193255307,
|
667 |
"loss": 0.3617,
|
668 |
"step": 55000
|
669 |
+
},
|
670 |
+
{
|
671 |
+
"epoch": 0.94,
|
672 |
+
"learning_rate": 0.00014578999196816224,
|
673 |
+
"loss": 0.3616,
|
674 |
+
"step": 55500
|
675 |
+
},
|
676 |
+
{
|
677 |
+
"epoch": 0.95,
|
678 |
+
"learning_rate": 0.00014571455268311119,
|
679 |
+
"loss": 0.361,
|
680 |
+
"step": 56000
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 0.96,
|
684 |
+
"learning_rate": 0.00014563861764445836,
|
685 |
+
"loss": 0.3613,
|
686 |
+
"step": 56500
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"epoch": 0.97,
|
690 |
+
"learning_rate": 0.00014556188325305048,
|
691 |
+
"loss": 0.3607,
|
692 |
+
"step": 57000
|
693 |
+
},
|
694 |
+
{
|
695 |
+
"epoch": 0.98,
|
696 |
+
"learning_rate": 0.00014548450179232327,
|
697 |
+
"loss": 0.3606,
|
698 |
+
"step": 57500
|
699 |
+
},
|
700 |
+
{
|
701 |
+
"epoch": 0.99,
|
702 |
+
"learning_rate": 0.00014540647402600055,
|
703 |
+
"loss": 0.3607,
|
704 |
+
"step": 58000
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 1.0,
|
708 |
+
"learning_rate": 0.00014532780072418487,
|
709 |
+
"loss": 0.3602,
|
710 |
+
"step": 58500
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 1.0,
|
714 |
+
"learning_rate": 0.00014524864194242183,
|
715 |
+
"loss": 0.3607,
|
716 |
+
"step": 59000
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 1.01,
|
720 |
+
"learning_rate": 0.00014516868119257174,
|
721 |
+
"loss": 0.3599,
|
722 |
+
"step": 59500
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 1.02,
|
726 |
+
"learning_rate": 0.00014508823910339927,
|
727 |
+
"loss": 0.3599,
|
728 |
+
"step": 60000
|
729 |
}
|
730 |
],
|
731 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d37299971102b1c1b0e646047331964f3eb595cb33cc0fdef07f684e77f9162
|
3 |
size 201355195
|