Training in progress, step 80000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81b7562bdb5f8faadd0a5b65c7785e2e90c7a490a2d517760b2fd3790ff21fa6
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5acd3a5ba17eb21f71e4b3d447f414429a59c91a0f7e46be8a0dd35e859d16b8
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6ac1dc083f6d9f681e725e424bbc3a537f99007bc80e9ec5d3de1580b2efd7e
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3f57b3e9d8a73989fe9797603663e6bb728be62b005d78dcfad9151e45cd55d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e43b64a44eef5630d42417f02e38b9fb28b7598ada2873cd67e62b74ecb5ef79
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d92421316a9dfafb1a555182d77547876e8df4fda5a4cc89de3afac2777482f9
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf4bafa60b699a6f31d50ef65345a6d8036c7593d5739566caa2305a2ecf2172
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba4175cf663e1b18eb62a532a65b4ed6e0d23646843ce2ef7c2dd44a7e4a8f86
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -906,6 +906,66 @@
|
|
906 |
"learning_rate": 0.0001423762462458303,
|
907 |
"loss": 0.3552,
|
908 |
"step": 75000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
909 |
}
|
910 |
],
|
911 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.3616990493697925,
|
5 |
+
"global_step": 80000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
906 |
"learning_rate": 0.0001423762462458303,
|
907 |
"loss": 0.3552,
|
908 |
"step": 75000
|
909 |
+
},
|
910 |
+
{
|
911 |
+
"epoch": 1.29,
|
912 |
+
"learning_rate": 0.00014227613657531694,
|
913 |
+
"loss": 0.3552,
|
914 |
+
"step": 75500
|
915 |
+
},
|
916 |
+
{
|
917 |
+
"epoch": 1.29,
|
918 |
+
"learning_rate": 0.00014217541226447747,
|
919 |
+
"loss": 0.3551,
|
920 |
+
"step": 76000
|
921 |
+
},
|
922 |
+
{
|
923 |
+
"epoch": 1.3,
|
924 |
+
"learning_rate": 0.00014207427759508945,
|
925 |
+
"loss": 0.3552,
|
926 |
+
"step": 76500
|
927 |
+
},
|
928 |
+
{
|
929 |
+
"epoch": 1.31,
|
930 |
+
"learning_rate": 0.000141972328216269,
|
931 |
+
"loss": 0.355,
|
932 |
+
"step": 77000
|
933 |
+
},
|
934 |
+
{
|
935 |
+
"epoch": 1.32,
|
936 |
+
"learning_rate": 0.0001418697671955883,
|
937 |
+
"loss": 0.3546,
|
938 |
+
"step": 77500
|
939 |
+
},
|
940 |
+
{
|
941 |
+
"epoch": 1.33,
|
942 |
+
"learning_rate": 0.00014176659554528318,
|
943 |
+
"loss": 0.3546,
|
944 |
+
"step": 78000
|
945 |
+
},
|
946 |
+
{
|
947 |
+
"epoch": 1.34,
|
948 |
+
"learning_rate": 0.00014166302245385192,
|
949 |
+
"loss": 0.3544,
|
950 |
+
"step": 78500
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"epoch": 1.34,
|
954 |
+
"learning_rate": 0.00014155863382125022,
|
955 |
+
"loss": 0.3545,
|
956 |
+
"step": 79000
|
957 |
+
},
|
958 |
+
{
|
959 |
+
"epoch": 1.35,
|
960 |
+
"learning_rate": 0.00014145363762978493,
|
961 |
+
"loss": 0.3544,
|
962 |
+
"step": 79500
|
963 |
+
},
|
964 |
+
{
|
965 |
+
"epoch": 1.36,
|
966 |
+
"learning_rate": 0.00014134803491572606,
|
967 |
+
"loss": 0.3542,
|
968 |
+
"step": 80000
|
969 |
}
|
970 |
],
|
971 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5acd3a5ba17eb21f71e4b3d447f414429a59c91a0f7e46be8a0dd35e859d16b8
|
3 |
size 201355195
|