Training in progress, step 340000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1952eccd1be36050273a3da169f01e0dcbd93385719e227e8c0cbf385370e94
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:005415e0c0dc00c20985d74df3229860c964b56ec86f30c18482131d19de5443
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41ff4c3eaf719ecfb0226f2fa4ff973839d5df45f9040ace407fc862e82c30fc
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67e3fcc5578cced47b73cf878e1cea5c7f6167fcd2db90c612f1ae1e97fdb741
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9942ce5551e0fb777874b0ea1017aec1feda23a16ba01648615cf7898b5b1f2
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78b1fb7071ab3f8c0b03b32aaab037844b6c970d6d6bd97f7de516429bae95aa
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5570cbca9105b2caec7d33a540aca9f5afd3920d293c8c6ddfc4a69132c812c3
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2867fa09c46e5a8c081072e928df1dbe92f2c7c2721b17178bbbbb782816fba4
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4026,6 +4026,66 @@
|
|
4026 |
"learning_rate": 4.4416327413955116e-05,
|
4027 |
"loss": 0.3231,
|
4028 |
"step": 335000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4029 |
}
|
4030 |
],
|
4031 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.787227342746019,
|
5 |
+
"global_step": 340000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4026 |
"learning_rate": 4.4416327413955116e-05,
|
4027 |
"loss": 0.3231,
|
4028 |
"step": 335000
|
4029 |
+
},
|
4030 |
+
{
|
4031 |
+
"epoch": 5.71,
|
4032 |
+
"learning_rate": 4.4227502944687974e-05,
|
4033 |
+
"loss": 0.3231,
|
4034 |
+
"step": 335500
|
4035 |
+
},
|
4036 |
+
{
|
4037 |
+
"epoch": 5.72,
|
4038 |
+
"learning_rate": 4.403865277795774e-05,
|
4039 |
+
"loss": 0.3229,
|
4040 |
+
"step": 336000
|
4041 |
+
},
|
4042 |
+
{
|
4043 |
+
"epoch": 5.73,
|
4044 |
+
"learning_rate": 4.385015753520643e-05,
|
4045 |
+
"loss": 0.3231,
|
4046 |
+
"step": 336500
|
4047 |
+
},
|
4048 |
+
{
|
4049 |
+
"epoch": 5.74,
|
4050 |
+
"learning_rate": 4.366201907680594e-05,
|
4051 |
+
"loss": 0.3229,
|
4052 |
+
"step": 337000
|
4053 |
+
},
|
4054 |
+
{
|
4055 |
+
"epoch": 5.74,
|
4056 |
+
"learning_rate": 4.3474614460084984e-05,
|
4057 |
+
"loss": 0.3228,
|
4058 |
+
"step": 337500
|
4059 |
+
},
|
4060 |
+
{
|
4061 |
+
"epoch": 5.75,
|
4062 |
+
"learning_rate": 4.328719441456219e-05,
|
4063 |
+
"loss": 0.3227,
|
4064 |
+
"step": 338000
|
4065 |
+
},
|
4066 |
+
{
|
4067 |
+
"epoch": 5.76,
|
4068 |
+
"learning_rate": 4.310013670960897e-05,
|
4069 |
+
"loss": 0.323,
|
4070 |
+
"step": 338500
|
4071 |
+
},
|
4072 |
+
{
|
4073 |
+
"epoch": 5.77,
|
4074 |
+
"learning_rate": 4.291344319140932e-05,
|
4075 |
+
"loss": 0.323,
|
4076 |
+
"step": 339000
|
4077 |
+
},
|
4078 |
+
{
|
4079 |
+
"epoch": 5.78,
|
4080 |
+
"learning_rate": 4.272711570255294e-05,
|
4081 |
+
"loss": 0.3229,
|
4082 |
+
"step": 339500
|
4083 |
+
},
|
4084 |
+
{
|
4085 |
+
"epoch": 5.79,
|
4086 |
+
"learning_rate": 4.254115608201689e-05,
|
4087 |
+
"loss": 0.3228,
|
4088 |
+
"step": 340000
|
4089 |
}
|
4090 |
],
|
4091 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:005415e0c0dc00c20985d74df3229860c964b56ec86f30c18482131d19de5443
|
3 |
size 201355195
|