Training in progress, step 175000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd48b74ac1e83833752e1a90bba5608cc1040bf624f4eac82239aba5a9238261
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd14386b0a649570c4d20a23842f7cf3b08d541c3d4b898951a36f146b09ab68
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2348a48dcae284e3bf0c4419f1eea9e78d3eef8b466335308875b9d862c8d4b1
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48d4abc3f2899f95311dbce965fd7563d9bdfb75cedf302c2d6fbc196e24dd8e
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aff782f4812b791b760dc647ac3fa41d358c36cf6c112c7cfd80ff3953fbd063
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38531ecfec32b4f297fe204a04355aff6cc1b483e1e12ef7f2e8e8035519573b
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a183546506b409eaad8c229377dbaa51518042a7de70abf5671d9a48e04957b
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5d0f109ace001d7b1f30324beb3a2855f010a09846e498b98d99bc13aaefb31
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2046,6 +2046,66 @@
|
|
2046 |
"learning_rate": 0.00011374780666152555,
|
2047 |
"loss": 0.3353,
|
2048 |
"step": 170000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2049 |
}
|
2050 |
],
|
2051 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.978723404255319,
|
5 |
+
"global_step": 175000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2046 |
"learning_rate": 0.00011374780666152555,
|
2047 |
"loss": 0.3353,
|
2048 |
"step": 170000
|
2049 |
+
},
|
2050 |
+
{
|
2051 |
+
"epoch": 2.9,
|
2052 |
+
"learning_rate": 0.00011355497381985935,
|
2053 |
+
"loss": 0.3352,
|
2054 |
+
"step": 170500
|
2055 |
+
},
|
2056 |
+
{
|
2057 |
+
"epoch": 2.91,
|
2058 |
+
"learning_rate": 0.00011336180980414825,
|
2059 |
+
"loss": 0.3354,
|
2060 |
+
"step": 171000
|
2061 |
+
},
|
2062 |
+
{
|
2063 |
+
"epoch": 2.92,
|
2064 |
+
"learning_rate": 0.00011316831652084308,
|
2065 |
+
"loss": 0.3352,
|
2066 |
+
"step": 171500
|
2067 |
+
},
|
2068 |
+
{
|
2069 |
+
"epoch": 2.93,
|
2070 |
+
"learning_rate": 0.00011297488384635912,
|
2071 |
+
"loss": 0.3355,
|
2072 |
+
"step": 172000
|
2073 |
+
},
|
2074 |
+
{
|
2075 |
+
"epoch": 2.94,
|
2076 |
+
"learning_rate": 0.00011278073840917717,
|
2077 |
+
"loss": 0.3352,
|
2078 |
+
"step": 172500
|
2079 |
+
},
|
2080 |
+
{
|
2081 |
+
"epoch": 2.94,
|
2082 |
+
"learning_rate": 0.00011258626943934124,
|
2083 |
+
"loss": 0.336,
|
2084 |
+
"step": 173000
|
2085 |
+
},
|
2086 |
+
{
|
2087 |
+
"epoch": 2.95,
|
2088 |
+
"learning_rate": 0.00011239147885618158,
|
2089 |
+
"loss": 0.335,
|
2090 |
+
"step": 173500
|
2091 |
+
},
|
2092 |
+
{
|
2093 |
+
"epoch": 2.96,
|
2094 |
+
"learning_rate": 0.00011219636858220254,
|
2095 |
+
"loss": 0.3349,
|
2096 |
+
"step": 174000
|
2097 |
+
},
|
2098 |
+
{
|
2099 |
+
"epoch": 2.97,
|
2100 |
+
"learning_rate": 0.00011200094054306383,
|
2101 |
+
"loss": 0.3347,
|
2102 |
+
"step": 174500
|
2103 |
+
},
|
2104 |
+
{
|
2105 |
+
"epoch": 2.98,
|
2106 |
+
"learning_rate": 0.00011180519666756124,
|
2107 |
+
"loss": 0.3343,
|
2108 |
+
"step": 175000
|
2109 |
}
|
2110 |
],
|
2111 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd14386b0a649570c4d20a23842f7cf3b08d541c3d4b898951a36f146b09ab68
|
3 |
size 201355195
|