Training in progress, step 300000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ce18f0748d256d97c3ffa8fd0dca4af493eeb80a1e3c680178d29c9ddddf382
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:674942f3a22b761c6237130c7c1c65ef4ae9c18ea51c2e94629488e89aee1787
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79632fe258ae9bf0159b29eb8f75054c68c6d067b87a65bb1e5ce3d8111e8b99
|
3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30038ba0292443919b51298718047b6773e6d7e41921e311f8e57713dd04b046
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6c3448ae7122fabd57f3afe7b464dea056b6f69fb30306ac34573daa9a6e577
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2edb4ed6c12c84109dcab5c035479f8f5be0a6003c13da100daa3666a0ffbb98
|
3 |
size 14439
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09aaf2baea19ad11e593b229d87541fdc460178a4ce9400acb0896ac15b65425
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 4.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2152,11 +2152,85 @@
|
|
2152 |
"eval_samples_per_second": 985.898,
|
2153 |
"eval_steps_per_second": 15.774,
|
2154 |
"step": 290000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2155 |
}
|
2156 |
],
|
2157 |
"max_steps": 1000000,
|
2158 |
"num_train_epochs": 16,
|
2159 |
-
"total_flos": 2.
|
2160 |
"trial_name": null,
|
2161 |
"trial_params": null
|
2162 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.5810618901461355,
|
5 |
+
"global_step": 300000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2152 |
"eval_samples_per_second": 985.898,
|
2153 |
"eval_steps_per_second": 15.774,
|
2154 |
"step": 290000
|
2155 |
+
},
|
2156 |
+
{
|
2157 |
+
"epoch": 4.44,
|
2158 |
+
"learning_rate": 0.00012892127400441228,
|
2159 |
+
"loss": 0.2923,
|
2160 |
+
"step": 291000
|
2161 |
+
},
|
2162 |
+
{
|
2163 |
+
"epoch": 4.46,
|
2164 |
+
"learning_rate": 0.00012875543811453576,
|
2165 |
+
"loss": 0.2919,
|
2166 |
+
"step": 292000
|
2167 |
+
},
|
2168 |
+
{
|
2169 |
+
"epoch": 4.47,
|
2170 |
+
"learning_rate": 0.0001285890690429993,
|
2171 |
+
"loss": 0.2931,
|
2172 |
+
"step": 293000
|
2173 |
+
},
|
2174 |
+
{
|
2175 |
+
"epoch": 4.49,
|
2176 |
+
"learning_rate": 0.00012842216860918846,
|
2177 |
+
"loss": 0.292,
|
2178 |
+
"step": 294000
|
2179 |
+
},
|
2180 |
+
{
|
2181 |
+
"epoch": 4.5,
|
2182 |
+
"learning_rate": 0.0001282547386382996,
|
2183 |
+
"loss": 0.2914,
|
2184 |
+
"step": 295000
|
2185 |
+
},
|
2186 |
+
{
|
2187 |
+
"epoch": 4.5,
|
2188 |
+
"eval_runtime": 1.0329,
|
2189 |
+
"eval_samples_per_second": 968.135,
|
2190 |
+
"eval_steps_per_second": 15.49,
|
2191 |
+
"step": 295000
|
2192 |
+
},
|
2193 |
+
{
|
2194 |
+
"epoch": 4.52,
|
2195 |
+
"learning_rate": 0.0001280867809613201,
|
2196 |
+
"loss": 0.2919,
|
2197 |
+
"step": 296000
|
2198 |
+
},
|
2199 |
+
{
|
2200 |
+
"epoch": 4.54,
|
2201 |
+
"learning_rate": 0.0001279182974150082,
|
2202 |
+
"loss": 0.2915,
|
2203 |
+
"step": 297000
|
2204 |
+
},
|
2205 |
+
{
|
2206 |
+
"epoch": 4.55,
|
2207 |
+
"learning_rate": 0.00012774928984187297,
|
2208 |
+
"loss": 0.2914,
|
2209 |
+
"step": 298000
|
2210 |
+
},
|
2211 |
+
{
|
2212 |
+
"epoch": 4.57,
|
2213 |
+
"learning_rate": 0.00012757976009015413,
|
2214 |
+
"loss": 0.2908,
|
2215 |
+
"step": 299000
|
2216 |
+
},
|
2217 |
+
{
|
2218 |
+
"epoch": 4.58,
|
2219 |
+
"learning_rate": 0.0001274097100138019,
|
2220 |
+
"loss": 0.2909,
|
2221 |
+
"step": 300000
|
2222 |
+
},
|
2223 |
+
{
|
2224 |
+
"epoch": 4.58,
|
2225 |
+
"eval_runtime": 1.0054,
|
2226 |
+
"eval_samples_per_second": 994.612,
|
2227 |
+
"eval_steps_per_second": 15.914,
|
2228 |
+
"step": 300000
|
2229 |
}
|
2230 |
],
|
2231 |
"max_steps": 1000000,
|
2232 |
"num_train_epochs": 16,
|
2233 |
+
"total_flos": 2.1030052026378193e+22,
|
2234 |
"trial_name": null,
|
2235 |
"trial_params": null
|
2236 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:674942f3a22b761c6237130c7c1c65ef4ae9c18ea51c2e94629488e89aee1787
|
3 |
size 449471589
|