Training in progress, step 450000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32d6b2d970f75095751adce24594786eea007b4c44c83f30a9c2314f7b4634dd
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:128323e6efc0784d1ae855208c6f17812d2791f58fb09c2e01be83a6275dd1bb
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07c51da117422beee1c074764110e77c2c84b8b8ac8d05c4e87e5bd25a7d1d2d
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30d6d8103ee83eb5301573bf580b0300eab2ea72aae5431e580c914a5f1483a7
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b550faa7c45bd79b4fa38739ea8fe0691d6c22fdc4c64ee9593c9e453bc0635
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3dce7ecee3c954df2c01b66048701fe01eff0f093b566e92c04621aaa230c32c
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c43864f1c86a25925bd4e5ad2caa29376f314976c325992d08953697257bb696
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b2c73d56d919e8d68942bd061c63ae476de04684578b2bc07b2b33643233986
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5346,6 +5346,66 @@
|
|
5346 |
"learning_rate": 1.4164010000703365e-05,
|
5347 |
"loss": 0.319,
|
5348 |
"step": 445000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5349 |
}
|
5350 |
],
|
5351 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.659568854733151,
|
5 |
+
"global_step": 450000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5346 |
"learning_rate": 1.4164010000703365e-05,
|
5347 |
"loss": 0.319,
|
5348 |
"step": 445000
|
5349 |
+
},
|
5350 |
+
{
|
5351 |
+
"epoch": 7.58,
|
5352 |
+
"learning_rate": 1.4089619085357035e-05,
|
5353 |
+
"loss": 0.3189,
|
5354 |
+
"step": 445500
|
5355 |
+
},
|
5356 |
+
{
|
5357 |
+
"epoch": 7.59,
|
5358 |
+
"learning_rate": 1.4015878678861224e-05,
|
5359 |
+
"loss": 0.319,
|
5360 |
+
"step": 446000
|
5361 |
+
},
|
5362 |
+
{
|
5363 |
+
"epoch": 7.6,
|
5364 |
+
"learning_rate": 1.3942789509003995e-05,
|
5365 |
+
"loss": 0.3191,
|
5366 |
+
"step": 446500
|
5367 |
+
},
|
5368 |
+
{
|
5369 |
+
"epoch": 7.61,
|
5370 |
+
"learning_rate": 1.3870640746343366e-05,
|
5371 |
+
"loss": 0.319,
|
5372 |
+
"step": 447000
|
5373 |
+
},
|
5374 |
+
{
|
5375 |
+
"epoch": 7.62,
|
5376 |
+
"learning_rate": 1.3798853595303208e-05,
|
5377 |
+
"loss": 0.3189,
|
5378 |
+
"step": 447500
|
5379 |
+
},
|
5380 |
+
{
|
5381 |
+
"epoch": 7.63,
|
5382 |
+
"learning_rate": 1.3727719822851586e-05,
|
5383 |
+
"loss": 0.3191,
|
5384 |
+
"step": 448000
|
5385 |
+
},
|
5386 |
+
{
|
5387 |
+
"epoch": 7.63,
|
5388 |
+
"learning_rate": 1.365724013105011e-05,
|
5389 |
+
"loss": 0.3191,
|
5390 |
+
"step": 448500
|
5391 |
+
},
|
5392 |
+
{
|
5393 |
+
"epoch": 7.64,
|
5394 |
+
"learning_rate": 1.3587415215504886e-05,
|
5395 |
+
"loss": 0.3193,
|
5396 |
+
"step": 449000
|
5397 |
+
},
|
5398 |
+
{
|
5399 |
+
"epoch": 7.65,
|
5400 |
+
"learning_rate": 1.3518245765359632e-05,
|
5401 |
+
"loss": 0.3188,
|
5402 |
+
"step": 449500
|
5403 |
+
},
|
5404 |
+
{
|
5405 |
+
"epoch": 7.66,
|
5406 |
+
"learning_rate": 1.3449732463288913e-05,
|
5407 |
+
"loss": 0.3191,
|
5408 |
+
"step": 450000
|
5409 |
}
|
5410 |
],
|
5411 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:128323e6efc0784d1ae855208c6f17812d2791f58fb09c2e01be83a6275dd1bb
|
3 |
size 201355195
|