Training in progress, step 380000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8272af4ddd50ac3c03d82b6371903a8e2d3b850546bd45a9f8b94a3453985af
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28618062dba6742c58cd97862917d0e5cce7292726a4d65b550febad17e88fa8
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5de7856e29d324fe8b2d7a8039c54b4faff384ee2fadaf89719b8ee47b48cf0c
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0dd029a7e6530ace2ca7d9672c2c2caa1be55bd585e32a5581428856929e03f
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57d4b43fb88447123981547f4327ac577192580fb4ced52d9deda508c9b8d482
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81980e5ddbb31a0d475d91342667e7ec7c33d09f8b11bc51c09d140f2402de83
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcf90e1e79ee4934f7f14472b5dc245979542d20b81316a586ad390ebbd17f9c
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3230a86e29e9aa1af252bbac77fbb562b1bc880e11ef24d2344026ff504c2e85
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4506,6 +4506,66 @@
|
|
4506 |
"learning_rate": 3.054826368875238e-05,
|
4507 |
"loss": 0.3212,
|
4508 |
"step": 375000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4509 |
}
|
4510 |
],
|
4511 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.4680811227138495,
|
5 |
+
"global_step": 380000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4506 |
"learning_rate": 3.054826368875238e-05,
|
4507 |
"loss": 0.3212,
|
4508 |
"step": 375000
|
4509 |
+
},
|
4510 |
+
{
|
4511 |
+
"epoch": 6.39,
|
4512 |
+
"learning_rate": 3.0392863517601306e-05,
|
4513 |
+
"loss": 0.3212,
|
4514 |
+
"step": 375500
|
4515 |
+
},
|
4516 |
+
{
|
4517 |
+
"epoch": 6.4,
|
4518 |
+
"learning_rate": 3.0237952948860104e-05,
|
4519 |
+
"loss": 0.3208,
|
4520 |
+
"step": 376000
|
4521 |
+
},
|
4522 |
+
{
|
4523 |
+
"epoch": 6.41,
|
4524 |
+
"learning_rate": 3.0083841859145522e-05,
|
4525 |
+
"loss": 0.3209,
|
4526 |
+
"step": 376500
|
4527 |
+
},
|
4528 |
+
{
|
4529 |
+
"epoch": 6.42,
|
4530 |
+
"learning_rate": 2.9929914090262566e-05,
|
4531 |
+
"loss": 0.3209,
|
4532 |
+
"step": 377000
|
4533 |
+
},
|
4534 |
+
{
|
4535 |
+
"epoch": 6.43,
|
4536 |
+
"learning_rate": 2.977648049291346e-05,
|
4537 |
+
"loss": 0.3209,
|
4538 |
+
"step": 377500
|
4539 |
+
},
|
4540 |
+
{
|
4541 |
+
"epoch": 6.43,
|
4542 |
+
"learning_rate": 2.9623542581425794e-05,
|
4543 |
+
"loss": 0.321,
|
4544 |
+
"step": 378000
|
4545 |
+
},
|
4546 |
+
{
|
4547 |
+
"epoch": 6.44,
|
4548 |
+
"learning_rate": 2.947110186523508e-05,
|
4549 |
+
"loss": 0.3209,
|
4550 |
+
"step": 378500
|
4551 |
+
},
|
4552 |
+
{
|
4553 |
+
"epoch": 6.45,
|
4554 |
+
"learning_rate": 2.9319159848869626e-05,
|
4555 |
+
"loss": 0.3209,
|
4556 |
+
"step": 379000
|
4557 |
+
},
|
4558 |
+
{
|
4559 |
+
"epoch": 6.46,
|
4560 |
+
"learning_rate": 2.9167718031935745e-05,
|
4561 |
+
"loss": 0.3207,
|
4562 |
+
"step": 379500
|
4563 |
+
},
|
4564 |
+
{
|
4565 |
+
"epoch": 6.47,
|
4566 |
+
"learning_rate": 2.9016777909103095e-05,
|
4567 |
+
"loss": 0.3207,
|
4568 |
+
"step": 380000
|
4569 |
}
|
4570 |
],
|
4571 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28618062dba6742c58cd97862917d0e5cce7292726a4d65b550febad17e88fa8
|
3 |
size 201355195
|