Training in progress, step 60000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402587859
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc4b5c38d548341be3fc6afd8378b62fca364b9a262d5e49b6efc3fc65030ea5
|
3 |
size 402587859
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee27d0a5d8910b44eff2a0ce02526c87e97539b95bf5d100d30d6d417b3f746f
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b9e46153a153464357c8d7cc6f8af8240f1f9bd785168adecd2070b9a4aeb7f
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2debf9f408eebd277828e5d3a2c83beeed24c922197846fbb46de266f4c73c50
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0ffd2b212e3bb97c9d1959bc46c09d8be6d1062b4e9a4863f950b2dc626889
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10efae63f5e57622a929379da87dd8d685840f8517ee8618d05f88b888c166c2
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c5b2d0eac6380d6179c7b005df331b38821e3646464c4f2ddf0b4dc67ec2f03
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4618e81b42373dc5f5648c0a2c9f74dfcdbe40964e20a6736201f60748e5ad2a
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -666,6 +666,66 @@
|
|
666 |
"learning_rate": 0.0001458644840671227,
|
667 |
"loss": 0.3617,
|
668 |
"step": 55000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
669 |
}
|
670 |
],
|
671 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0212765957446808,
|
5 |
+
"global_step": 60000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
666 |
"learning_rate": 0.0001458644840671227,
|
667 |
"loss": 0.3617,
|
668 |
"step": 55000
|
669 |
+
},
|
670 |
+
{
|
671 |
+
"epoch": 0.94,
|
672 |
+
"learning_rate": 0.0001457896915039746,
|
673 |
+
"loss": 0.3617,
|
674 |
+
"step": 55500
|
675 |
+
},
|
676 |
+
{
|
677 |
+
"epoch": 0.95,
|
678 |
+
"learning_rate": 0.00014571424962313165,
|
679 |
+
"loss": 0.3613,
|
680 |
+
"step": 56000
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 0.96,
|
684 |
+
"learning_rate": 0.0001456381591691749,
|
685 |
+
"loss": 0.3612,
|
686 |
+
"step": 56500
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"epoch": 0.97,
|
690 |
+
"learning_rate": 0.0001455614208930863,
|
691 |
+
"loss": 0.3613,
|
692 |
+
"step": 57000
|
693 |
+
},
|
694 |
+
{
|
695 |
+
"epoch": 0.98,
|
696 |
+
"learning_rate": 0.00014548403555224174,
|
697 |
+
"loss": 0.3608,
|
698 |
+
"step": 57500
|
699 |
+
},
|
700 |
+
{
|
701 |
+
"epoch": 0.99,
|
702 |
+
"learning_rate": 0.0001454061606181845,
|
703 |
+
"loss": 0.3608,
|
704 |
+
"step": 58000
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 1.0,
|
708 |
+
"learning_rate": 0.00014532748473578173,
|
709 |
+
"loss": 0.3606,
|
710 |
+
"step": 58500
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 1.0,
|
714 |
+
"learning_rate": 0.0001452481640974784,
|
715 |
+
"loss": 0.3606,
|
716 |
+
"step": 59000
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 1.01,
|
720 |
+
"learning_rate": 0.00014516819948613718,
|
721 |
+
"loss": 0.3603,
|
722 |
+
"step": 59500
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 1.02,
|
726 |
+
"learning_rate": 0.0001450875916909765,
|
727 |
+
"loss": 0.3597,
|
728 |
+
"step": 60000
|
729 |
}
|
730 |
],
|
731 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee27d0a5d8910b44eff2a0ce02526c87e97539b95bf5d100d30d6d417b3f746f
|
3 |
size 201355195
|