AmberYifan
commited on
Training in progress, step 496, checkpoint
Browse files- last-checkpoint/global_step496/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step496/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step496/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step496/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step496/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step496/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step496/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step496/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00003.safetensors +1 -1
- last-checkpoint/model-00002-of-00003.safetensors +1 -1
- last-checkpoint/model-00003-of-00003.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +108 -2
last-checkpoint/global_step496/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b4cdd6b81f94e65310be6e1e52106d46f10ee558c06bf6c0d6718424b78dcdf
|
3 |
+
size 14483467880
|
last-checkpoint/global_step496/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c4bf14393e6ec22d3bd790bc9a39f0558c740b337d5ac0384c18a648a8b4861
|
3 |
+
size 14483467880
|
last-checkpoint/global_step496/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:230415a467393c078f8a710e70bd7085157d82eac70296b38027f02b45a0a871
|
3 |
+
size 14483467880
|
last-checkpoint/global_step496/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0135fcbe40d8d2f4416732065cc5ffa88e0c4082e840242dede0a294c1eca318
|
3 |
+
size 14483467880
|
last-checkpoint/global_step496/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a030eba951bd1aa25c9bd76a5990de7b6d75a6368b31c0ba89883264969207b6
|
3 |
+
size 150629
|
last-checkpoint/global_step496/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4b57c7fd218d93bd682196f95281ed3fd2b880cd8f0d694a77f56a3d6bfffc9
|
3 |
+
size 150629
|
last-checkpoint/global_step496/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98f058fa5f757d8597733073ed6b09257eae70b11212ecb083d55284229c4c17
|
3 |
+
size 150629
|
last-checkpoint/global_step496/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adb9ffc4c9f6b664766500e42a24fdd1c76dc6ac1947006a448eb0cbd34a8955
|
3 |
+
size 150629
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step496
|
last-checkpoint/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9a7a17d4560692c6bbac7a5803c9ef7966094cd085d51b14cecd1df2d86e64a
|
3 |
size 4943162336
|
last-checkpoint/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20ac5c3151e609b424576af7297e14857b8b7da6100a1a7704e41612dd7bc42
|
3 |
size 4999819336
|
last-checkpoint/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fbc99432985410ff6c43ff6c0882c1950b2b1368324dc2ad9f035c1fb4e9ac4
|
3 |
size 4540516344
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6115ef4a3972dc95f8b5f42d8b349e0290ad816095675fd656394e06736eed54
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8698a8fe21a6c40aa81c13fa783f7b480800d0e1c8b8bbfc0f7d795856c9a9d
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22995e1828e61a6ae15a2c3ebe4ca0a2b8b2daa337082a4762176b1ca46f3953
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49f599372909f19fb45797a334f06b5100cf004b284caa24cedf789d1047c5c7
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:239793f1a741f8e5a4de788cdc2c78f5187e67d38900b549f3e1a188c27ddb9c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 62,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -779,6 +779,112 @@
|
|
779 |
"eval_samples_per_second": 5.359,
|
780 |
"eval_steps_per_second": 0.348,
|
781 |
"step": 434
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
782 |
}
|
783 |
],
|
784 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.5872000000000002,
|
5 |
"eval_steps": 62,
|
6 |
+
"global_step": 496,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
779 |
"eval_samples_per_second": 5.359,
|
780 |
"eval_steps_per_second": 0.348,
|
781 |
"step": 434
|
782 |
+
},
|
783 |
+
{
|
784 |
+
"epoch": 1.408,
|
785 |
+
"grad_norm": 59.86596162814969,
|
786 |
+
"learning_rate": 2.9453681710213776e-07,
|
787 |
+
"logits/generated": -2.5212674140930176,
|
788 |
+
"logits/real": -2.6616861820220947,
|
789 |
+
"logps/generated": -124.64134216308594,
|
790 |
+
"logps/real": -119.5413818359375,
|
791 |
+
"loss": 0.4341,
|
792 |
+
"rewards/accuracies": 0.9375,
|
793 |
+
"rewards/generated": 0.546046257019043,
|
794 |
+
"rewards/margins": 2.7997288703918457,
|
795 |
+
"rewards/real": 3.3457748889923096,
|
796 |
+
"step": 440
|
797 |
+
},
|
798 |
+
{
|
799 |
+
"epoch": 1.44,
|
800 |
+
"grad_norm": 25.921657044180346,
|
801 |
+
"learning_rate": 2.885985748218527e-07,
|
802 |
+
"logits/generated": -2.4825379848480225,
|
803 |
+
"logits/real": -2.5802297592163086,
|
804 |
+
"logps/generated": -134.5606231689453,
|
805 |
+
"logps/real": -103.09733581542969,
|
806 |
+
"loss": 0.2921,
|
807 |
+
"rewards/accuracies": 0.949999988079071,
|
808 |
+
"rewards/generated": 0.3280000686645508,
|
809 |
+
"rewards/margins": 3.0266566276550293,
|
810 |
+
"rewards/real": 3.35465669631958,
|
811 |
+
"step": 450
|
812 |
+
},
|
813 |
+
{
|
814 |
+
"epoch": 1.472,
|
815 |
+
"grad_norm": 60.488510196254985,
|
816 |
+
"learning_rate": 2.8266033254156767e-07,
|
817 |
+
"logits/generated": -2.463170289993286,
|
818 |
+
"logits/real": -2.5204989910125732,
|
819 |
+
"logps/generated": -117.78157043457031,
|
820 |
+
"logps/real": -94.84367370605469,
|
821 |
+
"loss": 0.4117,
|
822 |
+
"rewards/accuracies": 0.9624999761581421,
|
823 |
+
"rewards/generated": 0.7249609231948853,
|
824 |
+
"rewards/margins": 2.5204477310180664,
|
825 |
+
"rewards/real": 3.245408535003662,
|
826 |
+
"step": 460
|
827 |
+
},
|
828 |
+
{
|
829 |
+
"epoch": 1.504,
|
830 |
+
"grad_norm": 42.83019835762152,
|
831 |
+
"learning_rate": 2.7672209026128263e-07,
|
832 |
+
"logits/generated": -2.474457263946533,
|
833 |
+
"logits/real": -2.513317584991455,
|
834 |
+
"logps/generated": -133.95883178710938,
|
835 |
+
"logps/real": -112.54573059082031,
|
836 |
+
"loss": 0.3241,
|
837 |
+
"rewards/accuracies": 0.887499988079071,
|
838 |
+
"rewards/generated": -0.05931330472230911,
|
839 |
+
"rewards/margins": 2.9567954540252686,
|
840 |
+
"rewards/real": 2.897481918334961,
|
841 |
+
"step": 470
|
842 |
+
},
|
843 |
+
{
|
844 |
+
"epoch": 1.536,
|
845 |
+
"grad_norm": 55.17709070072599,
|
846 |
+
"learning_rate": 2.7078384798099764e-07,
|
847 |
+
"logits/generated": -2.442061185836792,
|
848 |
+
"logits/real": -2.411407947540283,
|
849 |
+
"logps/generated": -124.24302673339844,
|
850 |
+
"logps/real": -105.8341293334961,
|
851 |
+
"loss": 0.3585,
|
852 |
+
"rewards/accuracies": 0.8999999761581421,
|
853 |
+
"rewards/generated": 0.2026868313550949,
|
854 |
+
"rewards/margins": 2.592716932296753,
|
855 |
+
"rewards/real": 2.7954039573669434,
|
856 |
+
"step": 480
|
857 |
+
},
|
858 |
+
{
|
859 |
+
"epoch": 1.568,
|
860 |
+
"grad_norm": 38.62958702650794,
|
861 |
+
"learning_rate": 2.648456057007126e-07,
|
862 |
+
"logits/generated": -2.338289737701416,
|
863 |
+
"logits/real": -2.3997902870178223,
|
864 |
+
"logps/generated": -128.10601806640625,
|
865 |
+
"logps/real": -111.6781234741211,
|
866 |
+
"loss": 0.4053,
|
867 |
+
"rewards/accuracies": 0.9375,
|
868 |
+
"rewards/generated": -0.04702238366007805,
|
869 |
+
"rewards/margins": 3.0088653564453125,
|
870 |
+
"rewards/real": 2.961843490600586,
|
871 |
+
"step": 490
|
872 |
+
},
|
873 |
+
{
|
874 |
+
"epoch": 1.5872000000000002,
|
875 |
+
"eval_logits/generated": -2.403900623321533,
|
876 |
+
"eval_logits/real": -2.4069132804870605,
|
877 |
+
"eval_logps/generated": -104.69697570800781,
|
878 |
+
"eval_logps/real": -115.92328643798828,
|
879 |
+
"eval_loss": 0.7082696557044983,
|
880 |
+
"eval_rewards/accuracies": 0.6730769276618958,
|
881 |
+
"eval_rewards/generated": 1.6618565320968628,
|
882 |
+
"eval_rewards/margins": 0.6372315287590027,
|
883 |
+
"eval_rewards/real": 2.2990880012512207,
|
884 |
+
"eval_runtime": 37.8337,
|
885 |
+
"eval_samples_per_second": 5.286,
|
886 |
+
"eval_steps_per_second": 0.344,
|
887 |
+
"step": 496
|
888 |
}
|
889 |
],
|
890 |
"logging_steps": 10,
|