HealthTeam
committed on
Commit
•
582b51f
1
Parent(s):
1fcdde7
Training in progress, step 77336
Browse files
- last-checkpoint/generation_config.json +1 -0
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +144 -3
- pytorch_model.bin +1 -1
- runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0 +2 -2
last-checkpoint/generation_config.json
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
"_from_model_config": true,
|
3 |
"decoder_start_token_id": 0,
|
4 |
"eos_token_id": 1,
|
|
|
5 |
"pad_token_id": 0,
|
6 |
"transformers_version": "4.26.0"
|
7 |
}
|
|
|
2 |
"_from_model_config": true,
|
3 |
"decoder_start_token_id": 0,
|
4 |
"eos_token_id": 1,
|
5 |
+
"max_length": 300,
|
6 |
"pad_token_id": 0,
|
7 |
"transformers_version": "4.26.0"
|
8 |
}
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2401461637
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6df0004473716224f08e7366a4b13550fc06f34aec0352a5c2d2d7d5164597a3
|
3 |
size 2401461637
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87b0bb2564b2dab9e02aac0e077c1e06181afd486b00577a15c4a5029f68ed7c
|
3 |
size 1200739717
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1c41e482802bc97fdd0ddd478f81f890ac7879380e8dd605815c3f44c2761cf
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60fdcc494b0281edea8ff71092a9259fd16c44c328d0dafd2fc4eb4dda344861
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -798,11 +798,152 @@
|
|
798 |
"learning_rate": 1.3454523816607659e-05,
|
799 |
"loss": 3.013,
|
800 |
"step": 66000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
801 |
}
|
802 |
],
|
803 |
"max_steps": 201666,
|
804 |
"num_train_epochs": 3,
|
805 |
-
"total_flos":
|
806 |
"trial_name": null,
|
807 |
"trial_params": null
|
808 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.150456695724614,
|
5 |
+
"global_step": 77336,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
798 |
"learning_rate": 1.3454523816607659e-05,
|
799 |
"loss": 3.013,
|
800 |
"step": 66000
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 0.99,
|
804 |
+
"learning_rate": 1.3404936875824384e-05,
|
805 |
+
"loss": 3.0027,
|
806 |
+
"step": 66500
|
807 |
+
},
|
808 |
+
{
|
809 |
+
"epoch": 1.0,
|
810 |
+
"learning_rate": 1.3355349935041108e-05,
|
811 |
+
"loss": 3.0155,
|
812 |
+
"step": 67000
|
813 |
+
},
|
814 |
+
{
|
815 |
+
"epoch": 1.0,
|
816 |
+
"eval_bleu": 11.298551127218651,
|
817 |
+
"eval_loss": 2.3749005794525146,
|
818 |
+
"eval_runtime": 4929.9601,
|
819 |
+
"eval_samples_per_second": 8.201,
|
820 |
+
"eval_steps_per_second": 0.513,
|
821 |
+
"step": 67222
|
822 |
+
},
|
823 |
+
{
|
824 |
+
"epoch": 1.0,
|
825 |
+
"learning_rate": 1.3305762994257834e-05,
|
826 |
+
"loss": 3.0195,
|
827 |
+
"step": 67500
|
828 |
+
},
|
829 |
+
{
|
830 |
+
"epoch": 1.01,
|
831 |
+
"learning_rate": 1.3256176053474558e-05,
|
832 |
+
"loss": 2.9924,
|
833 |
+
"step": 68000
|
834 |
+
},
|
835 |
+
{
|
836 |
+
"epoch": 1.02,
|
837 |
+
"learning_rate": 1.3206589112691284e-05,
|
838 |
+
"loss": 2.997,
|
839 |
+
"step": 68500
|
840 |
+
},
|
841 |
+
{
|
842 |
+
"epoch": 1.03,
|
843 |
+
"learning_rate": 1.3157002171908007e-05,
|
844 |
+
"loss": 2.9694,
|
845 |
+
"step": 69000
|
846 |
+
},
|
847 |
+
{
|
848 |
+
"epoch": 1.03,
|
849 |
+
"learning_rate": 1.3107415231124732e-05,
|
850 |
+
"loss": 2.9804,
|
851 |
+
"step": 69500
|
852 |
+
},
|
853 |
+
{
|
854 |
+
"epoch": 1.04,
|
855 |
+
"learning_rate": 1.3057828290341456e-05,
|
856 |
+
"loss": 2.9879,
|
857 |
+
"step": 70000
|
858 |
+
},
|
859 |
+
{
|
860 |
+
"epoch": 1.05,
|
861 |
+
"learning_rate": 1.3008241349558182e-05,
|
862 |
+
"loss": 2.9919,
|
863 |
+
"step": 70500
|
864 |
+
},
|
865 |
+
{
|
866 |
+
"epoch": 1.06,
|
867 |
+
"learning_rate": 1.2958654408774906e-05,
|
868 |
+
"loss": 2.9875,
|
869 |
+
"step": 71000
|
870 |
+
},
|
871 |
+
{
|
872 |
+
"epoch": 1.06,
|
873 |
+
"learning_rate": 1.2909067467991632e-05,
|
874 |
+
"loss": 2.9912,
|
875 |
+
"step": 71500
|
876 |
+
},
|
877 |
+
{
|
878 |
+
"epoch": 1.07,
|
879 |
+
"learning_rate": 1.2859480527208354e-05,
|
880 |
+
"loss": 2.974,
|
881 |
+
"step": 72000
|
882 |
+
},
|
883 |
+
{
|
884 |
+
"epoch": 1.08,
|
885 |
+
"learning_rate": 1.280989358642508e-05,
|
886 |
+
"loss": 2.9581,
|
887 |
+
"step": 72500
|
888 |
+
},
|
889 |
+
{
|
890 |
+
"epoch": 1.09,
|
891 |
+
"learning_rate": 1.2760306645641804e-05,
|
892 |
+
"loss": 2.975,
|
893 |
+
"step": 73000
|
894 |
+
},
|
895 |
+
{
|
896 |
+
"epoch": 1.09,
|
897 |
+
"learning_rate": 1.271071970485853e-05,
|
898 |
+
"loss": 2.9737,
|
899 |
+
"step": 73500
|
900 |
+
},
|
901 |
+
{
|
902 |
+
"epoch": 1.1,
|
903 |
+
"learning_rate": 1.2661132764075254e-05,
|
904 |
+
"loss": 2.9722,
|
905 |
+
"step": 74000
|
906 |
+
},
|
907 |
+
{
|
908 |
+
"epoch": 1.11,
|
909 |
+
"learning_rate": 1.261154582329198e-05,
|
910 |
+
"loss": 2.9727,
|
911 |
+
"step": 74500
|
912 |
+
},
|
913 |
+
{
|
914 |
+
"epoch": 1.12,
|
915 |
+
"learning_rate": 1.2561958882508702e-05,
|
916 |
+
"loss": 2.9618,
|
917 |
+
"step": 75000
|
918 |
+
},
|
919 |
+
{
|
920 |
+
"epoch": 1.12,
|
921 |
+
"learning_rate": 1.2512371941725428e-05,
|
922 |
+
"loss": 2.9554,
|
923 |
+
"step": 75500
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"epoch": 1.13,
|
927 |
+
"learning_rate": 1.2462785000942152e-05,
|
928 |
+
"loss": 2.961,
|
929 |
+
"step": 76000
|
930 |
+
},
|
931 |
+
{
|
932 |
+
"epoch": 1.14,
|
933 |
+
"learning_rate": 1.2413198060158878e-05,
|
934 |
+
"loss": 2.9627,
|
935 |
+
"step": 76500
|
936 |
+
},
|
937 |
+
{
|
938 |
+
"epoch": 1.15,
|
939 |
+
"learning_rate": 1.2363611119375602e-05,
|
940 |
+
"loss": 2.9896,
|
941 |
+
"step": 77000
|
942 |
}
|
943 |
],
|
944 |
"max_steps": 201666,
|
945 |
"num_train_epochs": 3,
|
946 |
+
"total_flos": 9.093214173619814e+16,
|
947 |
"trial_name": null,
|
948 |
"trial_params": null
|
949 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87b0bb2564b2dab9e02aac0e077c1e06181afd486b00577a15c4a5029f68ed7c
|
3 |
size 1200739717
|
runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:643354f90e73662cc3dab47e0d1c8ae7366fff16e187d6ca8273a9b221c47a70
|
3 |
+
size 28986
|