HealthTeam committed on
Commit
582b51f
1 Parent(s): 1fcdde7

Training in progress, step 77336

Browse files
last-checkpoint/generation_config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_from_model_config": true,
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
 
5
  "pad_token_id": 0,
6
  "transformers_version": "4.26.0"
7
  }
 
2
  "_from_model_config": true,
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
+ "max_length": 300,
6
  "pad_token_id": 0,
7
  "transformers_version": "4.26.0"
8
  }
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2992361efcf4770d4c6602bb67d93320a187287e7fc6dc44519cbf03c5dd8d1
3
  size 2401461637
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6df0004473716224f08e7366a4b13550fc06f34aec0352a5c2d2d7d5164597a3
3
  size 2401461637
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4071ac5bc82e1943cce385333b946af39ce630c568c0ddebfc504245d7448e1
3
  size 1200739717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b0bb2564b2dab9e02aac0e077c1e06181afd486b00577a15c4a5029f68ed7c
3
  size 1200739717
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e52dd293318f93d0d24ab2680c6a46204bbcb9dee0ba0954189329cd5f7d5e2e
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c41e482802bc97fdd0ddd478f81f890ac7879380e8dd605815c3f44c2761cf
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c0312d2c26f50db24e7fa24aa7f3be59f0d2b84dcf88829a4f490d4d99de93a
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60fdcc494b0281edea8ff71092a9259fd16c44c328d0dafd2fc4eb4dda344861
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9861057391925263,
5
- "global_step": 66288,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -798,11 +798,152 @@
798
  "learning_rate": 1.3454523816607659e-05,
799
  "loss": 3.013,
800
  "step": 66000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
  }
802
  ],
803
  "max_steps": 201666,
804
  "num_train_epochs": 3,
805
- "total_flos": 7.793968305851597e+16,
806
  "trial_name": null,
807
  "trial_params": null
808
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.150456695724614,
5
+ "global_step": 77336,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
798
  "learning_rate": 1.3454523816607659e-05,
799
  "loss": 3.013,
800
  "step": 66000
801
+ },
802
+ {
803
+ "epoch": 0.99,
804
+ "learning_rate": 1.3404936875824384e-05,
805
+ "loss": 3.0027,
806
+ "step": 66500
807
+ },
808
+ {
809
+ "epoch": 1.0,
810
+ "learning_rate": 1.3355349935041108e-05,
811
+ "loss": 3.0155,
812
+ "step": 67000
813
+ },
814
+ {
815
+ "epoch": 1.0,
816
+ "eval_bleu": 11.298551127218651,
817
+ "eval_loss": 2.3749005794525146,
818
+ "eval_runtime": 4929.9601,
819
+ "eval_samples_per_second": 8.201,
820
+ "eval_steps_per_second": 0.513,
821
+ "step": 67222
822
+ },
823
+ {
824
+ "epoch": 1.0,
825
+ "learning_rate": 1.3305762994257834e-05,
826
+ "loss": 3.0195,
827
+ "step": 67500
828
+ },
829
+ {
830
+ "epoch": 1.01,
831
+ "learning_rate": 1.3256176053474558e-05,
832
+ "loss": 2.9924,
833
+ "step": 68000
834
+ },
835
+ {
836
+ "epoch": 1.02,
837
+ "learning_rate": 1.3206589112691284e-05,
838
+ "loss": 2.997,
839
+ "step": 68500
840
+ },
841
+ {
842
+ "epoch": 1.03,
843
+ "learning_rate": 1.3157002171908007e-05,
844
+ "loss": 2.9694,
845
+ "step": 69000
846
+ },
847
+ {
848
+ "epoch": 1.03,
849
+ "learning_rate": 1.3107415231124732e-05,
850
+ "loss": 2.9804,
851
+ "step": 69500
852
+ },
853
+ {
854
+ "epoch": 1.04,
855
+ "learning_rate": 1.3057828290341456e-05,
856
+ "loss": 2.9879,
857
+ "step": 70000
858
+ },
859
+ {
860
+ "epoch": 1.05,
861
+ "learning_rate": 1.3008241349558182e-05,
862
+ "loss": 2.9919,
863
+ "step": 70500
864
+ },
865
+ {
866
+ "epoch": 1.06,
867
+ "learning_rate": 1.2958654408774906e-05,
868
+ "loss": 2.9875,
869
+ "step": 71000
870
+ },
871
+ {
872
+ "epoch": 1.06,
873
+ "learning_rate": 1.2909067467991632e-05,
874
+ "loss": 2.9912,
875
+ "step": 71500
876
+ },
877
+ {
878
+ "epoch": 1.07,
879
+ "learning_rate": 1.2859480527208354e-05,
880
+ "loss": 2.974,
881
+ "step": 72000
882
+ },
883
+ {
884
+ "epoch": 1.08,
885
+ "learning_rate": 1.280989358642508e-05,
886
+ "loss": 2.9581,
887
+ "step": 72500
888
+ },
889
+ {
890
+ "epoch": 1.09,
891
+ "learning_rate": 1.2760306645641804e-05,
892
+ "loss": 2.975,
893
+ "step": 73000
894
+ },
895
+ {
896
+ "epoch": 1.09,
897
+ "learning_rate": 1.271071970485853e-05,
898
+ "loss": 2.9737,
899
+ "step": 73500
900
+ },
901
+ {
902
+ "epoch": 1.1,
903
+ "learning_rate": 1.2661132764075254e-05,
904
+ "loss": 2.9722,
905
+ "step": 74000
906
+ },
907
+ {
908
+ "epoch": 1.11,
909
+ "learning_rate": 1.261154582329198e-05,
910
+ "loss": 2.9727,
911
+ "step": 74500
912
+ },
913
+ {
914
+ "epoch": 1.12,
915
+ "learning_rate": 1.2561958882508702e-05,
916
+ "loss": 2.9618,
917
+ "step": 75000
918
+ },
919
+ {
920
+ "epoch": 1.12,
921
+ "learning_rate": 1.2512371941725428e-05,
922
+ "loss": 2.9554,
923
+ "step": 75500
924
+ },
925
+ {
926
+ "epoch": 1.13,
927
+ "learning_rate": 1.2462785000942152e-05,
928
+ "loss": 2.961,
929
+ "step": 76000
930
+ },
931
+ {
932
+ "epoch": 1.14,
933
+ "learning_rate": 1.2413198060158878e-05,
934
+ "loss": 2.9627,
935
+ "step": 76500
936
+ },
937
+ {
938
+ "epoch": 1.15,
939
+ "learning_rate": 1.2363611119375602e-05,
940
+ "loss": 2.9896,
941
+ "step": 77000
942
  }
943
  ],
944
  "max_steps": 201666,
945
  "num_train_epochs": 3,
946
+ "total_flos": 9.093214173619814e+16,
947
  "trial_name": null,
948
  "trial_params": null
949
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4071ac5bc82e1943cce385333b946af39ce630c568c0ddebfc504245d7448e1
3
  size 1200739717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b0bb2564b2dab9e02aac0e077c1e06181afd486b00577a15c4a5029f68ed7c
3
  size 1200739717
runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16d94d99a4243783f2c4751209b7f4a0753cff0f06dbbcb869acc91381ecb224
3
- size 25141
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:643354f90e73662cc3dab47e0d1c8ae7366fff16e187d6ca8273a9b221c47a70
3
+ size 28986