plip committed on
Commit
82521fb
·
1 Parent(s): 9baf744

Training in progress, step 450000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2088799669b8cbfd210d30dfce8989698daa830165d0ca944114a10a59871693
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7cf25044f894af33b58ee33e839ca67a9010ce216694a7f4c91e8f90caf02e3
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a7b0c7d08046c0c4e1a91c3f9d63b1d7bdf393ee4dbbd4736af162b969bdfb9
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f48e297dd8d43fe26955c08bdc18374b43b5c5c7bf58df74b63ccfce891d130
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7e233a8ef519d9d16a9b95d7a7171d6def7872349c82be02599284c8c6acffd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0eaf38f8d76dc97fc60763011f1de34c7a2cb3c95faaa0610cc0f4af72cd60
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be16f8037c6901bc41919bcc8cb1118de728673c68c18b0ae386bea63c0e3e3d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aee1e0607f38b87a3b735ae98b8e01339f7cc72ffa6ccf3c213d28824ed54a9
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be92d6d899a07c4671d400b60ab95a700562ae501c22a4fee8fa76e7589e996f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cceb842179d613224b3b5f8d750e75368fc012474b9befae3962586a3fa07c34
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8770268f899c830767a9d97c2302203be584e1c1cc37d9570826bfabc17c0fa2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e777f0df33e0c44b8c16c09cacb56ca419e02a262aa3b9ece5534f0249ba6105
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cc40f97d6fc7a436b02585a6f0d5affbb2d92e52b230fd202e6e0bc79392801
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ab8fd99918dee712abc63025e4d8d70437de212ab6324f5ae0cbe74ed24f94
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:593a229cfaacb8e002eb9c5c553875fab30c88f8d710b8359f369800ad7eef0c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef227953f09853ae7340813887aac1a30150643cccf7844f37d1f0ff5cb9042d
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d4f52680b07fe7330a884622b4d2a5e39ee242550e8d535f6458b2b4ab42b35
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7caa6c821a93c7fa5649d2fba3a2ebec3c3cd1a1620660f06157dc5569333b5a
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3181d4c08032a76433b86390db671ed6c1b5ecc3604448dc1685a0d499892b2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee0e57d5d9717a7b27c7acd3b2ffc6fc1518aa4d2ebf016d3b2d036634f60df0
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1894f32b2441ea8820978bbb44f8f2d9ce0a579e669301efbd1655378591798a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.480805171727075,
5
- "global_step": 440000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8806,11 +8806,211 @@
8806
  "eval_samples_per_second": 1931.767,
8807
  "eval_steps_per_second": 30.908,
8808
  "step": 440000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8809
  }
8810
  ],
8811
  "max_steps": 500000,
8812
  "num_train_epochs": 16,
8813
- "total_flos": 1.4057330262702314e+22,
8814
  "trial_name": null,
8815
  "trial_params": null
8816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.787187107448144,
5
+ "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8806
  "eval_samples_per_second": 1931.767,
8807
  "eval_steps_per_second": 30.908,
8808
  "step": 440000
8809
+ },
8810
+ {
8811
+ "epoch": 13.5,
8812
+ "learning_rate": 2.1083388335824145e-05,
8813
+ "loss": 0.315,
8814
+ "step": 440500
8815
+ },
8816
+ {
8817
+ "epoch": 13.51,
8818
+ "learning_rate": 2.0900255633978873e-05,
8819
+ "loss": 0.3147,
8820
+ "step": 441000
8821
+ },
8822
+ {
8823
+ "epoch": 13.51,
8824
+ "eval_loss": 0.7775884866714478,
8825
+ "eval_runtime": 0.5109,
8826
+ "eval_samples_per_second": 1957.402,
8827
+ "eval_steps_per_second": 31.318,
8828
+ "step": 441000
8829
+ },
8830
+ {
8831
+ "epoch": 13.53,
8832
+ "learning_rate": 2.0718589425453314e-05,
8833
+ "loss": 0.3145,
8834
+ "step": 441500
8835
+ },
8836
+ {
8837
+ "epoch": 13.54,
8838
+ "learning_rate": 2.0538391696920015e-05,
8839
+ "loss": 0.3145,
8840
+ "step": 442000
8841
+ },
8842
+ {
8843
+ "epoch": 13.54,
8844
+ "eval_loss": 0.7738910913467407,
8845
+ "eval_runtime": 0.5137,
8846
+ "eval_samples_per_second": 1946.745,
8847
+ "eval_steps_per_second": 31.148,
8848
+ "step": 442000
8849
+ },
8850
+ {
8851
+ "epoch": 13.56,
8852
+ "learning_rate": 2.035966441899249e-05,
8853
+ "loss": 0.3148,
8854
+ "step": 442500
8855
+ },
8856
+ {
8857
+ "epoch": 13.57,
8858
+ "learning_rate": 2.0182409546203555e-05,
8859
+ "loss": 0.3145,
8860
+ "step": 443000
8861
+ },
8862
+ {
8863
+ "epoch": 13.57,
8864
+ "eval_loss": 0.7752822041511536,
8865
+ "eval_runtime": 0.5041,
8866
+ "eval_samples_per_second": 1983.895,
8867
+ "eval_steps_per_second": 31.742,
8868
+ "step": 443000
8869
+ },
8870
+ {
8871
+ "epoch": 13.59,
8872
+ "learning_rate": 2.000662901698415e-05,
8873
+ "loss": 0.3143,
8874
+ "step": 443500
8875
+ },
8876
+ {
8877
+ "epoch": 13.6,
8878
+ "learning_rate": 1.983232475364195e-05,
8879
+ "loss": 0.3145,
8880
+ "step": 444000
8881
+ },
8882
+ {
8883
+ "epoch": 13.6,
8884
+ "eval_loss": 0.7770563960075378,
8885
+ "eval_runtime": 0.5102,
8886
+ "eval_samples_per_second": 1959.954,
8887
+ "eval_steps_per_second": 31.359,
8888
+ "step": 444000
8889
+ },
8890
+ {
8891
+ "epoch": 13.62,
8892
+ "learning_rate": 1.9659498662340474e-05,
8893
+ "loss": 0.3144,
8894
+ "step": 444500
8895
+ },
8896
+ {
8897
+ "epoch": 13.63,
8898
+ "learning_rate": 1.948815263307819e-05,
8899
+ "loss": 0.3146,
8900
+ "step": 445000
8901
+ },
8902
+ {
8903
+ "epoch": 13.63,
8904
+ "eval_loss": 0.7755433917045593,
8905
+ "eval_runtime": 0.5167,
8906
+ "eval_samples_per_second": 1935.403,
8907
+ "eval_steps_per_second": 30.966,
8908
+ "step": 445000
8909
+ },
8910
+ {
8911
+ "epoch": 13.65,
8912
+ "learning_rate": 1.9318288539667765e-05,
8913
+ "loss": 0.3144,
8914
+ "step": 445500
8915
+ },
8916
+ {
8917
+ "epoch": 13.66,
8918
+ "learning_rate": 1.914990823971574e-05,
8919
+ "loss": 0.3144,
8920
+ "step": 446000
8921
+ },
8922
+ {
8923
+ "epoch": 13.66,
8924
+ "eval_loss": 0.7735174298286438,
8925
+ "eval_runtime": 0.5138,
8926
+ "eval_samples_per_second": 1946.362,
8927
+ "eval_steps_per_second": 31.142,
8928
+ "step": 446000
8929
+ },
8930
+ {
8931
+ "epoch": 13.68,
8932
+ "learning_rate": 1.8983013574602096e-05,
8933
+ "loss": 0.314,
8934
+ "step": 446500
8935
+ },
8936
+ {
8937
+ "epoch": 13.7,
8938
+ "learning_rate": 1.8817606369460156e-05,
8939
+ "loss": 0.3143,
8940
+ "step": 447000
8941
+ },
8942
+ {
8943
+ "epoch": 13.7,
8944
+ "eval_loss": 0.7771323323249817,
8945
+ "eval_runtime": 0.5128,
8946
+ "eval_samples_per_second": 1950.249,
8947
+ "eval_steps_per_second": 31.204,
8948
+ "step": 447000
8949
+ },
8950
+ {
8951
+ "epoch": 13.71,
8952
+ "learning_rate": 1.865368843315663e-05,
8953
+ "loss": 0.3147,
8954
+ "step": 447500
8955
+ },
8956
+ {
8957
+ "epoch": 13.73,
8958
+ "learning_rate": 1.8491261558271762e-05,
8959
+ "loss": 0.314,
8960
+ "step": 448000
8961
+ },
8962
+ {
8963
+ "epoch": 13.73,
8964
+ "eval_loss": 0.7759497165679932,
8965
+ "eval_runtime": 0.5133,
8966
+ "eval_samples_per_second": 1948.013,
8967
+ "eval_steps_per_second": 31.168,
8968
+ "step": 448000
8969
+ },
8970
+ {
8971
+ "epoch": 13.74,
8972
+ "learning_rate": 1.833032752107986e-05,
8973
+ "loss": 0.3143,
8974
+ "step": 448500
8975
+ },
8976
+ {
8977
+ "epoch": 13.76,
8978
+ "learning_rate": 1.817088808152978e-05,
8979
+ "loss": 0.3143,
8980
+ "step": 449000
8981
+ },
8982
+ {
8983
+ "epoch": 13.76,
8984
+ "eval_loss": 0.7774102687835693,
8985
+ "eval_runtime": 0.5028,
8986
+ "eval_samples_per_second": 1988.858,
8987
+ "eval_steps_per_second": 31.822,
8988
+ "step": 449000
8989
+ },
8990
+ {
8991
+ "epoch": 13.77,
8992
+ "learning_rate": 1.801294498322569e-05,
8993
+ "loss": 0.3141,
8994
+ "step": 449500
8995
+ },
8996
+ {
8997
+ "epoch": 13.79,
8998
+ "learning_rate": 1.7856499953407978e-05,
8999
+ "loss": 0.3142,
9000
+ "step": 450000
9001
+ },
9002
+ {
9003
+ "epoch": 13.79,
9004
+ "eval_loss": 0.7757880687713623,
9005
+ "eval_runtime": 0.5025,
9006
+ "eval_samples_per_second": 1989.909,
9007
+ "eval_steps_per_second": 31.839,
9008
+ "step": 450000
9009
  }
9010
  ],
9011
  "max_steps": 500000,
9012
  "num_train_epochs": 16,
9013
+ "total_flos": 1.4376817401476814e+22,
9014
  "trial_name": null,
9015
  "trial_params": null
9016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a7b0c7d08046c0c4e1a91c3f9d63b1d7bdf393ee4dbbd4736af162b969bdfb9
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f48e297dd8d43fe26955c08bdc18374b43b5c5c7bf58df74b63ccfce891d130
3
  size 102501541