nbtpj commited on
Commit
8e59363
1 Parent(s): 9b0efe0

Training in progress, step 17500

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0abc3bbcb33f1b104a034878ea65a810881545bc5791a855d5351ca104d35090
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f389a14bfd58c27a83a27a7df40ecea82e8ea9846f9a754fe15ef3baf218cda
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cf5eb3b818e6cd78b758f5e03fb1f8f3add5c14dfabe0bcca7c806070d48d8f
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac15d1c531a2537f2ed5a5ca9b2c15212a32f681269a48c059475929253d03c
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d352999c7df11b9e94f1fd8a7375a3e73dafb109da22a6956122e5dc1a4f2bf1
3
- size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d1f1f98d1a485fc2aa7583bef37396f966ba1263ca57a87f7bb243165c4d1d2
3
+ size 15459
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21c769e63ec1de81b68aee725d82a5604ab67099a79974f6a735c0ba6a65445c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ddc9e3439f89ace0e8635bf051f23ef8d0f8b0f8a1af21d876b1c7c16844d24
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8408071748878924,
5
- "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -906,11 +906,161 @@
906
  "learning_rate": 3.2635774788241163e-06,
907
  "loss": 0.625,
908
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909
  }
910
  ],
911
  "max_steps": 32112,
912
  "num_train_epochs": 2,
913
- "total_flos": 6.927185891598336e+16,
914
  "trial_name": null,
915
  "trial_params": null
916
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9809417040358744,
5
+ "global_step": 17500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
906
  "learning_rate": 3.2635774788241163e-06,
907
  "loss": 0.625,
908
  "step": 15000
909
+ },
910
+ {
911
+ "epoch": 0.85,
912
+ "learning_rate": 2.952167414050822e-06,
913
+ "loss": 0.8014,
914
+ "step": 15100
915
+ },
916
+ {
917
+ "epoch": 0.85,
918
+ "learning_rate": 2.6407573492775286e-06,
919
+ "loss": 0.7089,
920
+ "step": 15200
921
+ },
922
+ {
923
+ "epoch": 0.86,
924
+ "learning_rate": 2.329347284504235e-06,
925
+ "loss": 0.74,
926
+ "step": 15300
927
+ },
928
+ {
929
+ "epoch": 0.86,
930
+ "learning_rate": 2.0179372197309418e-06,
931
+ "loss": 0.7056,
932
+ "step": 15400
933
+ },
934
+ {
935
+ "epoch": 0.87,
936
+ "learning_rate": 1.7065271549576482e-06,
937
+ "loss": 0.7139,
938
+ "step": 15500
939
+ },
940
+ {
941
+ "epoch": 0.87,
942
+ "learning_rate": 1.395117090184355e-06,
943
+ "loss": 0.7094,
944
+ "step": 15600
945
+ },
946
+ {
947
+ "epoch": 0.88,
948
+ "learning_rate": 1.0837070254110613e-06,
949
+ "loss": 0.7352,
950
+ "step": 15700
951
+ },
952
+ {
953
+ "epoch": 0.89,
954
+ "learning_rate": 7.722969606377678e-07,
955
+ "loss": 0.6215,
956
+ "step": 15800
957
+ },
958
+ {
959
+ "epoch": 0.89,
960
+ "learning_rate": 4.608868958644744e-07,
961
+ "loss": 0.72,
962
+ "step": 15900
963
+ },
964
+ {
965
+ "epoch": 0.9,
966
+ "learning_rate": 1.4947683109118088e-07,
967
+ "loss": 0.844,
968
+ "step": 16000
969
+ },
970
+ {
971
+ "epoch": 0.9,
972
+ "learning_rate": 0.0,
973
+ "loss": 0.7329,
974
+ "step": 16100
975
+ },
976
+ {
977
+ "epoch": 0.91,
978
+ "learning_rate": 0.0,
979
+ "loss": 0.7173,
980
+ "step": 16200
981
+ },
982
+ {
983
+ "epoch": 0.91,
984
+ "learning_rate": 0.0,
985
+ "loss": 0.7362,
986
+ "step": 16300
987
+ },
988
+ {
989
+ "epoch": 0.92,
990
+ "learning_rate": 0.0,
991
+ "loss": 0.7832,
992
+ "step": 16400
993
+ },
994
+ {
995
+ "epoch": 0.92,
996
+ "learning_rate": 0.0,
997
+ "loss": 0.6208,
998
+ "step": 16500
999
+ },
1000
+ {
1001
+ "epoch": 0.93,
1002
+ "learning_rate": 0.0,
1003
+ "loss": 0.7005,
1004
+ "step": 16600
1005
+ },
1006
+ {
1007
+ "epoch": 0.94,
1008
+ "learning_rate": 0.0,
1009
+ "loss": 0.6966,
1010
+ "step": 16700
1011
+ },
1012
+ {
1013
+ "epoch": 0.94,
1014
+ "learning_rate": 0.0,
1015
+ "loss": 0.7328,
1016
+ "step": 16800
1017
+ },
1018
+ {
1019
+ "epoch": 0.95,
1020
+ "learning_rate": 0.0,
1021
+ "loss": 0.7344,
1022
+ "step": 16900
1023
+ },
1024
+ {
1025
+ "epoch": 0.95,
1026
+ "learning_rate": 0.0,
1027
+ "loss": 0.6683,
1028
+ "step": 17000
1029
+ },
1030
+ {
1031
+ "epoch": 0.96,
1032
+ "learning_rate": 0.0,
1033
+ "loss": 0.7593,
1034
+ "step": 17100
1035
+ },
1036
+ {
1037
+ "epoch": 0.96,
1038
+ "learning_rate": 0.0,
1039
+ "loss": 0.6735,
1040
+ "step": 17200
1041
+ },
1042
+ {
1043
+ "epoch": 0.97,
1044
+ "learning_rate": 0.0,
1045
+ "loss": 0.6199,
1046
+ "step": 17300
1047
+ },
1048
+ {
1049
+ "epoch": 0.98,
1050
+ "learning_rate": 0.0,
1051
+ "loss": 0.7717,
1052
+ "step": 17400
1053
+ },
1054
+ {
1055
+ "epoch": 0.98,
1056
+ "learning_rate": 0.0,
1057
+ "loss": 0.7252,
1058
+ "step": 17500
1059
  }
1060
  ],
1061
  "max_steps": 32112,
1062
  "num_train_epochs": 2,
1063
+ "total_flos": 8.078058692960256e+16,
1064
  "trial_name": null,
1065
  "trial_params": null
1066
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cf5eb3b818e6cd78b758f5e03fb1f8f3add5c14dfabe0bcca7c806070d48d8f
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac15d1c531a2537f2ed5a5ca9b2c15212a32f681269a48c059475929253d03c
3
  size 557969145
runs/Jan03_01-47-25_21bb7eba274c/events.out.tfevents.1672710460.21bb7eba274c.23.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1ece1914909553e95eacd390102f6cf8f09e0b33baa54894afe1eb11854cdb7
3
- size 33189
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0059104b5702024e249689bb568e96d0d4a72ee12fe8dd97c0dd035660f85968
3
+ size 37150