nbtpj commited on
Commit
022fd35
1 Parent(s): fb79de5

Training in progress, step 17500

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf2370268fbd4ed9711f23148e5e1266d41ae04c0e88d432b6280d6684096467
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70920a080f4832237ce7c6ba0b7f90a36204857a2597dfa2c6caf8d3a72f7e7f
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:376812cf708ab7061c8c4ac1b0483e37e40aac837abe95f3b8ae1e5922038a43
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3916aa768b3547ed097d45447da1e5f44df02e98af7f4c9081b40d19b99444c0
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:142093c2e302563f6acf00427729bb94740050933baea33071c54bdbba785272
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d08c867edbcec7c00e24e146bf3d6892636f95bde24b387b90e00d4e61e819
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6a0c5b7f631fe6cce73745fa375c6f37e5118cbc55c65d587e9651ec9176ce
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eab84ebacf80b8995a27a6b564c2dd8315f1d3f62ec0076da07f78d4762c61eb
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8847991505928154,
5
- "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -914,11 +914,161 @@
914
  "learning_rate": 8.225732450678378e-07,
915
  "loss": 0.6941,
916
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
917
  }
918
  ],
919
  "max_steps": 30516,
920
  "num_train_epochs": 2,
921
- "total_flos": 9.14703293177856e+16,
922
  "trial_name": null,
923
  "trial_params": null
924
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.032265675691618,
5
+ "global_step": 17500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
914
  "learning_rate": 8.225732450678378e-07,
915
  "loss": 0.6941,
916
  "step": 15000
917
+ },
918
+ {
919
+ "epoch": 0.89,
920
+ "learning_rate": 4.948548207380218e-07,
921
+ "loss": 0.7049,
922
+ "step": 15100
923
+ },
924
+ {
925
+ "epoch": 0.9,
926
+ "learning_rate": 1.6713639640820606e-07,
927
+ "loss": 0.6312,
928
+ "step": 15200
929
+ },
930
+ {
931
+ "epoch": 0.9,
932
+ "learning_rate": 0.0,
933
+ "loss": 0.6369,
934
+ "step": 15300
935
+ },
936
+ {
937
+ "epoch": 0.91,
938
+ "learning_rate": 0.0,
939
+ "loss": 0.6321,
940
+ "step": 15400
941
+ },
942
+ {
943
+ "epoch": 0.91,
944
+ "learning_rate": 0.0,
945
+ "loss": 0.7197,
946
+ "step": 15500
947
+ },
948
+ {
949
+ "epoch": 0.92,
950
+ "learning_rate": 0.0,
951
+ "loss": 0.6451,
952
+ "step": 15600
953
+ },
954
+ {
955
+ "epoch": 0.93,
956
+ "learning_rate": 0.0,
957
+ "loss": 0.6704,
958
+ "step": 15700
959
+ },
960
+ {
961
+ "epoch": 0.93,
962
+ "learning_rate": 0.0,
963
+ "loss": 0.6144,
964
+ "step": 15800
965
+ },
966
+ {
967
+ "epoch": 0.94,
968
+ "learning_rate": 0.0,
969
+ "loss": 0.6497,
970
+ "step": 15900
971
+ },
972
+ {
973
+ "epoch": 0.94,
974
+ "learning_rate": 0.0,
975
+ "loss": 0.7311,
976
+ "step": 16000
977
+ },
978
+ {
979
+ "epoch": 0.95,
980
+ "learning_rate": 0.0,
981
+ "loss": 0.6799,
982
+ "step": 16100
983
+ },
984
+ {
985
+ "epoch": 0.96,
986
+ "learning_rate": 0.0,
987
+ "loss": 0.6354,
988
+ "step": 16200
989
+ },
990
+ {
991
+ "epoch": 0.96,
992
+ "learning_rate": 0.0,
993
+ "loss": 0.6063,
994
+ "step": 16300
995
+ },
996
+ {
997
+ "epoch": 0.97,
998
+ "learning_rate": 0.0,
999
+ "loss": 0.6489,
1000
+ "step": 16400
1001
+ },
1002
+ {
1003
+ "epoch": 0.97,
1004
+ "learning_rate": 0.0,
1005
+ "loss": 0.6334,
1006
+ "step": 16500
1007
+ },
1008
+ {
1009
+ "epoch": 0.98,
1010
+ "learning_rate": 0.0,
1011
+ "loss": 0.689,
1012
+ "step": 16600
1013
+ },
1014
+ {
1015
+ "epoch": 0.99,
1016
+ "learning_rate": 0.0,
1017
+ "loss": 0.6852,
1018
+ "step": 16700
1019
+ },
1020
+ {
1021
+ "epoch": 0.99,
1022
+ "learning_rate": 0.0,
1023
+ "loss": 0.6481,
1024
+ "step": 16800
1025
+ },
1026
+ {
1027
+ "epoch": 1.0,
1028
+ "learning_rate": 0.0,
1029
+ "loss": 0.7362,
1030
+ "step": 16900
1031
+ },
1032
+ {
1033
+ "epoch": 1.0,
1034
+ "learning_rate": 0.0,
1035
+ "loss": 0.7102,
1036
+ "step": 17000
1037
+ },
1038
+ {
1039
+ "epoch": 1.01,
1040
+ "learning_rate": 0.0,
1041
+ "loss": 0.6313,
1042
+ "step": 17100
1043
+ },
1044
+ {
1045
+ "epoch": 1.01,
1046
+ "learning_rate": 0.0,
1047
+ "loss": 0.7327,
1048
+ "step": 17200
1049
+ },
1050
+ {
1051
+ "epoch": 1.02,
1052
+ "learning_rate": 0.0,
1053
+ "loss": 0.624,
1054
+ "step": 17300
1055
+ },
1056
+ {
1057
+ "epoch": 1.03,
1058
+ "learning_rate": 0.0,
1059
+ "loss": 0.6883,
1060
+ "step": 17400
1061
+ },
1062
+ {
1063
+ "epoch": 1.03,
1064
+ "learning_rate": 0.0,
1065
+ "loss": 0.5119,
1066
+ "step": 17500
1067
  }
1068
  ],
1069
  "max_steps": 30516,
1070
  "num_train_epochs": 2,
1071
+ "total_flos": 1.0676522063241216e+17,
1072
  "trial_name": null,
1073
  "trial_params": null
1074
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:376812cf708ab7061c8c4ac1b0483e37e40aac837abe95f3b8ae1e5922038a43
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3916aa768b3547ed097d45447da1e5f44df02e98af7f4c9081b40d19b99444c0
3
  size 557969145
runs/Dec05_03-30-37_fbdce2302f52/events.out.tfevents.1670211053.fbdce2302f52.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19a488bda0439719c1c15ecf951d4d1665a47792d76487d0be4947720935c87c
3
- size 33224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46d026c8117c697ed1221f4329f4a0e25d72fbf3cd5830657ac90bb96ef8f6fe
3
+ size 37185