joelniklaus committed
Commit 32b8fa3 (parent 0aba5e2)

Training in progress, step 850000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cfa984d48e2b25a4a8577d6e08f1cba0c93c8e248c62aa624d20530d6ecc9836
+oid sha256:4b7d6ba0cf223f8355d77410c6d3f348e2c242171827184f9341f4d3e20de1b0
 size 1475917081
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5b9bd695615bdc79db5cf5c877f1f79e625eece275fecd868dead067af8d339
+oid sha256:b9e85b4884a8f66ebb2df1765290d0653fa6abe253c0da87e19ed6ebbc99cda9
 size 737971755
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9430fa4990a550bd90819477431c7ebf833fe13945969d51951fd1b6462c2d7
+oid sha256:88cdc708dab044dab6617ab454919a335f40e531b1a7749d536ac5ef45770a92
 size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9430fa4990a550bd90819477431c7ebf833fe13945969d51951fd1b6462c2d7
+oid sha256:88cdc708dab044dab6617ab454919a335f40e531b1a7749d536ac5ef45770a92
 size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9430fa4990a550bd90819477431c7ebf833fe13945969d51951fd1b6462c2d7
+oid sha256:88cdc708dab044dab6617ab454919a335f40e531b1a7749d536ac5ef45770a92
 size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9430fa4990a550bd90819477431c7ebf833fe13945969d51951fd1b6462c2d7
+oid sha256:88cdc708dab044dab6617ab454919a335f40e531b1a7749d536ac5ef45770a92
 size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9430fa4990a550bd90819477431c7ebf833fe13945969d51951fd1b6462c2d7
+oid sha256:88cdc708dab044dab6617ab454919a335f40e531b1a7749d536ac5ef45770a92
 size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9430fa4990a550bd90819477431c7ebf833fe13945969d51951fd1b6462c2d7
+oid sha256:88cdc708dab044dab6617ab454919a335f40e531b1a7749d536ac5ef45770a92
 size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9430fa4990a550bd90819477431c7ebf833fe13945969d51951fd1b6462c2d7
+oid sha256:88cdc708dab044dab6617ab454919a335f40e531b1a7749d536ac5ef45770a92
 size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9430fa4990a550bd90819477431c7ebf833fe13945969d51951fd1b6462c2d7
+oid sha256:88cdc708dab044dab6617ab454919a335f40e531b1a7749d536ac5ef45770a92
 size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50e51b9224ded3ddffee57f26ec45414409de0232579ddafb7f3e083076fa4c5
+oid sha256:adedebe0cc7e07de957a9e2967d6e9c3934a9fdca3245f46a29d125e5e36192e
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8,
-  "global_step": 800000,
+  "epoch": 0.85,
+  "global_step": 850000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4934,11 +4934,319 @@
       "eval_samples_per_second": 125.14,
       "eval_steps_per_second": 1.001,
       "step": 800000
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.0441632244932237e-05,
+      "loss": 0.8379,
+      "step": 801000
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.0340722563656107e-05,
+      "loss": 0.8349,
+      "step": 802000
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.0240246589884044e-05,
+      "loss": 0.8258,
+      "step": 803000
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.0140205422405214e-05,
+      "loss": 0.8188,
+      "step": 804000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 0.8394,
+      "step": 805000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.941431877696955e-06,
+      "loss": 0.8345,
+      "step": 806000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.842701674223187e-06,
+      "loss": 0.8159,
+      "step": 807000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.744410624530148e-06,
+      "loss": 0.801,
+      "step": 808000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.646559803512994e-06,
+      "loss": 0.8,
+      "step": 809000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.8052,
+      "step": 810000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.452183123004e-06,
+      "loss": 0.8163,
+      "step": 811000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.355659389184396e-06,
+      "loss": 0.8108,
+      "step": 812000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.259580135361929e-06,
+      "loss": 0.8371,
+      "step": 813000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.163946412243896e-06,
+      "loss": 0.8335,
+      "step": 814000
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 0.8201,
+      "step": 815000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.974019736577777e-06,
+      "loss": 0.8136,
+      "step": 816000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.879728861037384e-06,
+      "loss": 0.8287,
+      "step": 817000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.785887670194138e-06,
+      "loss": 0.8113,
+      "step": 818000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.692497190280224e-06,
+      "loss": 0.8387,
+      "step": 819000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 0.8205,
+      "step": 820000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.507072443513702e-06,
+      "loss": 0.8294,
+      "step": 821000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.415040204436426e-06,
+      "loss": 0.8381,
+      "step": 822000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.323462731816961e-06,
+      "loss": 0.8285,
+      "step": 823000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.232341027131885e-06,
+      "loss": 0.8117,
+      "step": 824000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 0.7924,
+      "step": 825000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 8.051468902539272e-06,
+      "loss": 0.8015,
+      "step": 826000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.96172046062032e-06,
+      "loss": 0.802,
+      "step": 827000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.872431742591268e-06,
+      "loss": 0.769,
+      "step": 828000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.783603724899257e-06,
+      "loss": 0.7827,
+      "step": 829000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 0.768,
+      "step": 830000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.607333671113409e-06,
+      "loss": 0.7493,
+      "step": 831000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.519893562680663e-06,
+      "loss": 0.7302,
+      "step": 832000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.432918009885997e-06,
+      "loss": 0.7664,
+      "step": 833000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.3464079638801365e-06,
+      "loss": 0.775,
+      "step": 834000
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 0.7438,
+      "step": 835000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 7.174788171373731e-06,
+      "loss": 0.7599,
+      "step": 836000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 7.089680301679752e-06,
+      "loss": 0.7777,
+      "step": 837000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 7.005041692367154e-06,
+      "loss": 0.7522,
+      "step": 838000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 6.92087326903022e-06,
+      "loss": 0.772,
+      "step": 839000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 0.7974,
+      "step": 840000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 6.753950656940905e-06,
+      "loss": 0.7792,
+      "step": 841000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 6.671198293627479e-06,
+      "loss": 0.773,
+      "step": 842000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 6.588919767147639e-06,
+      "loss": 0.7719,
+      "step": 843000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 6.5071159772861436e-06,
+      "loss": 0.7772,
+      "step": 844000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 0.7716,
+      "step": 845000
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 6.344936180589351e-06,
+      "loss": 0.7712,
+      "step": 846000
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 6.264561947326331e-06,
+      "loss": 0.7627,
+      "step": 847000
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 6.184665997806832e-06,
+      "loss": 0.7812,
+      "step": 848000
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 6.1052492057601275e-06,
+      "loss": 0.7791,
+      "step": 849000
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 0.7794,
+      "step": 850000
+    },
+    {
+      "epoch": 0.85,
+      "eval_loss": 0.5561954379081726,
+      "eval_runtime": 107.9926,
+      "eval_samples_per_second": 46.299,
+      "eval_steps_per_second": 0.37,
+      "step": 850000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 1.34914183790592e+19,
+  "total_flos": 1.43346320277504e+19,
   "trial_name": null,
   "trial_params": null
 }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1793f13c5a11dd9ecdf84c423d90f2d5ffd02b783b224028e2471ade3abd8577
+oid sha256:d2ec8d52d00037d36b72b00e5d0e42cc7dd35f96c9132b39acec68f74a0db1cd
 size 3439
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5b9bd695615bdc79db5cf5c877f1f79e625eece275fecd868dead067af8d339
+oid sha256:b9e85b4884a8f66ebb2df1765290d0653fa6abe253c0da87e19ed6ebbc99cda9
 size 737971755
runs/Feb08_20-05-26_t1v-n-9f780742-w-0/1675886954.9315195/events.out.tfevents.1675886954.t1v-n-9f780742-w-0.24792.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f7e602d1bd2fb8041f11d0da5ad52374dc1c800b1cdcaba3296ecc3e5a57a80
+size 5430
runs/Feb08_20-05-26_t1v-n-9f780742-w-0/events.out.tfevents.1675886954.t1v-n-9f780742-w-0.24792.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3631d5279790ee5d8f55eef6c2dd6dfbe5ae66dcbeccab0107d07e42f17ef2c6
+size 12038
runs/Jan25_00-37-02_t1v-n-9f780742-w-0/events.out.tfevents.1674607228.t1v-n-9f780742-w-0.3357200.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f96af61d9fff061c46acf4fa4689be816ee9dee5ae2a5e8478e5b03f39d19cf8
-size 86522
+oid sha256:9faa8158417b38a3d6d08934eb123e8fa72074bae5fc2ccd1fad58f501e2d6b7
+size 92122
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1793f13c5a11dd9ecdf84c423d90f2d5ffd02b783b224028e2471ade3abd8577
+oid sha256:d2ec8d52d00037d36b72b00e5d0e42cc7dd35f96c9132b39acec68f74a0db1cd
 size 3439