joelniklaus commited on
Commit
b7398da
1 Parent(s): 0f28af8

Training in progress, step 850000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd4e9f97205769b9d327217534afeab7d15d8878377c4702248f6d8106176224
3
  size 885330713
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:377aef4dd059b74792c5352ef81f8e44e00ebae2aba8904e27b6d7bddc64b4be
3
  size 885330713
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9308899e7cf9b42f3d67f27af3fc47d5047d1474ee940ca97311078cf54325b8
3
  size 442678571
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157afe62666c333a6a822593f8ac020489c93154c07c70eed8f687050fb132b0
3
  size 442678571
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:063012cc8e8259f90140aa24921bb350f0801958a91aea05b73d5ff385433a8e
3
  size 17563
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eed8b2b295b4b043aa8b2feddb689917b282e526c45b6eb23bd29ebdfbe6d494
3
  size 17563
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dfd958abf22782455e0945b592950503a89b2eff4aac0ecd4b072d0e9cd3f74
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:065f3ff4df60950ed9b6903fc98546b04d3292228f5f7ea2579fd161acf366b2
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a782142f42cbef3421597d05870d8435f13392f6658de5fc017128ff2f53ff61
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd1b1b8a91f71cb25cb8a1516a3c8cda9e53c3b6dd58a61caf55df95b8a1c51
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.189467,
5
- "global_step": 800000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4950,11 +4950,320 @@
4950
  "eval_samples_per_second": 415.274,
4951
  "eval_steps_per_second": 0.831,
4952
  "step": 800000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4953
  }
4954
  ],
4955
  "max_steps": 1000000,
4956
  "num_train_epochs": 9223372036854775807,
4957
- "total_flos": 5.390525448188854e+19,
4958
  "trial_name": null,
4959
  "trial_params": null
4960
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.239467,
5
+ "global_step": 850000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4950
  "eval_samples_per_second": 415.274,
4951
  "eval_steps_per_second": 0.831,
4952
  "step": 800000
4953
+ },
4954
+ {
4955
+ "epoch": 1.19,
4956
+ "learning_rate": 1.0473407182373813e-05,
4957
+ "loss": 0.9342,
4958
+ "step": 801000
4959
+ },
4960
+ {
4961
+ "epoch": 1.19,
4962
+ "learning_rate": 1.0372361544374464e-05,
4963
+ "loss": 0.8919,
4964
+ "step": 802000
4965
+ },
4966
+ {
4967
+ "epoch": 1.19,
4968
+ "learning_rate": 1.0271849663326171e-05,
4969
+ "loss": 0.8567,
4970
+ "step": 803000
4971
+ },
4972
+ {
4973
+ "epoch": 1.19,
4974
+ "learning_rate": 1.0171671413607247e-05,
4975
+ "loss": 0.9096,
4976
+ "step": 804000
4977
+ },
4978
+ {
4979
+ "epoch": 1.19,
4980
+ "learning_rate": 1.0072028244878407e-05,
4981
+ "loss": 1.0644,
4982
+ "step": 805000
4983
+ },
4984
+ {
4985
+ "epoch": 1.2,
4986
+ "learning_rate": 9.97272176118008e-06,
4987
+ "loss": 1.0327,
4988
+ "step": 806000
4989
+ },
4990
+ {
4991
+ "epoch": 1.2,
4992
+ "learning_rate": 9.873853009383633e-06,
4993
+ "loss": 1.076,
4994
+ "step": 807000
4995
+ },
4996
+ {
4997
+ "epoch": 1.2,
4998
+ "learning_rate": 9.775423070701888e-06,
4999
+ "loss": 1.0958,
5000
+ "step": 808000
5001
+ },
5002
+ {
5003
+ "epoch": 1.2,
5004
+ "learning_rate": 9.67753079151617e-06,
5005
+ "loss": 1.0053,
5006
+ "step": 809000
5007
+ },
5008
+ {
5009
+ "epoch": 1.2,
5010
+ "learning_rate": 9.57998126200062e-06,
5011
+ "loss": 0.8648,
5012
+ "step": 810000
5013
+ },
5014
+ {
5015
+ "epoch": 1.2,
5016
+ "learning_rate": 9.482970645690526e-06,
5017
+ "loss": 0.8919,
5018
+ "step": 811000
5019
+ },
5020
+ {
5021
+ "epoch": 1.2,
5022
+ "learning_rate": 9.38630578820755e-06,
5023
+ "loss": 0.8439,
5024
+ "step": 812000
5025
+ },
5026
+ {
5027
+ "epoch": 1.2,
5028
+ "learning_rate": 9.290181074089233e-06,
5029
+ "loss": 0.7462,
5030
+ "step": 813000
5031
+ },
5032
+ {
5033
+ "epoch": 1.2,
5034
+ "learning_rate": 9.194405112845483e-06,
5035
+ "loss": 0.8975,
5036
+ "step": 814000
5037
+ },
5038
+ {
5039
+ "epoch": 1.2,
5040
+ "learning_rate": 9.099170501521598e-06,
5041
+ "loss": 0.9167,
5042
+ "step": 815000
5043
+ },
5044
+ {
5045
+ "epoch": 1.21,
5046
+ "learning_rate": 9.00428762187942e-06,
5047
+ "loss": 0.8518,
5048
+ "step": 816000
5049
+ },
5050
+ {
5051
+ "epoch": 1.21,
5052
+ "learning_rate": 8.909947275055568e-06,
5053
+ "loss": 0.8843,
5054
+ "step": 817000
5055
+ },
5056
+ {
5057
+ "epoch": 1.21,
5058
+ "learning_rate": 8.815961623350038e-06,
5059
+ "loss": 0.926,
5060
+ "step": 818000
5061
+ },
5062
+ {
5063
+ "epoch": 1.21,
5064
+ "learning_rate": 8.722519663652901e-06,
5065
+ "loss": 0.9408,
5066
+ "step": 819000
5067
+ },
5068
+ {
5069
+ "epoch": 1.21,
5070
+ "learning_rate": 8.629435347010716e-06,
5071
+ "loss": 0.7628,
5072
+ "step": 820000
5073
+ },
5074
+ {
5075
+ "epoch": 1.21,
5076
+ "learning_rate": 8.536803452235437e-06,
5077
+ "loss": 0.7669,
5078
+ "step": 821000
5079
+ },
5080
+ {
5081
+ "epoch": 1.21,
5082
+ "learning_rate": 8.444624992334588e-06,
5083
+ "loss": 0.7749,
5084
+ "step": 822000
5085
+ },
5086
+ {
5087
+ "epoch": 1.21,
5088
+ "learning_rate": 8.352992472045557e-06,
5089
+ "loss": 0.7575,
5090
+ "step": 823000
5091
+ },
5092
+ {
5093
+ "epoch": 1.21,
5094
+ "learning_rate": 8.26172344512513e-06,
5095
+ "loss": 0.899,
5096
+ "step": 824000
5097
+ },
5098
+ {
5099
+ "epoch": 1.21,
5100
+ "learning_rate": 8.171001445569593e-06,
5101
+ "loss": 0.9519,
5102
+ "step": 825000
5103
+ },
5104
+ {
5105
+ "epoch": 1.22,
5106
+ "learning_rate": 8.080645840041112e-06,
5107
+ "loss": 0.9472,
5108
+ "step": 826000
5109
+ },
5110
+ {
5111
+ "epoch": 1.22,
5112
+ "learning_rate": 7.990838325725758e-06,
5113
+ "loss": 0.9576,
5114
+ "step": 827000
5115
+ },
5116
+ {
5117
+ "epoch": 1.22,
5118
+ "learning_rate": 7.901400090084665e-06,
5119
+ "loss": 0.9493,
5120
+ "step": 828000
5121
+ },
5122
+ {
5123
+ "epoch": 1.22,
5124
+ "learning_rate": 7.81251098555364e-06,
5125
+ "loss": 0.8743,
5126
+ "step": 829000
5127
+ },
5128
+ {
5129
+ "epoch": 1.22,
5130
+ "learning_rate": 7.723994028206778e-06,
5131
+ "loss": 0.8593,
5132
+ "step": 830000
5133
+ },
5134
+ {
5135
+ "epoch": 1.22,
5136
+ "learning_rate": 7.636027217870157e-06,
5137
+ "loss": 0.8767,
5138
+ "step": 831000
5139
+ },
5140
+ {
5141
+ "epoch": 1.22,
5142
+ "learning_rate": 7.54843540696496e-06,
5143
+ "loss": 0.853,
5144
+ "step": 832000
5145
+ },
5146
+ {
5147
+ "epoch": 1.22,
5148
+ "learning_rate": 7.461394734929022e-06,
5149
+ "loss": 0.8742,
5150
+ "step": 833000
5151
+ },
5152
+ {
5153
+ "epoch": 1.22,
5154
+ "learning_rate": 7.374731898184495e-06,
5155
+ "loss": 1.0273,
5156
+ "step": 834000
5157
+ },
5158
+ {
5159
+ "epoch": 1.22,
5160
+ "learning_rate": 7.2886211680837424e-06,
5161
+ "loss": 1.0271,
5162
+ "step": 835000
5163
+ },
5164
+ {
5165
+ "epoch": 1.23,
5166
+ "learning_rate": 7.202891092623126e-06,
5167
+ "loss": 1.049,
5168
+ "step": 836000
5169
+ },
5170
+ {
5171
+ "epoch": 1.23,
5172
+ "learning_rate": 7.11771406745404e-06,
5173
+ "loss": 1.0773,
5174
+ "step": 837000
5175
+ },
5176
+ {
5177
+ "epoch": 1.23,
5178
+ "learning_rate": 7.032920499639423e-06,
5179
+ "loss": 1.0882,
5180
+ "step": 838000
5181
+ },
5182
+ {
5183
+ "epoch": 1.23,
5184
+ "learning_rate": 6.94868090159605e-06,
5185
+ "loss": 0.9609,
5186
+ "step": 839000
5187
+ },
5188
+ {
5189
+ "epoch": 1.23,
5190
+ "learning_rate": 6.864827546864583e-06,
5191
+ "loss": 0.9468,
5192
+ "step": 840000
5193
+ },
5194
+ {
5195
+ "epoch": 1.23,
5196
+ "learning_rate": 6.781529057175845e-06,
5197
+ "loss": 0.8922,
5198
+ "step": 841000
5199
+ },
5200
+ {
5201
+ "epoch": 1.23,
5202
+ "learning_rate": 6.698619579877818e-06,
5203
+ "loss": 0.7689,
5204
+ "step": 842000
5205
+ },
5206
+ {
5207
+ "epoch": 1.23,
5208
+ "learning_rate": 6.616183639538559e-06,
5209
+ "loss": 0.852,
5210
+ "step": 843000
5211
+ },
5212
+ {
5213
+ "epoch": 1.23,
5214
+ "learning_rate": 6.534385586581854e-06,
5215
+ "loss": 1.0235,
5216
+ "step": 844000
5217
+ },
5218
+ {
5219
+ "epoch": 1.23,
5220
+ "learning_rate": 6.452898467929852e-06,
5221
+ "loss": 0.9675,
5222
+ "step": 845000
5223
+ },
5224
+ {
5225
+ "epoch": 1.24,
5226
+ "learning_rate": 6.371887573403335e-06,
5227
+ "loss": 0.9587,
5228
+ "step": 846000
5229
+ },
5230
+ {
5231
+ "epoch": 1.24,
5232
+ "learning_rate": 6.29135378892447e-06,
5233
+ "loss": 1.0189,
5234
+ "step": 847000
5235
+ },
5236
+ {
5237
+ "epoch": 1.24,
5238
+ "learning_rate": 6.211377811943364e-06,
5239
+ "loss": 0.9343,
5240
+ "step": 848000
5241
+ },
5242
+ {
5243
+ "epoch": 1.24,
5244
+ "learning_rate": 6.131879743067948e-06,
5245
+ "loss": 0.7473,
5246
+ "step": 849000
5247
+ },
5248
+ {
5249
+ "epoch": 1.24,
5250
+ "learning_rate": 6.0528604499385185e-06,
5251
+ "loss": 0.7263,
5252
+ "step": 850000
5253
+ },
5254
+ {
5255
+ "epoch": 1.24,
5256
+ "eval_accuracy": 0.8377542903972233,
5257
+ "eval_loss": 0.7093546986579895,
5258
+ "eval_runtime": 11.2849,
5259
+ "eval_samples_per_second": 443.071,
5260
+ "eval_steps_per_second": 0.886,
5261
+ "step": 850000
5262
  }
5263
  ],
5264
  "max_steps": 1000000,
5265
  "num_train_epochs": 9223372036854775807,
5266
+ "total_flos": 5.727433420305334e+19,
5267
  "trial_name": null,
5268
  "trial_params": null
5269
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ff9883c9f2807991733524fa8d970f3a557eb99e05c82b4c6739c1d5066cc9f
3
  size 3503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e25faf06c9975aea2aa997e353bc16bb59f3eabd76c81f0c49c46d732c973aeb
3
  size 3503
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9308899e7cf9b42f3d67f27af3fc47d5047d1474ee940ca97311078cf54325b8
3
  size 442678571
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157afe62666c333a6a822593f8ac020489c93154c07c70eed8f687050fb132b0
3
  size 442678571
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ff9883c9f2807991733524fa8d970f3a557eb99e05c82b4c6739c1d5066cc9f
3
  size 3503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e25faf06c9975aea2aa997e353bc16bb59f3eabd76c81f0c49c46d732c973aeb
3
  size 3503