Plim committed on
Commit
d6f309b
•
1 Parent(s): da04edc

Model save

Browse files
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c1f086a24d555df97dfb5f6d946142c487382a19b667581f20596eb7a64346a
3
  size 1262112241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe30ae12c53d8f58e684006bd1c1927acfc8a0a8ffea0b10a29486e0564b7ed8
3
  size 1262112241
wandb/run-20220203_135844-2tzexn1o/files/config.yaml CHANGED
@@ -4802,6 +4802,26 @@ _wandb:
4802
  5: 1
4803
  6:
4804
  - 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4805
  python_version: 3.8.8
4806
  start_time: 1643896724
4807
  t:
 
4802
  5: 1
4803
  6:
4804
  - 1
4805
+ - 1: train/train_runtime
4806
+ 5: 1
4807
+ 6:
4808
+ - 1
4809
+ - 1: train/train_samples_per_second
4810
+ 5: 1
4811
+ 6:
4812
+ - 1
4813
+ - 1: train/train_steps_per_second
4814
+ 5: 1
4815
+ 6:
4816
+ - 1
4817
+ - 1: train/total_flos
4818
+ 5: 1
4819
+ 6:
4820
+ - 1
4821
+ - 1: train/train_loss
4822
+ 5: 1
4823
+ 6:
4824
+ - 1
4825
  python_version: 3.8.8
4826
  start_time: 1643896724
4827
  t:
wandb/run-20220203_135844-2tzexn1o/files/output.log CHANGED
@@ -23082,3 +23082,452 @@ Deleting older checkpoint [checkpoint-8000] due to args.save_total_limit
23082
  {'eval_loss': inf, 'eval_wer': 0.2171683789697946, 'eval_runtime': 698.574, 'eval_samples_per_second': 22.934, 'eval_steps_per_second': 1.434, 'epoch': 4.87}
23083
  Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23084
  Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23082
  {'eval_loss': inf, 'eval_wer': 0.2171683789697946, 'eval_runtime': 698.574, 'eval_samples_per_second': 22.934, 'eval_steps_per_second': 1.434, 'epoch': 4.87}
23083
  Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23084
  Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23085
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23086
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23087
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23088
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23089
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23090
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23091
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23092
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23093
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23094
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23095
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23096
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23097
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23098
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23099
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23100
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23101
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23102
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23103
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23104
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23105
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23106
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23107
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23108
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23109
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23110
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23111
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23112
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23113
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23114
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23115
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23116
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23117
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23118
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23119
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23120
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23121
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23122
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23123
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23124
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23125
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23126
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23127
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23128
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23129
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23130
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23131
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23132
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23133
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23134
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23135
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23136
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23137
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23138
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23139
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23140
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23141
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23142
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23143
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23144
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23145
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23146
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23147
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23148
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23149
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23150
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23151
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23152
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23153
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23154
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23155
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23156
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23157
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23158
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23159
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23160
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23161
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23162
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23163
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23164
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23165
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23166
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23167
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23168
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23169
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23170
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23171
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23172
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23173
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23174
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23175
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23176
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23177
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23178
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23179
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23180
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23181
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23182
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23183
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23184
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23185
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23186
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23187
+ {'loss': 0.8419, 'learning_rate': 1.7001295336787564e-06, 'epoch': 4.9}
23188
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23189
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23190
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23191
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23192
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23193
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23194
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23195
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23196
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23197
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23198
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23199
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23200
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23201
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23202
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23203
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23204
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23205
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23206
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23207
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23208
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23209
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23210
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23211
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23212
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23213
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23214
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23215
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23216
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23217
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23218
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23219
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23220
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23221
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23222
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23223
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23224
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23225
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23226
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23227
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23228
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23229
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23230
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23231
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23232
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23233
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23234
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23235
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23236
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23237
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23238
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23239
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23240
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23241
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23242
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23243
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23244
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23245
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23246
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23247
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23248
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23249
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23250
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23251
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23252
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23253
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23254
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23255
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23256
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23257
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23258
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23259
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23260
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23261
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23262
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23263
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23264
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23265
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23266
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23267
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23268
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23269
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23270
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23271
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23272
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23273
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23274
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23275
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23276
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23277
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23278
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23279
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23280
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23281
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23282
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23283
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23284
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23285
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23286
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23287
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23288
+ {'loss': 0.8392, 'learning_rate': 1.2143782383419686e-06, 'epoch': 4.93}
23289
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23290
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23291
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23292
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23293
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23294
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23295
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23296
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23297
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23298
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23299
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23300
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23301
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23302
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23303
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23304
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23305
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23306
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23307
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23308
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23309
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23310
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23311
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23312
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23313
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23314
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23315
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23316
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23317
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23318
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23319
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23320
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23321
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23322
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23323
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23324
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23325
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23326
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23327
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23328
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23329
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23330
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23331
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23332
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23333
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23334
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23335
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23336
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23337
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23338
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23339
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23340
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23341
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23342
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23343
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23344
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23345
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23346
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23347
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23348
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23349
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23350
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23351
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23352
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23353
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23354
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23355
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23356
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23357
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23358
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23359
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23360
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23361
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23362
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23363
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23364
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23365
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23366
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23367
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23368
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23369
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23370
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23371
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23372
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23373
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23374
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23375
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23376
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23377
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23378
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23379
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23380
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23381
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23382
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23383
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23384
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23385
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23386
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23387
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23388
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23389
+ {'loss': 0.8369, 'learning_rate': 7.286269430051813e-07, 'epoch': 4.96}
23390
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23391
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23392
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23393
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23394
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23395
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23396
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23397
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23398
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23399
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23400
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23401
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23402
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23403
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23404
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23405
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23406
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23407
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23408
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23409
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23410
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23411
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23412
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23413
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23414
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23415
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23416
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23417
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23418
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23419
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23420
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23421
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23422
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23423
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23424
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23425
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23426
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23427
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23428
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23429
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23430
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23431
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23432
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23433
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23434
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23435
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23436
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23437
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23438
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23439
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23440
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23441
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23442
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23443
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23444
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23445
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23446
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23447
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23448
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23449
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23450
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23451
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23452
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23453
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23454
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23455
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23456
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23457
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23458
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23459
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23460
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23461
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23462
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23463
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23464
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23465
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23466
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23467
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23468
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23469
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23470
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23471
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23472
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23473
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23474
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23475
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23476
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23477
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23478
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23479
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23480
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23481
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23482
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23483
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23484
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23485
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23486
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23487
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23488
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23489
+ {'loss': 0.8428, 'learning_rate': 2.4287564766839375e-07, 'epoch': 4.99}
23490
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23491
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23492
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23493
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23494
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23495
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23496
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23497
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23498
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23499
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23500
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23501
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23502
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23503
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23504
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23505
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23506
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23507
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23508
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23509
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23510
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23511
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23512
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23513
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23514
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23515
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23516
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23517
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23518
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23519
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23520
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23521
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23522
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23523
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23524
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23525
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23526
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23527
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23528
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23529
+ Batch size = 16β–Œ | 81/1002 [00:50<10:06, 1.52it/s]
23530
+ Saving model checkpoint to ./ | 81/1002 [00:50<10:06, 1.52it/s]
23531
+ Saving model checkpoint to ./ | 81/1002 [00:50<10:06, 1.52it/s]
23532
+ {'train_runtime': 143109.8834, 'train_samples_per_second': 15.599, 'train_steps_per_second': 0.122, 'train_loss': 1.187884036554109, 'epoch': 5.0}
23533
+ Saving model checkpoint to ./ | 81/1002 [00:50<10:06, 1.52it/s]
wandb/run-20220203_135844-2tzexn1o/files/wandb-summary.json CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220203_135844-2tzexn1o/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220203_135844-2tzexn1o/run-2tzexn1o.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df43e16930eef95e41e7cf712b63a18e8452faa8aa0d378091af232373226149
3
- size 127201204
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16778d7647e5649a67b93699fa64a7d5752d469690253f71dd4aecd2ab1af7de
3
+ size 131078471