Model save
Browse files- pytorch_model.bin +1 -1
- wandb/run-20220203_135844-2tzexn1o/files/config.yaml +20 -0
- wandb/run-20220203_135844-2tzexn1o/files/output.log +449 -0
- wandb/run-20220203_135844-2tzexn1o/files/wandb-summary.json +0 -0
- wandb/run-20220203_135844-2tzexn1o/logs/debug-internal.log +0 -0
- wandb/run-20220203_135844-2tzexn1o/run-2tzexn1o.wandb +2 -2
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262112241
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe30ae12c53d8f58e684006bd1c1927acfc8a0a8ffea0b10a29486e0564b7ed8
|
3 |
size 1262112241
|
wandb/run-20220203_135844-2tzexn1o/files/config.yaml
CHANGED
@@ -4802,6 +4802,26 @@ _wandb:
|
|
4802 |
5: 1
|
4803 |
6:
|
4804 |
- 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4805 |
python_version: 3.8.8
|
4806 |
start_time: 1643896724
|
4807 |
t:
|
|
|
4802 |
5: 1
|
4803 |
6:
|
4804 |
- 1
|
4805 |
+
- 1: train/train_runtime
|
4806 |
+
5: 1
|
4807 |
+
6:
|
4808 |
+
- 1
|
4809 |
+
- 1: train/train_samples_per_second
|
4810 |
+
5: 1
|
4811 |
+
6:
|
4812 |
+
- 1
|
4813 |
+
- 1: train/train_steps_per_second
|
4814 |
+
5: 1
|
4815 |
+
6:
|
4816 |
+
- 1
|
4817 |
+
- 1: train/total_flos
|
4818 |
+
5: 1
|
4819 |
+
6:
|
4820 |
+
- 1
|
4821 |
+
- 1: train/train_loss
|
4822 |
+
5: 1
|
4823 |
+
6:
|
4824 |
+
- 1
|
4825 |
python_version: 3.8.8
|
4826 |
start_time: 1643896724
|
4827 |
t:
|
wandb/run-20220203_135844-2tzexn1o/files/output.log
CHANGED
@@ -23082,3 +23082,452 @@ Deleting older checkpoint [checkpoint-8000] due to args.save_total_limit
|
|
23082 |
{'eval_loss': inf, 'eval_wer': 0.2171683789697946, 'eval_runtime': 698.574, 'eval_samples_per_second': 22.934, 'eval_steps_per_second': 1.434, 'epoch': 4.87}
|
23083 |
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23084 |
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23082 |
{'eval_loss': inf, 'eval_wer': 0.2171683789697946, 'eval_runtime': 698.574, 'eval_samples_per_second': 22.934, 'eval_steps_per_second': 1.434, 'epoch': 4.87}
|
23083 |
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23084 |
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23085 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23086 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23087 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23088 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23089 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23090 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23091 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23092 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23093 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23094 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23095 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23096 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23097 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23098 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23099 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23100 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23101 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23102 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23103 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23104 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23105 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23106 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23107 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23108 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23109 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23110 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23111 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23112 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23113 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23114 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23115 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23116 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23117 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23118 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23119 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23120 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23121 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23122 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23123 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23124 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23125 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23126 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23127 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23128 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23129 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23130 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23131 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23132 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23133 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23134 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23135 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23136 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23137 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23138 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23139 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23140 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23141 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23142 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23143 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23144 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23145 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23146 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23147 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23148 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23149 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23150 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23151 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23152 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23153 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23154 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23155 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23156 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23157 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23158 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23159 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23160 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23161 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23162 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23163 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23164 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23165 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23166 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23167 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23168 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23169 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23170 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23171 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23172 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23173 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23174 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23175 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23176 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23177 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23178 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23179 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23180 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23181 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23182 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23183 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23184 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23185 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23186 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23187 |
+
{'loss': 0.8419, 'learning_rate': 1.7001295336787564e-06, 'epoch': 4.9}
|
23188 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23189 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23190 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23191 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23192 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23193 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23194 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23195 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23196 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23197 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23198 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23199 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23200 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23201 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23202 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23203 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23204 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23205 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23206 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23207 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23208 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23209 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23210 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23211 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23212 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23213 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23214 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23215 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23216 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23217 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23218 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23219 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23220 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23221 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23222 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23223 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23224 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23225 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23226 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23227 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23228 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23229 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23230 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23231 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23232 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23233 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23234 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23235 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23236 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23237 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23238 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23239 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23240 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23241 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23242 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23243 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23244 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23245 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23246 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23247 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23248 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23249 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23250 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23251 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23252 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23253 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23254 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23255 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23256 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23257 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23258 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23259 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23260 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23261 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23262 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23263 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23264 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23265 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23266 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23267 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23268 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23269 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23270 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23271 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23272 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23273 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23274 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23275 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23276 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23277 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23278 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23279 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23280 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23281 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23282 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23283 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23284 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23285 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23286 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23287 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23288 |
+
{'loss': 0.8392, 'learning_rate': 1.2143782383419686e-06, 'epoch': 4.93}
|
23289 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23290 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23291 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23292 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23293 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23294 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23295 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23296 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23297 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23298 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23299 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23300 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23301 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23302 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23303 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23304 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23305 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23306 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23307 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23308 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23309 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23310 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23311 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23312 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23313 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23314 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23315 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23316 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23317 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23318 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23319 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23320 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23321 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23322 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23323 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23324 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23325 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23326 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23327 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23328 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23329 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23330 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23331 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23332 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23333 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23334 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23335 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23336 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23337 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23338 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23339 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23340 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23341 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23342 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23343 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23344 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23345 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23346 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23347 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23348 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23349 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23350 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23351 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23352 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23353 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23354 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23355 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23356 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23357 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23358 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23359 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23360 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23361 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23362 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23363 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23364 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23365 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23366 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23367 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23368 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23369 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23370 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23371 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23372 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23373 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23374 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23375 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23376 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23377 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23378 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23379 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23380 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23381 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23382 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23383 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23384 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23385 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23386 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23387 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23388 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23389 |
+
{'loss': 0.8369, 'learning_rate': 7.286269430051813e-07, 'epoch': 4.96}
|
23390 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23391 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23392 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23393 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23394 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23395 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23396 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23397 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23398 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23399 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23400 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23401 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23402 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23403 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23404 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23405 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23406 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23407 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23408 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23409 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23410 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23411 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23412 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23413 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23414 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23415 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23416 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23417 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23418 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23419 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23420 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23421 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23422 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23423 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23424 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23425 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23426 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23427 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23428 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23429 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23430 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23431 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23432 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23433 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23434 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23435 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23436 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23437 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23438 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23439 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23440 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23441 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23442 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23443 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23444 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23445 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23446 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23447 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23448 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23449 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23450 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23451 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23452 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23453 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23454 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23455 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23456 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23457 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23458 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23459 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23460 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23461 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23462 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23463 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23464 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23465 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23466 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23467 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23468 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23469 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23470 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23471 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23472 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23473 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23474 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23475 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23476 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23477 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23478 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23479 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23480 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23481 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23482 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23483 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23484 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23485 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23486 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23487 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23488 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23489 |
+
{'loss': 0.8428, 'learning_rate': 2.4287564766839375e-07, 'epoch': 4.99}
|
23490 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23491 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23492 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23493 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23494 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23495 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23496 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23497 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23498 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23499 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23500 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23501 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23502 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23503 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23504 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23505 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23506 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23507 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23508 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23509 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23510 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23511 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23512 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23513 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23514 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23515 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23516 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23517 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23518 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23519 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23520 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23521 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23522 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23523 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23524 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23525 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23526 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23527 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23528 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23529 |
+
Batch size = 16β | 81/1002 [00:50<10:06, 1.52it/s]
|
23530 |
+
Saving model checkpoint to ./ | 81/1002 [00:50<10:06, 1.52it/s]
|
23531 |
+
Saving model checkpoint to ./ | 81/1002 [00:50<10:06, 1.52it/s]
|
23532 |
+
{'train_runtime': 143109.8834, 'train_samples_per_second': 15.599, 'train_steps_per_second': 0.122, 'train_loss': 1.187884036554109, 'epoch': 5.0}
|
23533 |
+
Saving model checkpoint to ./ | 81/1002 [00:50<10:06, 1.52it/s]
|
wandb/run-20220203_135844-2tzexn1o/files/wandb-summary.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20220203_135844-2tzexn1o/logs/debug-internal.log
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20220203_135844-2tzexn1o/run-2tzexn1o.wandb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16778d7647e5649a67b93699fa64a7d5752d469690253f71dd4aecd2ab1af7de
|
3 |
+
size 131078471
|