pjox committed
Commit 915e922
1 parent: 091ba5c

Uploaded model

Files changed (6)
  1. dev.tsv +0 -0
  2. final-model.pt +3 -0
  3. loss.tsv +11 -0
  4. test.tsv +0 -0
  5. training.log +499 -0
  6. weights.txt +0 -0
dev.tsv ADDED
The diff for this file is too large to render.
 
final-model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc3208dc8d7d34302e550643da037c4e08e941bd59cfe33ec4d4792c5d0bcb61
+ size 442654125
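
The three lines above are a Git LFS pointer, not the model itself: the 443 MB checkpoint is fetched from LFS storage on `git lfs pull`. Once the real file is present, it loads as a standard Flair checkpoint. A minimal sketch of inference, assuming a local clone and that the tagger's label type is "ner" (the base path resources/taggers/ner-camembert in the training log below suggests as much):

```python
# Minimal sketch: load the Flair SequenceTagger checkpoint and tag a sentence.
# Assumes the repo was cloned with `git lfs pull`, so final-model.pt is the
# real checkpoint rather than the pointer above; the local path and the
# "ner" label type are assumptions, not confirmed by this page.
from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load("final-model.pt")

sentence = Sentence("Victor Hugo est né à Besançon.")
tagger.predict(sentence)

# Print predicted entity spans (e.g. pers, loc, per the test report below).
for span in sentence.get_spans("ner"):
    print(span)
```
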
loss.tsv ADDED
@@ -0,0 +1,11 @@
+ EPOCH TIMESTAMP BAD_EPOCHS LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
+ 1 13:32:00 4 0.0001 0.3934547606673132 0.038586683571338654 0.759 0.8903 0.8195 0.7139
+ 2 14:47:41 4 0.0000 0.1352322201632861 0.015217592008411884 0.9081 0.9248 0.9164 0.8626
+ 3 16:03:24 4 0.0000 0.10858782178342327 0.015040190890431404 0.9266 0.9286 0.9276 0.879
+ 4 17:18:55 4 0.0000 0.0878958630160346 0.015710221603512764 0.9289 0.9327 0.9308 0.8838
+ 5 18:33:23 4 0.0000 0.07165857778550887 0.017801353707909584 0.9277 0.9361 0.9319 0.8864
+ 6 19:48:52 4 0.0000 0.05868402400697055 0.018429730087518692 0.9306 0.9438 0.9371 0.8922
+ 7 21:04:47 4 0.0000 0.049209113448846445 0.02109825611114502 0.9344 0.938 0.9362 0.8926
+ 8 22:20:46 4 0.0000 0.042763134030078184 0.02112417109310627 0.9347 0.9446 0.9396 0.8985
+ 9 23:35:14 4 0.0000 0.03838577379283954 0.02171432413160801 0.9391 0.9446 0.9419 0.9008
+ 10 00:49:11 4 0.0000 0.0361115163669216 0.023424603044986725 0.9389 0.9444 0.9417 0.9019
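
loss.tsv is the per-epoch metrics table that Flair writes during training, one tab-separated row per epoch. A quick inspection sketch, assuming pandas and a local copy of the file:

```python
# Minimal sketch: read Flair's per-epoch metrics and pick the best dev epoch.
import pandas as pd

df = pd.read_csv("loss.tsv", sep="\t")  # assumed local path
print(df[["EPOCH", "TRAIN_LOSS", "DEV_LOSS", "DEV_F1"]])

# Dev F1 peaks at epoch 9 (0.9419) even though dev loss bottoms out at
# epoch 3 and rises afterwards; worth checking before picking a checkpoint.
best = df.loc[df["DEV_F1"].idxmax()]
print(f"best dev F1 {best['DEV_F1']:.4f} at epoch {int(best['EPOCH'])}")
```
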
test.tsv ADDED
The diff for this file is too large to render.
 
training.log ADDED
@@ -0,0 +1,499 @@
+ 2022-02-04 12:18:14,159 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 12:18:14,161 Model: "SequenceTagger(
+ (embeddings): TransformerWordEmbeddings(
+ (model): CamembertModel(
+ (embeddings): RobertaEmbeddings(
+ (word_embeddings): Embedding(32005, 768, padding_idx=1)
+ (position_embeddings): Embedding(514, 768, padding_idx=1)
+ (token_type_embeddings): Embedding(1, 768)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (encoder): RobertaEncoder(
+ (layer): ModuleList(
+ (0): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (1): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (2): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (3): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (4): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (5): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (6): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (7): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (8): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (9): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (10): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (11): RobertaLayer(
+ (attention): RobertaAttention(
+ (self): RobertaSelfAttention(
+ (query): Linear(in_features=768, out_features=768, bias=True)
+ (key): Linear(in_features=768, out_features=768, bias=True)
+ (value): Linear(in_features=768, out_features=768, bias=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ (output): RobertaSelfOutput(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ (intermediate): RobertaIntermediate(
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
+ )
+ (output): RobertaOutput(
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+ (dropout): Dropout(p=0.1, inplace=False)
+ )
+ )
+ )
+ )
+ (pooler): RobertaPooler(
+ (dense): Linear(in_features=768, out_features=768, bias=True)
+ (activation): Tanh()
+ )
+ )
+ )
+ (word_dropout): WordDropout(p=0.05)
+ (locked_dropout): LockedDropout(p=0.5)
+ (linear): Linear(in_features=768, out_features=18, bias=True)
+ (beta): 1.0
+ (weights): None
+ (weight_tensor) None
+ )"
+ 2022-02-04 12:18:14,167 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 12:18:14,167 Corpus: "Corpus: 126973 train + 7037 dev + 7090 test sentences"
+ 2022-02-04 12:18:14,167 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 12:18:14,167 Parameters:
+ 2022-02-04 12:18:14,167 - learning_rate: "5e-05"
+ 2022-02-04 12:18:14,167 - mini_batch_size: "16"
+ 2022-02-04 12:18:14,167 - patience: "3"
+ 2022-02-04 12:18:14,167 - anneal_factor: "0.5"
+ 2022-02-04 12:18:14,167 - max_epochs: "10"
+ 2022-02-04 12:18:14,167 - shuffle: "True"
+ 2022-02-04 12:18:14,167 - train_with_dev: "False"
+ 2022-02-04 12:18:14,167 - batch_growth_annealing: "False"
+ 2022-02-04 12:18:14,167 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 12:18:14,167 Model training base path: "resources/taggers/ner-camembert"
+ 2022-02-04 12:18:14,167 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 12:18:14,167 Device: cuda:0
+ 2022-02-04 12:18:14,167 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 12:18:14,167 Embeddings storage mode: none
+ 2022-02-04 12:18:14,170 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 12:25:23,397 epoch 1 - iter 793/7936 - loss 1.64849782 - samples/sec: 29.56 - lr: 0.000005
+ 2022-02-04 12:33:59,649 epoch 1 - iter 1586/7936 - loss 1.11222779 - samples/sec: 24.58 - lr: 0.000010
+ 2022-02-04 12:41:09,132 epoch 1 - iter 2379/7936 - loss 0.85257016 - samples/sec: 29.55 - lr: 0.000015
+ 2022-02-04 12:47:44,896 epoch 1 - iter 3172/7936 - loss 0.71981753 - samples/sec: 32.07 - lr: 0.000020
+ 2022-02-04 12:55:15,449 epoch 1 - iter 3965/7936 - loss 0.60512907 - samples/sec: 28.16 - lr: 0.000025
+ 2022-02-04 13:02:35,238 epoch 1 - iter 4758/7936 - loss 0.52903622 - samples/sec: 28.85 - lr: 0.000030
+ 2022-02-04 13:09:27,012 epoch 1 - iter 5551/7936 - loss 0.48171220 - samples/sec: 30.82 - lr: 0.000035
+ 2022-02-04 13:15:53,083 epoch 1 - iter 6344/7936 - loss 0.44948661 - samples/sec: 32.87 - lr: 0.000040
+ 2022-02-04 13:22:02,650 epoch 1 - iter 7137/7936 - loss 0.42228564 - samples/sec: 34.34 - lr: 0.000045
+ 2022-02-04 13:28:59,445 epoch 1 - iter 7930/7936 - loss 0.39366725 - samples/sec: 30.45 - lr: 0.000050
+ 2022-02-04 13:29:03,026 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 13:29:03,028 EPOCH 1 done: loss 0.3935 - lr 0.0000500
+ 2022-02-04 13:32:00,102 DEV : loss 0.038586683571338654 - f1-score (micro avg) 0.8195
+ 2022-02-04 13:32:00,155 BAD EPOCHS (no improvement): 4
+ 2022-02-04 13:32:00,156 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 13:39:12,612 epoch 2 - iter 793/7936 - loss 0.14931520 - samples/sec: 29.34 - lr: 0.000049
+ 2022-02-04 13:46:36,550 epoch 2 - iter 1586/7936 - loss 0.14672871 - samples/sec: 28.58 - lr: 0.000049
+ 2022-02-04 13:53:49,885 epoch 2 - iter 2379/7936 - loss 0.14547274 - samples/sec: 29.28 - lr: 0.000048
+ 2022-02-04 14:01:13,739 epoch 2 - iter 3172/7936 - loss 0.14418846 - samples/sec: 28.59 - lr: 0.000048
+ 2022-02-04 14:08:30,985 epoch 2 - iter 3965/7936 - loss 0.14265825 - samples/sec: 29.02 - lr: 0.000047
+ 2022-02-04 14:15:46,742 epoch 2 - iter 4758/7936 - loss 0.14086599 - samples/sec: 29.12 - lr: 0.000047
+ 2022-02-04 14:23:11,181 epoch 2 - iter 5551/7936 - loss 0.13927378 - samples/sec: 28.55 - lr: 0.000046
+ 2022-02-04 14:30:19,706 epoch 2 - iter 6344/7936 - loss 0.13799042 - samples/sec: 29.61 - lr: 0.000046
+ 2022-02-04 14:37:30,554 epoch 2 - iter 7137/7936 - loss 0.13666296 - samples/sec: 29.45 - lr: 0.000045
+ 2022-02-04 14:44:52,886 epoch 2 - iter 7930/7936 - loss 0.13525042 - samples/sec: 28.69 - lr: 0.000044
+ 2022-02-04 14:44:56,060 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 14:44:56,062 EPOCH 2 done: loss 0.1352 - lr 0.0000444
+ 2022-02-04 14:47:40,950 DEV : loss 0.015217592008411884 - f1-score (micro avg) 0.9164
+ 2022-02-04 14:47:41,011 BAD EPOCHS (no improvement): 4
+ 2022-02-04 14:47:41,014 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 14:55:04,697 epoch 3 - iter 793/7936 - loss 0.11742558 - samples/sec: 28.60 - lr: 0.000044
+ 2022-02-04 15:02:16,388 epoch 3 - iter 1586/7936 - loss 0.11679901 - samples/sec: 29.40 - lr: 0.000043
+ 2022-02-04 15:09:29,924 epoch 3 - iter 2379/7936 - loss 0.11557918 - samples/sec: 29.27 - lr: 0.000043
+ 2022-02-04 15:16:54,356 epoch 3 - iter 3172/7936 - loss 0.11469700 - samples/sec: 28.55 - lr: 0.000042
+ 2022-02-04 15:24:11,817 epoch 3 - iter 3965/7936 - loss 0.11351908 - samples/sec: 29.01 - lr: 0.000042
+ 2022-02-04 15:31:20,620 epoch 3 - iter 4758/7936 - loss 0.11266101 - samples/sec: 29.59 - lr: 0.000041
+ 2022-02-04 15:38:42,882 epoch 3 - iter 5551/7936 - loss 0.11158730 - samples/sec: 28.69 - lr: 0.000041
+ 2022-02-04 15:45:50,317 epoch 3 - iter 6344/7936 - loss 0.11067669 - samples/sec: 29.69 - lr: 0.000040
+ 2022-02-04 15:53:16,035 epoch 3 - iter 7137/7936 - loss 0.10955013 - samples/sec: 28.47 - lr: 0.000039
+ 2022-02-04 16:00:25,858 epoch 3 - iter 7930/7936 - loss 0.10859645 - samples/sec: 29.52 - lr: 0.000039
+ 2022-02-04 16:00:29,034 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 16:00:29,035 EPOCH 3 done: loss 0.1086 - lr 0.0000389
+ 2022-02-04 16:03:24,201 DEV : loss 0.015040190890431404 - f1-score (micro avg) 0.9276
+ 2022-02-04 16:03:24,261 BAD EPOCHS (no improvement): 4
+ 2022-02-04 16:03:24,262 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 16:10:35,356 epoch 4 - iter 793/7936 - loss 0.09491620 - samples/sec: 29.44 - lr: 0.000038
+ 2022-02-04 16:17:46,476 epoch 4 - iter 1586/7936 - loss 0.09400900 - samples/sec: 29.43 - lr: 0.000038
+ 2022-02-04 16:25:10,503 epoch 4 - iter 2379/7936 - loss 0.09355228 - samples/sec: 28.58 - lr: 0.000037
+ 2022-02-04 16:32:21,829 epoch 4 - iter 3172/7936 - loss 0.09257257 - samples/sec: 29.42 - lr: 0.000037
+ 2022-02-04 16:39:34,717 epoch 4 - iter 3965/7936 - loss 0.09178491 - samples/sec: 29.31 - lr: 0.000036
+ 2022-02-04 16:46:54,536 epoch 4 - iter 4758/7936 - loss 0.09102086 - samples/sec: 28.85 - lr: 0.000036
+ 2022-02-04 16:54:08,674 epoch 4 - iter 5551/7936 - loss 0.09026061 - samples/sec: 29.23 - lr: 0.000035
+ 2022-02-04 17:01:24,799 epoch 4 - iter 6344/7936 - loss 0.08942621 - samples/sec: 29.10 - lr: 0.000034
+ 2022-02-04 17:08:44,577 epoch 4 - iter 7137/7936 - loss 0.08868927 - samples/sec: 28.85 - lr: 0.000034
+ 2022-02-04 17:15:57,678 epoch 4 - iter 7930/7936 - loss 0.08790466 - samples/sec: 29.30 - lr: 0.000033
+ 2022-02-04 17:16:00,787 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 17:16:00,790 EPOCH 4 done: loss 0.0879 - lr 0.0000333
+ 2022-02-04 17:18:55,805 DEV : loss 0.015710221603512764 - f1-score (micro avg) 0.9308
+ 2022-02-04 17:18:55,865 BAD EPOCHS (no improvement): 4
+ 2022-02-04 17:18:55,873 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 17:26:02,969 epoch 5 - iter 793/7936 - loss 0.07683748 - samples/sec: 29.71 - lr: 0.000033
+ 2022-02-04 17:33:13,355 epoch 5 - iter 1586/7936 - loss 0.07621969 - samples/sec: 29.49 - lr: 0.000032
+ 2022-02-04 17:40:38,247 epoch 5 - iter 2379/7936 - loss 0.07573593 - samples/sec: 28.52 - lr: 0.000032
+ 2022-02-04 17:47:40,269 epoch 5 - iter 3172/7936 - loss 0.07524740 - samples/sec: 30.07 - lr: 0.000031
+ 2022-02-04 17:54:59,036 epoch 5 - iter 3965/7936 - loss 0.07449799 - samples/sec: 28.92 - lr: 0.000031
+ 2022-02-04 18:02:03,686 epoch 5 - iter 4758/7936 - loss 0.07405311 - samples/sec: 29.88 - lr: 0.000030
+ 2022-02-04 18:09:11,646 epoch 5 - iter 5551/7936 - loss 0.07340830 - samples/sec: 29.65 - lr: 0.000029
+ 2022-02-04 18:16:27,240 epoch 5 - iter 6344/7936 - loss 0.07271787 - samples/sec: 29.13 - lr: 0.000029
+ 2022-02-04 18:23:29,669 epoch 5 - iter 7137/7936 - loss 0.07217288 - samples/sec: 30.04 - lr: 0.000028
+ 2022-02-04 18:30:30,597 epoch 5 - iter 7930/7936 - loss 0.07166288 - samples/sec: 30.15 - lr: 0.000028
+ 2022-02-04 18:30:33,919 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 18:30:33,920 EPOCH 5 done: loss 0.0717 - lr 0.0000278
+ 2022-02-04 18:33:23,923 DEV : loss 0.017801353707909584 - f1-score (micro avg) 0.9319
+ 2022-02-04 18:33:23,983 BAD EPOCHS (no improvement): 4
+ 2022-02-04 18:33:23,983 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 18:40:28,017 epoch 6 - iter 793/7936 - loss 0.06265627 - samples/sec: 29.93 - lr: 0.000027
+ 2022-02-04 18:47:46,740 epoch 6 - iter 1586/7936 - loss 0.06168821 - samples/sec: 28.92 - lr: 0.000027
+ 2022-02-04 18:54:59,429 epoch 6 - iter 2379/7936 - loss 0.06137959 - samples/sec: 29.33 - lr: 0.000026
+ 2022-02-04 19:02:08,367 epoch 6 - iter 3172/7936 - loss 0.06101991 - samples/sec: 29.58 - lr: 0.000026
+ 2022-02-04 19:09:34,369 epoch 6 - iter 3965/7936 - loss 0.06073221 - samples/sec: 28.45 - lr: 0.000025
+ 2022-02-04 19:16:53,646 epoch 6 - iter 4758/7936 - loss 0.06031513 - samples/sec: 28.89 - lr: 0.000024
+ 2022-02-04 19:24:05,427 epoch 6 - iter 5551/7936 - loss 0.05997466 - samples/sec: 29.39 - lr: 0.000024
+ 2022-02-04 19:31:27,470 epoch 6 - iter 6344/7936 - loss 0.05952743 - samples/sec: 28.71 - lr: 0.000023
+ 2022-02-04 19:38:37,449 epoch 6 - iter 7137/7936 - loss 0.05906427 - samples/sec: 29.51 - lr: 0.000023
+ 2022-02-04 19:46:02,608 epoch 6 - iter 7930/7936 - loss 0.05868560 - samples/sec: 28.51 - lr: 0.000022
+ 2022-02-04 19:46:05,790 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 19:46:05,791 EPOCH 6 done: loss 0.0587 - lr 0.0000222
+ 2022-02-04 19:48:52,058 DEV : loss 0.018429730087518692 - f1-score (micro avg) 0.9371
+ 2022-02-04 19:48:52,117 BAD EPOCHS (no improvement): 4
+ 2022-02-04 19:48:52,118 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 19:56:15,841 epoch 7 - iter 793/7936 - loss 0.05186660 - samples/sec: 28.60 - lr: 0.000022
+ 2022-02-04 20:03:27,574 epoch 7 - iter 1586/7936 - loss 0.05230029 - samples/sec: 29.39 - lr: 0.000021
+ 2022-02-04 20:10:42,349 epoch 7 - iter 2379/7936 - loss 0.05178480 - samples/sec: 29.19 - lr: 0.000021
+ 2022-02-04 20:18:09,822 epoch 7 - iter 3172/7936 - loss 0.05114746 - samples/sec: 28.36 - lr: 0.000020
+ 2022-02-04 20:25:23,574 epoch 7 - iter 3965/7936 - loss 0.05080701 - samples/sec: 29.26 - lr: 0.000019
+ 2022-02-04 20:32:39,287 epoch 7 - iter 4758/7936 - loss 0.05039880 - samples/sec: 29.12 - lr: 0.000019
+ 2022-02-04 20:40:04,807 epoch 7 - iter 5551/7936 - loss 0.05020234 - samples/sec: 28.48 - lr: 0.000018
+ 2022-02-04 20:47:17,356 epoch 7 - iter 6344/7936 - loss 0.04984342 - samples/sec: 29.34 - lr: 0.000018
+ 2022-02-04 20:54:31,673 epoch 7 - iter 7137/7936 - loss 0.04955538 - samples/sec: 29.22 - lr: 0.000017
+ 2022-02-04 21:01:58,187 epoch 7 - iter 7930/7936 - loss 0.04921375 - samples/sec: 28.42 - lr: 0.000017
+ 2022-02-04 21:02:01,071 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 21:02:01,071 EPOCH 7 done: loss 0.0492 - lr 0.0000167
+ 2022-02-04 21:04:47,460 DEV : loss 0.02109825611114502 - f1-score (micro avg) 0.9362
+ 2022-02-04 21:04:47,519 BAD EPOCHS (no improvement): 4
+ 2022-02-04 21:04:47,519 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 21:12:13,992 epoch 8 - iter 793/7936 - loss 0.04468006 - samples/sec: 28.42 - lr: 0.000016
+ 2022-02-04 21:19:25,811 epoch 8 - iter 1586/7936 - loss 0.04434977 - samples/sec: 29.39 - lr: 0.000016
+ 2022-02-04 21:26:35,161 epoch 8 - iter 2379/7936 - loss 0.04431108 - samples/sec: 29.56 - lr: 0.000015
+ 2022-02-04 21:33:55,512 epoch 8 - iter 3172/7936 - loss 0.04408371 - samples/sec: 28.82 - lr: 0.000014
+ 2022-02-04 21:41:09,449 epoch 8 - iter 3965/7936 - loss 0.04390607 - samples/sec: 29.24 - lr: 0.000014
+ 2022-02-04 21:48:30,449 epoch 8 - iter 4758/7936 - loss 0.04368218 - samples/sec: 28.77 - lr: 0.000013
+ 2022-02-04 21:55:47,346 epoch 8 - iter 5551/7936 - loss 0.04350544 - samples/sec: 29.05 - lr: 0.000013
+ 2022-02-04 22:03:02,107 epoch 8 - iter 6344/7936 - loss 0.04321482 - samples/sec: 29.19 - lr: 0.000012
+ 2022-02-04 22:10:29,225 epoch 8 - iter 7137/7936 - loss 0.04299359 - samples/sec: 28.38 - lr: 0.000012
+ 2022-02-04 22:17:46,915 epoch 8 - iter 7930/7936 - loss 0.04275655 - samples/sec: 28.99 - lr: 0.000011
+ 2022-02-04 22:17:50,251 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 22:17:50,252 EPOCH 8 done: loss 0.0428 - lr 0.0000111
+ 2022-02-04 22:20:46,443 DEV : loss 0.02112417109310627 - f1-score (micro avg) 0.9396
+ 2022-02-04 22:20:46,502 BAD EPOCHS (no improvement): 4
+ 2022-02-04 22:20:46,502 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 22:27:54,677 epoch 9 - iter 793/7936 - loss 0.03874630 - samples/sec: 29.64 - lr: 0.000011
+ 2022-02-04 22:35:07,034 epoch 9 - iter 1586/7936 - loss 0.03916791 - samples/sec: 29.35 - lr: 0.000010
+ 2022-02-04 22:42:33,861 epoch 9 - iter 2379/7936 - loss 0.03903771 - samples/sec: 28.40 - lr: 0.000009
+ 2022-02-04 22:49:45,768 epoch 9 - iter 3172/7936 - loss 0.03915089 - samples/sec: 29.38 - lr: 0.000009
+ 2022-02-04 22:56:49,271 epoch 9 - iter 3965/7936 - loss 0.03903752 - samples/sec: 29.96 - lr: 0.000008
+ 2022-02-04 23:04:02,033 epoch 9 - iter 4758/7936 - loss 0.03886980 - samples/sec: 29.32 - lr: 0.000008
+ 2022-02-04 23:11:05,006 epoch 9 - iter 5551/7936 - loss 0.03870274 - samples/sec: 30.00 - lr: 0.000007
+ 2022-02-04 23:18:05,622 epoch 9 - iter 6344/7936 - loss 0.03860323 - samples/sec: 30.17 - lr: 0.000007
+ 2022-02-04 23:25:20,470 epoch 9 - iter 7137/7936 - loss 0.03844156 - samples/sec: 29.18 - lr: 0.000006
+ 2022-02-04 23:32:20,810 epoch 9 - iter 7930/7936 - loss 0.03839073 - samples/sec: 30.19 - lr: 0.000006
+ 2022-02-04 23:32:23,941 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 23:32:23,942 EPOCH 9 done: loss 0.0384 - lr 0.0000056
+ 2022-02-04 23:35:14,351 DEV : loss 0.02171432413160801 - f1-score (micro avg) 0.9419
+ 2022-02-04 23:35:14,411 BAD EPOCHS (no improvement): 4
+ 2022-02-04 23:35:14,412 ----------------------------------------------------------------------------------------------------
+ 2022-02-04 23:42:16,230 epoch 10 - iter 793/7936 - loss 0.03646154 - samples/sec: 30.08 - lr: 0.000005
+ 2022-02-04 23:49:27,305 epoch 10 - iter 1586/7936 - loss 0.03635515 - samples/sec: 29.44 - lr: 0.000004
+ 2022-02-04 23:56:27,850 epoch 10 - iter 2379/7936 - loss 0.03662968 - samples/sec: 30.17 - lr: 0.000004
+ 2022-02-05 00:03:30,598 epoch 10 - iter 3172/7936 - loss 0.03640152 - samples/sec: 30.02 - lr: 0.000003
+ 2022-02-05 00:10:46,058 epoch 10 - iter 3965/7936 - loss 0.03636994 - samples/sec: 29.14 - lr: 0.000003
+ 2022-02-05 00:17:50,999 epoch 10 - iter 4758/7936 - loss 0.03636800 - samples/sec: 29.86 - lr: 0.000002
+ 2022-02-05 00:24:51,167 epoch 10 - iter 5551/7936 - loss 0.03625499 - samples/sec: 30.20 - lr: 0.000002
+ 2022-02-05 00:32:07,970 epoch 10 - iter 6344/7936 - loss 0.03625737 - samples/sec: 29.05 - lr: 0.000001
+ 2022-02-05 00:39:14,867 epoch 10 - iter 7137/7936 - loss 0.03618156 - samples/sec: 29.73 - lr: 0.000001
+ 2022-02-05 00:46:17,991 epoch 10 - iter 7930/7936 - loss 0.03611184 - samples/sec: 29.99 - lr: 0.000000
+ 2022-02-05 00:46:21,120 ----------------------------------------------------------------------------------------------------
+ 2022-02-05 00:46:21,123 EPOCH 10 done: loss 0.0361 - lr 0.0000000
+ 2022-02-05 00:49:11,421 DEV : loss 0.023424603044986725 - f1-score (micro avg) 0.9417
+ 2022-02-05 00:49:11,486 BAD EPOCHS (no improvement): 4
+ 2022-02-05 00:49:12,641 ----------------------------------------------------------------------------------------------------
+ 2022-02-05 00:49:12,643 Testing using last state of model ...
+ 2022-02-05 00:52:03,154 0.9303 0.9309 0.9306 0.8856
+ 2022-02-05 00:52:03,155
+ Results:
+ - F-score (micro) 0.9306
+ - F-score (macro) 0.9057
+ - Accuracy 0.8856
+
+ By class:
+                precision    recall  f1-score   support
+
+         pers      0.9373    0.9236    0.9304      2734
+          loc      0.9140    0.9371    0.9254      1384
+       amount      0.9840    0.9840    0.9840       250
+         time      0.9447    0.9407    0.9427       236
+         func      0.9209    0.9143    0.9176       140
+          org      0.8364    0.9388    0.8846        49
+         prod      0.7742    0.8889    0.8276        27
+        event      0.8333    0.8333    0.8333        12
+
+    micro avg      0.9303    0.9309    0.9306      4832
+    macro avg      0.8931    0.9201    0.9057      4832
+ weighted avg      0.9307    0.9309    0.9307      4832
+  samples avg      0.8856    0.8856    0.8856      4832
+
+ 2022-02-05 00:52:03,155 ----------------------------------------------------------------------------------------------------
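
The log pins down the whole setup: a Flair SequenceTagger over fine-tuned CamemBERT embeddings with a plain linear head (no RNN, no CRF in the printed module tree), trained for 10 epochs at a peak learning rate of 5e-05 with mini-batches of 16 on a corpus of 126973 train / 7037 dev / 7090 test sentences. A minimal sketch of a matching training script follows; the corpus loader, its column layout, and the "camembert-base" checkpoint name are assumptions, since the log shows only the instantiated model and the hyperparameters:

```python
# Minimal sketch of a training run matching the logged configuration.
# The data folder, column format, and "camembert-base" are hypothetical;
# hyperparameters and base path are taken from the "Parameters" section.
from flair.datasets import ColumnCorpus
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# Hypothetical CoNLL-style corpus: one token per line with its NER tag.
corpus = ColumnCorpus("data/", {0: "text", 1: "ner"})
label_dict = corpus.make_label_dictionary(label_type="ner")

# Fine-tuned CamemBERT word embeddings, as in the printed module tree.
embeddings = TransformerWordEmbeddings("camembert-base", fine_tune=True)

# The printed model ends in Linear(768 -> 18): no RNN, no CRF layer.
tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type="ner",
    use_rnn=False,
    use_crf=False,
)

trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "resources/taggers/ner-camembert",  # base path from the log
    learning_rate=5e-05,
    mini_batch_size=16,
    max_epochs=10,
)
```

The lr column in the log (ramp to 0.000050 across epoch 1, then steady decay to zero) matches Flair's fine-tuning warm-up/decay schedule rather than patience-based annealing, which would also explain the constant "BAD EPOCHS (no improvement): 4" lines; the patience 3 and anneal_factor 0.5 entries look like pass-through defaults that the scheduler never uses.
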
weights.txt ADDED
(empty file)