Rodrigo1771 committed on
Commit
9dedde0
·
verified ·
1 Parent(s): a817870

Training in progress, epoch 1

Browse files
README.md ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: michiyasunaga/BioLinkBERT-base
4
+ tags:
5
+ - token-classification
6
+ - generated_from_trainer
7
+ datasets:
8
+ - Rodrigo1771/drugtemist-en-ner
9
+ metrics:
10
+ - precision
11
+ - recall
12
+ - f1
13
+ - accuracy
14
+ model-index:
15
+ - name: output
16
+ results:
17
+ - task:
18
+ name: Token Classification
19
+ type: token-classification
20
+ dataset:
21
+ name: Rodrigo1771/drugtemist-en-ner
22
+ type: Rodrigo1771/drugtemist-en-ner
23
+ config: DrugTEMIST English NER
24
+ split: validation
25
+ args: DrugTEMIST English NER
26
+ metrics:
27
+ - name: Precision
28
+ type: precision
29
+ value: 0.9327102803738317
30
+ - name: Recall
31
+ type: recall
32
+ value: 0.9301025163094129
33
+ - name: F1
34
+ type: f1
35
+ value: 0.9314045730284647
36
+ - name: Accuracy
37
+ type: accuracy
38
+ value: 0.9986953367008066
39
+ ---
40
+
41
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
42
+ should probably proofread and complete it, then remove this comment. -->
43
+
44
+ # output
45
+
46
+ This model is a fine-tuned version of [michiyasunaga/BioLinkBERT-base](https://huggingface.co/michiyasunaga/BioLinkBERT-base) on the Rodrigo1771/drugtemist-en-ner dataset.
47
+ It achieves the following results on the evaluation set:
48
+ - Loss: 0.0056
49
+ - Precision: 0.9327
50
+ - Recall: 0.9301
51
+ - F1: 0.9314
52
+ - Accuracy: 0.9987
53
+
54
+ ## Model description
55
+
56
+ More information needed
57
+
58
+ ## Intended uses & limitations
59
+
60
+ More information needed
61
+
62
+ ## Training and evaluation data
63
+
64
+ More information needed
65
+
66
+ ## Training procedure
67
+
68
+ ### Training hyperparameters
69
+
70
+ The following hyperparameters were used during training:
71
+ - learning_rate: 5e-05
72
+ - train_batch_size: 32
73
+ - eval_batch_size: 8
74
+ - seed: 42
75
+ - gradient_accumulation_steps: 2
76
+ - total_train_batch_size: 64
77
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
78
+ - lr_scheduler_type: linear
79
+ - num_epochs: 10.0
80
+
81
+ ### Training results
82
+
83
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
84
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
85
+ | No log | 1.0 | 434 | 0.0057 | 0.8938 | 0.8938 | 0.8938 | 0.9981 |
86
+ | 0.0182 | 2.0 | 868 | 0.0044 | 0.9024 | 0.9301 | 0.9160 | 0.9985 |
87
+ | 0.0039 | 3.0 | 1302 | 0.0045 | 0.9129 | 0.9282 | 0.9205 | 0.9987 |
88
+ | 0.0024 | 4.0 | 1736 | 0.0051 | 0.8821 | 0.9348 | 0.9077 | 0.9983 |
89
+ | 0.0017 | 5.0 | 2170 | 0.0057 | 0.9251 | 0.9320 | 0.9285 | 0.9986 |
90
+ | 0.0012 | 6.0 | 2604 | 0.0061 | 0.9001 | 0.9236 | 0.9117 | 0.9984 |
91
+ | 0.0009 | 7.0 | 3038 | 0.0056 | 0.9327 | 0.9301 | 0.9314 | 0.9987 |
92
+ | 0.0009 | 8.0 | 3472 | 0.0068 | 0.9118 | 0.9348 | 0.9231 | 0.9986 |
93
+ | 0.0006 | 9.0 | 3906 | 0.0072 | 0.9267 | 0.9310 | 0.9289 | 0.9987 |
94
+ | 0.0004 | 10.0 | 4340 | 0.0073 | 0.9192 | 0.9329 | 0.9260 | 0.9986 |
95
+
96
+
97
+ ### Framework versions
98
+
99
+ - Transformers 4.42.4
100
+ - PyTorch 2.4.0+cu121
101
+ - Datasets 2.21.0
102
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9986953367008066,
4
+ "eval_f1": 0.9314045730284647,
5
+ "eval_loss": 0.005624314770102501,
6
+ "eval_precision": 0.9327102803738317,
7
+ "eval_recall": 0.9301025163094129,
8
+ "eval_runtime": 13.3976,
9
+ "eval_samples": 6946,
10
+ "eval_samples_per_second": 518.45,
11
+ "eval_steps_per_second": 64.862,
12
+ "predict_accuracy": 0.9986842934577083,
13
+ "predict_f1": 0.9213546039742514,
14
+ "predict_loss": 0.005766334943473339,
15
+ "predict_precision": 0.8892490545651,
16
+ "predict_recall": 0.9558652729384437,
17
+ "predict_runtime": 26.2719,
18
+ "predict_samples_per_second": 560.104,
19
+ "predict_steps_per_second": 70.037,
20
+ "total_flos": 1.0996932656642544e+16,
21
+ "train_loss": 0.003382195293697344,
22
+ "train_runtime": 1039.0596,
23
+ "train_samples": 27768,
24
+ "train_samples_per_second": 267.242,
25
+ "train_steps_per_second": 4.177
26
+ }
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "IVN-RIN/bioBIT",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "finetuning_task": "ner",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "B-FARMACO",
15
+ "2": "I-FARMACO"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "B-FARMACO": 1,
21
+ "I-FARMACO": 2,
22
+ "O": 0
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "position_embedding_type": "absolute",
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.42.4",
33
+ "type_vocab_size": 2,
34
+ "use_cache": true,
35
+ "vocab_size": 31102
36
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9986953367008066,
4
+ "eval_f1": 0.9314045730284647,
5
+ "eval_loss": 0.005624314770102501,
6
+ "eval_precision": 0.9327102803738317,
7
+ "eval_recall": 0.9301025163094129,
8
+ "eval_runtime": 13.3976,
9
+ "eval_samples": 6946,
10
+ "eval_samples_per_second": 518.45,
11
+ "eval_steps_per_second": 64.862
12
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6044a275de8e32386a79764ea656c47179fefbc2ae788938459fb63a7ae8ef30
3
+ size 437380924
predict_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.9986842934577083,
3
+ "predict_f1": 0.9213546039742514,
4
+ "predict_loss": 0.005766334943473339,
5
+ "predict_precision": 0.8892490545651,
6
+ "predict_recall": 0.9558652729384437,
7
+ "predict_runtime": 26.2719,
8
+ "predict_samples_per_second": 560.104,
9
+ "predict_steps_per_second": 70.037
10
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tb/events.out.tfevents.1725045346.6b97e535edda.2908.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e576d6fc5f437e9cdba3770a03c7980ab62c145def88b46a7b1d4e68f13bfde9
3
+ size 6546
tb/events.out.tfevents.1725046129.6b97e535edda.6370.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9e94268d839c5a3c21781840b052643e729f1bd38c4ed1942a53d09ddd89bfc
3
+ size 12153
tb/events.out.tfevents.1725047358.6b97e535edda.6370.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6babdcbcafd1e6dd843214457a56f608115a0ebceecc401a4e103c0ed26fb36d
3
+ size 560
tb/events.out.tfevents.1725047806.6b97e535edda.13440.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f1285ee11483b82cb81d6c4b8c4ba4a32b26fb1a1a6581781e536e31fc708b8
3
+ size 12153
tb/events.out.tfevents.1725049039.6b97e535edda.13440.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f105690f828d701992cc9bf50b2d6c540b9476f5cd4584b9d782e2db879b27e7
3
+ size 560
tb/events.out.tfevents.1725049548.6b97e535edda.20735.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:288393d27fba4eca473a2f23089dcbee70efa764ffa6c2e99b2653da55d02bfe
3
+ size 11877
tb/events.out.tfevents.1725050776.6b97e535edda.20735.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:695bf5e0a27bceb5f4eb0bc971d0b2f97725e31e30a7b30d5ffedc4ae0991cc9
3
+ size 560
tb/events.out.tfevents.1725051499.6b97e535edda.28945.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81ca4a4ffaf7acbeed39c4d27f30ad7cf515ebde1878f6dc71b3b56750533d7b
3
+ size 11865
tb/events.out.tfevents.1725052726.6b97e535edda.28945.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58fb60c8fb42cab9f8aac087aa09811703dd64f9a0e29dac5418ca6f4883dc75
3
+ size 560
tb/events.out.tfevents.1725053057.6b97e535edda.35455.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:060b7fdb33d177fa8187153a2ec8df8664c9f3e11c25faef21bd32c1122357bf
3
+ size 11813
tb/events.out.tfevents.1725054116.6b97e535edda.35455.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04054a8bc0ae5fb6884e73facfa19e982ab0a679c09f6ba3d7275d87e66f1009
3
+ size 560
tb/events.out.tfevents.1725054888.6b97e535edda.43233.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9b3702f91219cedb846bffec9a3484c2046f4badfab204954c56ff47966a71d
3
+ size 5506
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "101": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "102": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "103": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "mask_token": "[MASK]",
49
+ "max_len": 512,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "truncation": true,
58
+ "unk_token": "[UNK]"
59
+ }
train.log ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/4250 [00:00<?, ?it/s]
1
  0%| | 1/4250 [00:01<1:16:38, 1.08s/it]
2
  0%| | 2/4250 [00:01<42:55, 1.65it/s]
3
  0%| | 3/4250 [00:01<31:14, 2.27it/s]
4
  0%| | 4/4250 [00:01<27:28, 2.58it/s]
5
  0%| | 5/4250 [00:02<22:59, 3.08it/s]
6
  0%| | 6/4250 [00:02<20:46, 3.40it/s]
7
  0%| | 7/4250 [00:02<19:37, 3.60it/s]
8
  0%| | 8/4250 [00:02<19:26, 3.64it/s]
9
  0%| | 9/4250 [00:03<19:00, 3.72it/s]
10
  0%| | 10/4250 [00:03<18:07, 3.90it/s]
11
  0%| | 11/4250 [00:03<19:02, 3.71it/s]
12
  0%| | 12/4250 [00:04<24:46, 2.85it/s]
13
  0%| | 13/4250 [00:04<21:06, 3.34it/s]
14
  0%| | 14/4250 [00:04<20:00, 3.53it/s]
15
  0%| | 15/4250 [00:04<18:30, 3.81it/s]
16
  0%| | 16/4250 [00:05<18:06, 3.90it/s]
17
  0%| | 17/4250 [00:05<17:34, 4.01it/s]
18
  0%| | 18/4250 [00:05<18:38, 3.78it/s]
19
  0%| | 19/4250 [00:05<17:09, 4.11it/s]
20
  0%| | 20/4250 [00:06<19:46, 3.56it/s]
21
  0%| | 21/4250 [00:06<18:46, 3.75it/s]
22
  1%| | 22/4250 [00:06<17:49, 3.95it/s]
23
  1%| | 23/4250 [00:06<16:35, 4.25it/s]
24
  1%| | 24/4250 [00:07<16:31, 4.26it/s]
25
  1%| | 25/4250 [00:07<16:19, 4.31it/s]
26
  1%| | 26/4250 [00:07<17:22, 4.05it/s]
27
  1%| | 27/4250 [00:07<17:01, 4.14it/s]
28
  1%| | 28/4250 [00:08<17:18, 4.07it/s]
29
  1%| | 29/4250 [00:08<17:59, 3.91it/s]
30
  1%| | 30/4250 [00:08<17:04, 4.12it/s]
31
  1%| | 31/4250 [00:08<16:47, 4.19it/s]
32
  1%| | 32/4250 [00:08<16:27, 4.27it/s]
33
  1%| | 33/4250 [00:09<17:11, 4.09it/s]
34
  1%| | 34/4250 [00:09<15:30, 4.53it/s]
35
  1%| | 35/4250 [00:09<15:15, 4.60it/s]
36
  1%| | 36/4250 [00:09<15:39, 4.49it/s]
37
  1%| | 37/4250 [00:10<15:49, 4.44it/s]
38
  1%| | 38/4250 [00:10<16:11, 4.34it/s]
39
  1%| | 39/4250 [00:10<15:41, 4.47it/s]
40
  1%| | 40/4250 [00:10<15:55, 4.41it/s]
41
  1%| | 41/4250 [00:10<15:07, 4.64it/s]
42
  1%| | 42/4250 [00:11<14:56, 4.69it/s]
43
  1%| | 43/4250 [00:11<15:59, 4.39it/s]
44
  1%| | 44/4250 [00:11<15:40, 4.47it/s]
45
  1%| | 45/4250 [00:11<15:41, 4.47it/s]
46
  1%| | 46/4250 [00:12<17:35, 3.98it/s]
47
  1%| | 47/4250 [00:12<17:45, 3.94it/s]
48
  1%| | 48/4250 [00:12<16:40, 4.20it/s]
49
  1%| | 49/4250 [00:12<15:58, 4.38it/s]
50
  1%| | 50/4250 [00:13<17:10, 4.08it/s]
51
  1%| | 51/4250 [00:13<18:57, 3.69it/s]
52
  1%| | 52/4250 [00:13<19:58, 3.50it/s]
53
  1%| | 53/4250 [00:14<18:49, 3.72it/s]
54
  1%|▏ | 54/4250 [00:14<17:34, 3.98it/s]
55
  1%|▏ | 55/4250 [00:14<16:51, 4.15it/s]
56
  1%|▏ | 56/4250 [00:14<17:23, 4.02it/s]
57
  1%|▏ | 57/4250 [00:15<21:46, 3.21it/s]
58
  1%|▏ | 58/4250 [00:15<20:01, 3.49it/s]
59
  1%|▏ | 59/4250 [00:15<18:11, 3.84it/s]
60
  1%|▏ | 60/4250 [00:15<20:27, 3.41it/s]
61
  1%|▏ | 61/4250 [00:16<18:31, 3.77it/s]
62
  1%|▏ | 62/4250 [00:16<19:20, 3.61it/s]
63
  1%|▏ | 63/4250 [00:16<18:01, 3.87it/s]
64
  2%|▏ | 64/4250 [00:16<18:11, 3.83it/s]
65
  2%|▏ | 65/4250 [00:17<16:45, 4.16it/s]
66
  2%|▏ | 66/4250 [00:17<17:26, 4.00it/s]
67
  2%|▏ | 67/4250 [00:17<17:29, 3.98it/s]
68
  2%|▏ | 68/4250 [00:17<16:14, 4.29it/s]
69
  2%|▏ | 69/4250 [00:18<16:41, 4.18it/s]
70
  2%|▏ | 70/4250 [00:18<15:52, 4.39it/s]
71
  2%|▏ | 71/4250 [00:18<15:10, 4.59it/s]
72
  2%|▏ | 72/4250 [00:18<15:05, 4.61it/s]
73
  2%|▏ | 73/4250 [00:19<16:51, 4.13it/s]
74
  2%|▏ | 74/4250 [00:19<17:50, 3.90it/s]
75
  2%|▏ | 75/4250 [00:19<16:49, 4.14it/s]
76
  2%|▏ | 76/4250 [00:19<16:36, 4.19it/s]
77
  2%|▏ | 77/4250 [00:19<16:32, 4.21it/s]
78
  2%|▏ | 78/4250 [00:20<16:37, 4.18it/s]
79
  2%|▏ | 79/4250 [00:20<20:19, 3.42it/s]
80
  2%|▏ | 80/4250 [00:21<21:51, 3.18it/s]
81
  2%|▏ | 81/4250 [00:21<20:01, 3.47it/s]
82
  2%|▏ | 82/4250 [00:21<18:23, 3.78it/s]
83
  2%|▏ | 83/4250 [00:21<17:42, 3.92it/s]
84
  2%|▏ | 84/4250 [00:21<17:39, 3.93it/s]
85
  2%|▏ | 85/4250 [00:22<20:45, 3.34it/s]
86
  2%|▏ | 86/4250 [00:22<20:38, 3.36it/s]
87
  2%|▏ | 87/4250 [00:22<19:30, 3.56it/s]
88
  2%|▏ | 88/4250 [00:23<18:15, 3.80it/s]
89
  2%|▏ | 89/4250 [00:23<21:52, 3.17it/s]
90
  2%|▏ | 90/4250 [00:23<19:46, 3.51it/s]
91
  2%|▏ | 91/4250 [00:24<20:54, 3.32it/s]
92
  2%|▏ | 92/4250 [00:24<19:20, 3.58it/s]
93
  2%|▏ | 93/4250 [00:24<19:18, 3.59it/s]
94
  2%|▏ | 94/4250 [00:24<18:39, 3.71it/s]
95
  2%|▏ | 95/4250 [00:25<18:39, 3.71it/s]
96
  2%|▏ | 96/4250 [00:25<18:11, 3.81it/s]
97
  2%|▏ | 97/4250 [00:25<17:50, 3.88it/s]
98
  2%|▏ | 98/4250 [00:25<17:37, 3.93it/s]
99
  2%|▏ | 99/4250 [00:26<15:45, 4.39it/s]
100
  2%|▏ | 100/4250 [00:26<18:10, 3.80it/s]
101
  2%|▏ | 101/4250 [00:26<17:23, 3.98it/s]
102
  2%|▏ | 102/4250 [00:26<17:23, 3.97it/s]
103
  2%|▏ | 103/4250 [00:27<18:38, 3.71it/s]
104
  2%|▏ | 104/4250 [00:27<17:19, 3.99it/s]
105
  2%|▏ | 105/4250 [00:27<17:35, 3.93it/s]
106
  2%|▏ | 106/4250 [00:27<16:50, 4.10it/s]
107
  3%|▎ | 107/4250 [00:28<19:37, 3.52it/s]
108
  3%|▎ | 108/4250 [00:28<17:52, 3.86it/s]
109
  3%|▎ | 109/4250 [00:28<17:14, 4.00it/s]
110
  3%|▎ | 110/4250 [00:28<17:47, 3.88it/s]
111
  3%|▎ | 111/4250 [00:29<16:36, 4.15it/s]
112
  3%|▎ | 112/4250 [00:29<17:14, 4.00it/s]
113
  3%|▎ | 113/4250 [00:29<18:29, 3.73it/s]
114
  3%|▎ | 114/4250 [00:29<17:31, 3.93it/s]
115
  3%|▎ | 115/4250 [00:30<19:28, 3.54it/s]
116
  3%|▎ | 116/4250 [00:30<17:27, 3.95it/s]
117
  3%|▎ | 117/4250 [00:30<16:28, 4.18it/s]
118
  3%|▎ | 118/4250 [00:30<16:04, 4.28it/s]
119
  3%|▎ | 119/4250 [00:31<16:06, 4.28it/s]
120
  3%|▎ | 120/4250 [00:31<16:11, 4.25it/s]
121
  3%|▎ | 121/4250 [00:31<16:50, 4.09it/s]
122
  3%|▎ | 122/4250 [00:31<16:30, 4.17it/s]
123
  3%|▎ | 123/4250 [00:32<15:33, 4.42it/s]
124
  3%|▎ | 124/4250 [00:32<16:36, 4.14it/s]
125
  3%|▎ | 125/4250 [00:32<16:22, 4.20it/s]
126
  3%|▎ | 126/4250 [00:32<15:56, 4.31it/s]
127
  3%|▎ | 127/4250 [00:33<17:35, 3.91it/s]
128
  3%|▎ | 128/4250 [00:33<17:48, 3.86it/s]
129
  3%|▎ | 129/4250 [00:33<18:55, 3.63it/s]
130
  3%|▎ | 130/4250 [00:33<19:53, 3.45it/s]
131
  3%|▎ | 131/4250 [00:34<21:30, 3.19it/s]
132
  3%|▎ | 132/4250 [00:34<18:47, 3.65it/s]
133
  3%|▎ | 133/4250 [00:34<19:03, 3.60it/s]
134
  3%|▎ | 134/4250 [00:35<18:17, 3.75it/s]
135
  3%|▎ | 135/4250 [00:35<18:36, 3.69it/s]
136
  3%|▎ | 136/4250 [00:35<17:18, 3.96it/s]
137
  3%|▎ | 137/4250 [00:35<17:23, 3.94it/s]
138
  3%|▎ | 138/4250 [00:36<16:21, 4.19it/s]
139
  3%|▎ | 139/4250 [00:36<18:04, 3.79it/s]
140
  3%|▎ | 140/4250 [00:36<17:20, 3.95it/s]
141
  3%|▎ | 141/4250 [00:36<17:03, 4.01it/s]
142
  3%|▎ | 142/4250 [00:37<16:21, 4.18it/s]
143
  3%|▎ | 143/4250 [00:37<17:08, 3.99it/s]
144
  3%|▎ | 144/4250 [00:37<16:12, 4.22it/s]
145
  3%|▎ | 145/4250 [00:37<16:06, 4.25it/s]
146
  3%|▎ | 146/4250 [00:37<16:05, 4.25it/s]
147
  3%|▎ | 147/4250 [00:38<16:17, 4.20it/s]
148
  3%|▎ | 148/4250 [00:38<15:01, 4.55it/s]
149
  4%|▎ | 149/4250 [00:38<14:40, 4.66it/s]
150
  4%|▎ | 150/4250 [00:38<15:20, 4.45it/s]
151
  4%|▎ | 151/4250 [00:39<15:14, 4.48it/s]
152
  4%|▎ | 152/4250 [00:39<14:33, 4.69it/s]
153
  4%|▎ | 153/4250 [00:39<14:51, 4.59it/s]
154
  4%|▎ | 154/4250 [00:39<15:37, 4.37it/s]
155
  4%|▎ | 155/4250 [00:39<15:52, 4.30it/s]
156
  4%|▎ | 156/4250 [00:40<16:36, 4.11it/s]
157
  4%|▎ | 157/4250 [00:40<18:32, 3.68it/s]
158
  4%|▎ | 158/4250 [00:40<19:13, 3.55it/s]
159
  4%|▎ | 159/4250 [00:41<18:27, 3.69it/s]
160
  4%|▍ | 160/4250 [00:41<20:59, 3.25it/s]
161
  4%|▍ | 161/4250 [00:41<20:13, 3.37it/s]
162
  4%|▍ | 162/4250 [00:42<18:44, 3.63it/s]
163
  4%|▍ | 163/4250 [00:42<17:18, 3.94it/s]
164
  4%|▍ | 164/4250 [00:42<16:43, 4.07it/s]
165
  4%|▍ | 165/4250 [00:42<17:07, 3.97it/s]
166
  4%|▍ | 166/4250 [00:42<15:52, 4.29it/s]
167
  4%|▍ | 167/4250 [00:43<15:46, 4.31it/s]
168
  4%|▍ | 168/4250 [00:43<15:42, 4.33it/s]
169
  4%|▍ | 169/4250 [00:43<15:19, 4.44it/s]
170
  4%|▍ | 170/4250 [00:43<15:05, 4.51it/s]
171
  4%|▍ | 171/4250 [00:43<14:22, 4.73it/s]
172
  4%|▍ | 172/4250 [00:44<17:40, 3.84it/s]
173
  4%|▍ | 173/4250 [00:44<17:04, 3.98it/s]
174
  4%|▍ | 174/4250 [00:44<16:34, 4.10it/s]
175
  4%|▍ | 175/4250 [00:45<16:43, 4.06it/s]
176
  4%|▍ | 176/4250 [00:45<17:02, 3.98it/s]
177
  4%|▍ | 177/4250 [00:45<15:29, 4.38it/s]
178
  4%|▍ | 178/4250 [00:45<16:23, 4.14it/s]
179
  4%|▍ | 179/4250 [00:46<22:29, 3.02it/s]
180
  4%|▍ | 180/4250 [00:46<21:27, 3.16it/s]
181
  4%|▍ | 181/4250 [00:46<21:39, 3.13it/s]
182
  4%|▍ | 182/4250 [00:47<19:25, 3.49it/s]
183
  4%|▍ | 183/4250 [00:47<17:34, 3.86it/s]
184
  4%|▍ | 184/4250 [00:47<15:59, 4.24it/s]
185
  4%|▍ | 185/4250 [00:47<17:30, 3.87it/s]
186
  4%|▍ | 186/4250 [00:48<16:32, 4.10it/s]
187
  4%|▍ | 187/4250 [00:48<15:54, 4.26it/s]
188
  4%|▍ | 188/4250 [00:48<15:03, 4.50it/s]
189
  4%|▍ | 189/4250 [00:48<16:32, 4.09it/s]
190
  4%|▍ | 190/4250 [00:48<16:20, 4.14it/s]
191
  4%|▍ | 191/4250 [00:49<16:00, 4.23it/s]
192
  5%|▍ | 192/4250 [00:49<20:25, 3.31it/s]
193
  5%|▍ | 193/4250 [00:49<19:38, 3.44it/s]
194
  5%|▍ | 194/4250 [00:50<19:04, 3.54it/s]
195
  5%|▍ | 195/4250 [00:50<19:26, 3.48it/s]
196
  5%|▍ | 196/4250 [00:50<19:43, 3.43it/s]
197
  5%|▍ | 197/4250 [00:51<22:32, 3.00it/s]
198
  5%|▍ | 198/4250 [00:51<20:17, 3.33it/s]
199
  5%|▍ | 199/4250 [00:51<19:04, 3.54it/s]
200
  5%|▍ | 200/4250 [00:51<19:10, 3.52it/s]
201
  5%|▍ | 201/4250 [00:52<17:57, 3.76it/s]
202
  5%|▍ | 202/4250 [00:52<16:58, 3.98it/s]
203
  5%|▍ | 203/4250 [00:52<17:50, 3.78it/s]
204
  5%|▍ | 204/4250 [00:53<18:23, 3.67it/s]
205
  5%|▍ | 205/4250 [00:53<20:11, 3.34it/s]
206
  5%|▍ | 206/4250 [00:54<29:04, 2.32it/s]
207
  5%|▍ | 207/4250 [00:54<25:49, 2.61it/s]
208
  5%|▍ | 208/4250 [00:54<23:49, 2.83it/s]
209
  5%|▍ | 209/4250 [00:54<20:34, 3.27it/s]
210
  5%|▍ | 210/4250 [00:55<22:38, 2.97it/s]
211
  5%|▍ | 211/4250 [00:55<20:50, 3.23it/s]
212
  5%|▍ | 212/4250 [00:56<27:58, 2.41it/s]
213
  5%|▌ | 213/4250 [00:56<23:54, 2.81it/s]
214
  5%|▌ | 214/4250 [00:56<21:29, 3.13it/s]
215
  5%|▌ | 215/4250 [00:56<19:16, 3.49it/s]
216
  5%|▌ | 216/4250 [00:57<17:35, 3.82it/s]
217
  5%|▌ | 217/4250 [00:57<17:54, 3.75it/s]
218
  5%|▌ | 218/4250 [00:57<17:15, 3.89it/s]
219
  5%|▌ | 219/4250 [00:57<21:03, 3.19it/s]
220
  5%|▌ | 220/4250 [00:58<21:20, 3.15it/s]
221
  5%|▌ | 221/4250 [00:58<19:17, 3.48it/s]
222
  5%|▌ | 222/4250 [00:58<18:58, 3.54it/s]
223
  5%|▌ | 223/4250 [00:59<18:23, 3.65it/s]
224
  5%|▌ | 224/4250 [00:59<19:19, 3.47it/s]
225
  5%|▌ | 225/4250 [00:59<17:44, 3.78it/s]
226
  5%|▌ | 226/4250 [00:59<16:41, 4.02it/s]
227
  5%|▌ | 227/4250 [00:59<14:48, 4.53it/s]
228
  5%|▌ | 228/4250 [01:00<14:53, 4.50it/s]
229
  5%|▌ | 229/4250 [01:00<15:46, 4.25it/s]
230
  5%|▌ | 230/4250 [01:00<16:31, 4.05it/s]
231
  5%|▌ | 231/4250 [01:00<15:56, 4.20it/s]
232
  5%|▌ | 232/4250 [01:01<16:04, 4.17it/s]
233
  5%|▌ | 233/4250 [01:01<17:08, 3.90it/s]
234
  6%|▌ | 234/4250 [01:01<16:18, 4.11it/s]
235
  6%|▌ | 235/4250 [01:01<15:32, 4.30it/s]
236
  6%|▌ | 236/4250 [01:02<18:03, 3.70it/s]
237
  6%|▌ | 237/4250 [01:02<17:39, 3.79it/s]
238
  6%|▌ | 238/4250 [01:02<17:04, 3.92it/s]
239
  6%|▌ | 239/4250 [01:02<16:28, 4.06it/s]
240
  6%|▌ | 240/4250 [01:03<16:27, 4.06it/s]
241
  6%|▌ | 241/4250 [01:03<15:43, 4.25it/s]
242
  6%|▌ | 242/4250 [01:03<15:13, 4.39it/s]
243
  6%|▌ | 243/4250 [01:03<16:49, 3.97it/s]
244
  6%|▌ | 244/4250 [01:04<16:32, 4.04it/s]
245
  6%|▌ | 245/4250 [01:04<15:08, 4.41it/s]
246
  6%|▌ | 246/4250 [01:04<15:58, 4.18it/s]
247
  6%|▌ | 247/4250 [01:05<18:58, 3.52it/s]
248
  6%|▌ | 248/4250 [01:05<17:18, 3.85it/s]
249
  6%|▌ | 249/4250 [01:05<17:01, 3.92it/s]
250
  6%|▌ | 250/4250 [01:05<17:03, 3.91it/s]
251
  6%|▌ | 251/4250 [01:05<17:23, 3.83it/s]
252
  6%|▌ | 252/4250 [01:06<16:16, 4.10it/s]
253
  6%|▌ | 253/4250 [01:06<16:01, 4.16it/s]
254
  6%|▌ | 254/4250 [01:06<18:43, 3.56it/s]
255
  6%|▌ | 255/4250 [01:07<17:11, 3.87it/s]
256
  6%|▌ | 256/4250 [01:07<15:48, 4.21it/s]
257
  6%|▌ | 257/4250 [01:07<15:45, 4.22it/s]
258
  6%|▌ | 258/4250 [01:07<15:28, 4.30it/s]
259
  6%|▌ | 259/4250 [01:07<15:16, 4.35it/s]
260
  6%|▌ | 260/4250 [01:08<15:46, 4.22it/s]
261
  6%|▌ | 261/4250 [01:08<16:22, 4.06it/s]
262
  6%|▌ | 262/4250 [01:08<16:58, 3.92it/s]
263
  6%|▌ | 263/4250 [01:08<16:51, 3.94it/s]
264
  6%|▌ | 264/4250 [01:09<17:22, 3.82it/s]
265
  6%|▌ | 265/4250 [01:09<18:33, 3.58it/s]
266
  6%|▋ | 266/4250 [01:09<17:10, 3.87it/s]
267
  6%|▋ | 267/4250 [01:09<16:28, 4.03it/s]
268
  6%|▋ | 268/4250 [01:10<15:55, 4.17it/s]
269
  6%|▋ | 269/4250 [01:10<15:05, 4.39it/s]
270
  6%|▋ | 270/4250 [01:10<15:59, 4.15it/s]
271
  6%|▋ | 271/4250 [01:10<15:23, 4.31it/s]
272
  6%|▋ | 272/4250 [01:11<17:34, 3.77it/s]
273
  6%|▋ | 273/4250 [01:11<17:16, 3.84it/s]
274
  6%|▋ | 274/4250 [01:11<16:56, 3.91it/s]
275
  6%|▋ | 275/4250 [01:11<17:35, 3.77it/s]
276
  6%|▋ | 276/4250 [01:12<16:27, 4.02it/s]
277
  7%|▋ | 277/4250 [01:12<16:17, 4.07it/s]
278
  7%|▋ | 278/4250 [01:12<15:37, 4.24it/s]
279
  7%|▋ | 279/4250 [01:12<14:53, 4.45it/s]
280
  7%|▋ | 280/4250 [01:13<14:28, 4.57it/s]
281
  7%|▋ | 281/4250 [01:13<16:13, 4.08it/s]
282
  7%|▋ | 282/4250 [01:13<15:35, 4.24it/s]
283
  7%|▋ | 283/4250 [01:13<18:01, 3.67it/s]
284
  7%|▋ | 284/4250 [01:14<17:47, 3.72it/s]
285
  7%|▋ | 285/4250 [01:14<15:44, 4.20it/s]
286
  7%|▋ | 286/4250 [01:14<14:33, 4.54it/s]
287
  7%|▋ | 287/4250 [01:14<13:57, 4.73it/s]
288
  7%|▋ | 288/4250 [01:14<14:34, 4.53it/s]
289
  7%|▋ | 289/4250 [01:15<14:38, 4.51it/s]
290
  7%|▋ | 290/4250 [01:15<16:00, 4.12it/s]
291
  7%|▋ | 291/4250 [01:15<16:28, 4.01it/s]
292
  7%|▋ | 292/4250 [01:15<16:07, 4.09it/s]
293
  7%|▋ | 293/4250 [01:16<15:33, 4.24it/s]
294
  7%|▋ | 294/4250 [01:16<17:41, 3.73it/s]
295
  7%|▋ | 295/4250 [01:16<18:00, 3.66it/s]
296
  7%|▋ | 296/4250 [01:17<18:14, 3.61it/s]
297
  7%|▋ | 297/4250 [01:17<16:55, 3.89it/s]
298
  7%|▋ | 298/4250 [01:17<16:34, 3.97it/s]
299
  7%|▋ | 299/4250 [01:17<16:28, 4.00it/s]
300
  7%|▋ | 300/4250 [01:18<16:29, 3.99it/s]
301
  7%|▋ | 301/4250 [01:18<15:50, 4.16it/s]
302
  7%|▋ | 302/4250 [01:18<17:24, 3.78it/s]
303
  7%|▋ | 303/4250 [01:18<16:36, 3.96it/s]
304
  7%|▋ | 304/4250 [01:19<16:51, 3.90it/s]
305
  7%|▋ | 305/4250 [01:19<15:48, 4.16it/s]
306
  7%|▋ | 306/4250 [01:19<16:32, 3.97it/s]
307
  7%|▋ | 307/4250 [01:19<16:51, 3.90it/s]
308
  7%|▋ | 308/4250 [01:20<15:55, 4.13it/s]
309
  7%|▋ | 309/4250 [01:20<17:03, 3.85it/s]
310
  7%|▋ | 310/4250 [01:20<15:39, 4.19it/s]
311
  7%|▋ | 311/4250 [01:20<17:13, 3.81it/s]
312
  7%|▋ | 312/4250 [01:21<16:25, 4.00it/s]
313
  7%|▋ | 313/4250 [01:21<15:07, 4.34it/s]
314
  7%|▋ | 314/4250 [01:21<14:13, 4.61it/s]
315
  7%|▋ | 315/4250 [01:21<14:42, 4.46it/s]
316
  7%|▋ | 316/4250 [01:21<14:47, 4.43it/s]
317
  7%|▋ | 317/4250 [01:22<17:53, 3.67it/s]
318
  7%|▋ | 318/4250 [01:22<16:43, 3.92it/s]
319
  8%|▊ | 319/4250 [01:22<16:30, 3.97it/s]
320
  8%|▊ | 320/4250 [01:23<17:33, 3.73it/s]
321
  8%|▊ | 321/4250 [01:23<16:49, 3.89it/s]
322
  8%|▊ | 322/4250 [01:23<19:39, 3.33it/s]
323
  8%|▊ | 323/4250 [01:23<19:31, 3.35it/s]
324
  8%|▊ | 324/4250 [01:24<20:40, 3.17it/s]
325
  8%|▊ | 325/4250 [01:24<18:45, 3.49it/s]
326
  8%|▊ | 326/4250 [01:24<16:12, 4.04it/s]
327
  8%|▊ | 327/4250 [01:24<15:22, 4.25it/s]
328
  8%|▊ | 328/4250 [01:25<14:59, 4.36it/s]
329
  8%|▊ | 329/4250 [01:25<16:05, 4.06it/s]
330
  8%|▊ | 330/4250 [01:25<15:15, 4.28it/s]
331
  8%|▊ | 331/4250 [01:25<15:50, 4.12it/s]
332
  8%|▊ | 332/4250 [01:26<15:59, 4.08it/s]
333
  8%|▊ | 333/4250 [01:26<15:12, 4.29it/s]
334
  8%|▊ | 334/4250 [01:26<15:06, 4.32it/s]
335
  8%|▊ | 335/4250 [01:26<15:42, 4.15it/s]
336
  8%|▊ | 336/4250 [01:27<16:53, 3.86it/s]
337
  8%|▊ | 337/4250 [01:27<16:14, 4.02it/s]
338
  8%|▊ | 338/4250 [01:27<16:49, 3.87it/s]
339
  8%|▊ | 339/4250 [01:27<16:21, 3.98it/s]
340
  8%|▊ | 340/4250 [01:28<16:10, 4.03it/s]
341
  8%|▊ | 341/4250 [01:28<15:41, 4.15it/s]
342
  8%|▊ | 342/4250 [01:28<14:19, 4.55it/s]
343
  8%|▊ | 343/4250 [01:28<14:23, 4.53it/s]
344
  8%|▊ | 344/4250 [01:28<14:04, 4.62it/s]
345
  8%|▊ | 345/4250 [01:29<14:13, 4.57it/s]
346
  8%|▊ | 346/4250 [01:29<13:44, 4.74it/s]
347
  8%|▊ | 347/4250 [01:29<15:11, 4.28it/s]
348
  8%|▊ | 348/4250 [01:29<15:11, 4.28it/s]
349
  8%|▊ | 349/4250 [01:30<15:11, 4.28it/s]
350
  8%|▊ | 350/4250 [01:30<16:12, 4.01it/s]
351
  8%|▊ | 351/4250 [01:30<15:47, 4.11it/s]
352
  8%|▊ | 352/4250 [01:30<14:58, 4.34it/s]
353
  8%|▊ | 353/4250 [01:31<15:26, 4.20it/s]
354
  8%|▊ | 354/4250 [01:31<15:15, 4.26it/s]
355
  8%|▊ | 355/4250 [01:31<15:28, 4.20it/s]
356
  8%|▊ | 356/4250 [01:31<14:47, 4.39it/s]
357
  8%|▊ | 357/4250 [01:32<15:03, 4.31it/s]
358
  8%|▊ | 358/4250 [01:32<14:13, 4.56it/s]
359
  8%|▊ | 359/4250 [01:32<13:48, 4.69it/s]
360
  8%|▊ | 360/4250 [01:32<17:16, 3.75it/s]
361
  8%|▊ | 361/4250 [01:33<16:19, 3.97it/s]
362
  9%|▊ | 362/4250 [01:33<14:31, 4.46it/s]
363
  9%|▊ | 363/4250 [01:33<13:47, 4.69it/s]
364
  9%|▊ | 364/4250 [01:33<16:00, 4.05it/s]
365
  9%|▊ | 365/4250 [01:33<17:03, 3.79it/s]
366
  9%|▊ | 366/4250 [01:34<15:39, 4.13it/s]
367
  9%|▊ | 367/4250 [01:34<14:24, 4.49it/s]
368
  9%|▊ | 368/4250 [01:34<13:35, 4.76it/s]
369
  9%|▊ | 369/4250 [01:34<14:18, 4.52it/s]
370
  9%|▊ | 370/4250 [01:35<16:38, 3.89it/s]
371
  9%|▊ | 371/4250 [01:35<15:10, 4.26it/s]
372
  9%|▉ | 372/4250 [01:35<14:18, 4.52it/s]
373
  9%|▉ | 373/4250 [01:35<15:04, 4.29it/s]
374
  9%|▉ | 374/4250 [01:35<14:30, 4.45it/s]
375
  9%|▉ | 375/4250 [01:36<17:11, 3.76it/s]
376
  9%|▉ | 376/4250 [01:36<16:01, 4.03it/s]
377
  9%|▉ | 377/4250 [01:36<16:17, 3.96it/s]
378
  9%|▉ | 378/4250 [01:36<14:37, 4.41it/s]
379
  9%|▉ | 379/4250 [01:37<14:14, 4.53it/s]
380
  9%|▉ | 380/4250 [01:37<15:00, 4.30it/s]
381
  9%|▉ | 381/4250 [01:37<15:22, 4.19it/s]
382
  9%|▉ | 382/4250 [01:37<16:12, 3.98it/s]
383
  9%|▉ | 383/4250 [01:38<16:00, 4.03it/s]
384
  9%|▉ | 384/4250 [01:38<15:05, 4.27it/s]
385
  9%|▉ | 385/4250 [01:38<15:35, 4.13it/s]
386
  9%|▉ | 386/4250 [01:38<15:09, 4.25it/s]
387
  9%|▉ | 387/4250 [01:39<14:50, 4.34it/s]
388
  9%|▉ | 388/4250 [01:39<15:02, 4.28it/s]
389
  9%|▉ | 389/4250 [01:39<14:20, 4.49it/s]
390
  9%|▉ | 390/4250 [01:39<13:57, 4.61it/s]
391
  9%|▉ | 391/4250 [01:39<14:27, 4.45it/s]
392
  9%|▉ | 392/4250 [01:40<13:49, 4.65it/s]
393
  9%|▉ | 393/4250 [01:40<13:18, 4.83it/s]
394
  9%|▉ | 394/4250 [01:40<12:53, 4.98it/s]
395
  9%|▉ | 395/4250 [01:40<13:55, 4.61it/s]
396
  9%|▉ | 396/4250 [01:41<15:28, 4.15it/s]
397
  9%|▉ | 397/4250 [01:41<16:24, 3.91it/s]
398
  9%|▉ | 398/4250 [01:41<15:34, 4.12it/s]
399
  9%|▉ | 399/4250 [01:41<16:41, 3.84it/s]
400
  9%|▉ | 400/4250 [01:42<15:57, 4.02it/s]
401
  9%|▉ | 401/4250 [01:42<15:42, 4.08it/s]
402
  9%|▉ | 402/4250 [01:42<14:43, 4.36it/s]
403
  9%|▉ | 403/4250 [01:42<15:24, 4.16it/s]
404
  10%|▉ | 404/4250 [01:43<15:47, 4.06it/s]
405
  10%|▉ | 405/4250 [01:43<15:24, 4.16it/s]
406
  10%|▉ | 406/4250 [01:43<15:58, 4.01it/s]
407
  10%|▉ | 407/4250 [01:43<15:39, 4.09it/s]
408
  10%|▉ | 408/4250 [01:44<14:33, 4.40it/s]
409
  10%|▉ | 409/4250 [01:44<17:09, 3.73it/s]
410
  10%|▉ | 410/4250 [01:44<16:43, 3.83it/s]
411
  10%|▉ | 411/4250 [01:44<17:14, 3.71it/s]
412
  10%|▉ | 412/4250 [01:45<17:18, 3.70it/s]
413
  10%|▉ | 413/4250 [01:45<17:05, 3.74it/s]
414
  10%|▉ | 414/4250 [01:45<17:29, 3.65it/s]
415
  10%|▉ | 415/4250 [01:46<18:02, 3.54it/s]
416
  10%|▉ | 416/4250 [01:46<19:17, 3.31it/s]
417
  10%|▉ | 417/4250 [01:46<20:16, 3.15it/s]
418
  10%|▉ | 418/4250 [01:46<17:55, 3.56it/s]
419
  10%|▉ | 419/4250 [01:47<19:14, 3.32it/s]
420
  10%|▉ | 420/4250 [01:47<18:01, 3.54it/s]
421
  10%|▉ | 421/4250 [01:47<16:54, 3.77it/s]
422
  10%|▉ | 422/4250 [01:47<16:11, 3.94it/s]
423
  10%|▉ | 423/4250 [01:48<16:08, 3.95it/s]
424
  10%|▉ | 424/4250 [01:48<14:54, 4.28it/s]
425
  10%|█ | 425/4250 [01:48<14:18, 4.46it/s][INFO|trainer.py:805] 2024-08-30 21:56:36,656 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
 
 
 
 
 
 
426
  0%| | 0/850 [00:00<?, ?it/s]
 
427
  1%| | 10/850 [00:00<00:08, 98.30it/s]
 
428
  2%|▏ | 20/850 [00:00<00:09, 85.95it/s]
 
429
  3%|▎ | 29/850 [00:00<00:09, 85.06it/s]
 
430
  4%|▍ | 38/850 [00:00<00:09, 85.47it/s]
 
431
  6%|▌ | 47/850 [00:00<00:09, 84.58it/s]
 
432
  7%|▋ | 56/850 [00:00<00:10, 78.37it/s]
 
433
  8%|▊ | 64/850 [00:00<00:10, 77.70it/s]
 
434
  8%|▊ | 72/850 [00:00<00:10, 74.39it/s]
 
435
  9%|▉ | 80/850 [00:01<00:10, 74.24it/s]
 
436
  10%|█ | 89/850 [00:01<00:09, 76.30it/s]
 
437
  12%|█▏ | 98/850 [00:01<00:09, 78.01it/s]
 
438
  13%|█▎ | 108/850 [00:01<00:09, 81.89it/s]
 
439
  14%|█▍ | 117/850 [00:01<00:08, 81.96it/s]
 
440
  15%|█▍ | 127/850 [00:01<00:08, 84.21it/s]
 
441
  16%|█▌ | 136/850 [00:01<00:08, 83.16it/s]
 
442
  17%|█▋ | 145/850 [00:01<00:08, 79.13it/s]
 
443
  18%|█▊ | 153/850 [00:01<00:08, 78.99it/s]
 
444
  19%|█▉ | 162/850 [00:02<00:08, 81.23it/s]
 
445
  20%|██ | 171/850 [00:02<00:08, 82.29it/s]
 
446
  21%|██ | 180/850 [00:02<00:07, 83.94it/s]
 
447
  22%|██▏ | 189/850 [00:02<00:08, 81.23it/s]
 
448
  23%|██▎ | 198/850 [00:02<00:07, 82.48it/s]
 
449
  24%|██▍ | 207/850 [00:02<00:07, 81.00it/s]
 
450
  25%|██▌ | 216/850 [00:02<00:08, 78.97it/s]
 
451
  26%|██▋ | 225/850 [00:02<00:07, 81.69it/s]
 
452
  28%|██▊ | 234/850 [00:02<00:07, 83.51it/s]
 
453
  29%|██▊ | 243/850 [00:02<00:07, 81.58it/s]
 
454
  30%|██▉ | 252/850 [00:03<00:07, 82.44it/s]
 
455
  31%|███ | 262/850 [00:03<00:07, 83.56it/s]
 
456
  32%|███▏ | 271/850 [00:03<00:06, 84.66it/s]
 
457
  33%|███▎ | 280/850 [00:03<00:06, 85.74it/s]
 
458
  34%|███▍ | 289/850 [00:03<00:06, 86.37it/s]
 
459
  35%|███▌ | 298/850 [00:03<00:06, 86.47it/s]
 
460
  36%|███▌ | 307/850 [00:03<00:06, 84.84it/s]
 
461
  37%|███▋ | 316/850 [00:03<00:06, 83.69it/s]
 
462
  38%|███▊ | 325/850 [00:03<00:06, 84.26it/s]
 
463
  39%|███▉ | 334/850 [00:04<00:06, 84.48it/s]
 
464
  40%|████ | 343/850 [00:04<00:06, 84.34it/s]
 
465
  42%|████▏ | 353/850 [00:04<00:05, 86.48it/s]
 
466
  43%|████▎ | 362/850 [00:04<00:05, 83.81it/s]
 
467
  44%|████▎ | 371/850 [00:04<00:05, 84.14it/s]
 
468
  45%|████▍ | 380/850 [00:04<00:05, 80.65it/s]
 
469
  46%|████▌ | 389/850 [00:04<00:05, 82.30it/s]
 
470
  47%|████▋ | 398/850 [00:04<00:05, 79.21it/s]
 
471
  48%|████▊ | 408/850 [00:04<00:05, 83.80it/s]
 
472
  49%|████▉ | 418/850 [00:05<00:04, 86.99it/s]
 
473
  50%|█████ | 428/850 [00:05<00:04, 88.54it/s]
 
474
  51%|█████▏ | 437/850 [00:05<00:04, 86.84it/s]
 
475
  52%|█████▏ | 446/850 [00:05<00:04, 84.38it/s]
 
476
  54%|█████▎ | 455/850 [00:05<00:04, 84.52it/s]
 
477
  55%|█████▍ | 465/850 [00:05<00:04, 87.83it/s]
 
478
  56%|█████▌ | 475/850 [00:05<00:04, 90.15it/s]
 
479
  57%|█████▋ | 485/850 [00:05<00:04, 88.56it/s]
 
480
  58%|█████▊ | 494/850 [00:05<00:04, 88.49it/s]
 
481
  59%|█████▉ | 504/850 [00:06<00:03, 90.91it/s]
 
482
  60%|██████ | 514/850 [00:06<00:03, 90.17it/s]
 
483
  62%|██████▏ | 524/850 [00:06<00:03, 90.41it/s]
 
484
  63%|██████▎ | 534/850 [00:06<00:03, 86.47it/s]
 
485
  64%|██████▍ | 543/850 [00:06<00:03, 84.59it/s]
 
486
  65%|██████▍ | 552/850 [00:06<00:03, 83.56it/s]
 
487
  66%|██████▌ | 561/850 [00:06<00:03, 82.10it/s]
 
488
  67%|██████▋ | 570/850 [00:06<00:03, 80.03it/s]
 
489
  68%|██████▊ | 579/850 [00:06<00:03, 77.55it/s]
 
490
  69%|██████▉ | 588/850 [00:07<00:03, 78.59it/s]
 
491
  70%|███████ | 597/850 [00:07<00:03, 80.01it/s]
 
492
  71%|███████▏ | 606/850 [00:07<00:02, 81.43it/s]
 
493
  72%|███████▏ | 615/850 [00:07<00:02, 83.13it/s]
 
494
  74%|███████▎ | 625/850 [00:07<00:02, 85.27it/s]
 
495
  75%|███████▍ | 634/850 [00:07<00:02, 84.44it/s]
 
496
  76%|███████▌ | 643/850 [00:07<00:02, 85.83it/s]
 
497
  77%|███████▋ | 653/850 [00:07<00:02, 89.01it/s]
 
498
  78%|███████▊ | 662/850 [00:07<00:02, 88.83it/s]
 
499
  79%|███████▉ | 671/850 [00:08<00:02, 86.72it/s]
 
500
  80%|████████ | 680/850 [00:08<00:01, 85.40it/s]
 
501
  81%|████████ | 689/850 [00:08<00:01, 84.31it/s]
 
502
  82%|████████▏ | 698/850 [00:08<00:01, 84.10it/s]
 
503
  83%|████████▎ | 707/850 [00:08<00:01, 82.22it/s]
 
504
  84%|████████▍ | 716/850 [00:08<00:01, 82.16it/s]
 
505
  85%|████████▌ | 725/850 [00:08<00:01, 79.13it/s]
 
506
  86%|████████▋ | 735/850 [00:08<00:01, 82.74it/s]
 
507
  88%|████████▊ | 744/850 [00:08<00:01, 80.60it/s]
 
508
  89%|██���█████▊ | 753/850 [00:09<00:01, 80.30it/s]
 
509
  90%|████████▉ | 762/850 [00:09<00:01, 82.27it/s]
 
510
  91%|█████████ | 771/850 [00:09<00:00, 81.44it/s]
 
511
  92%|█████████▏| 780/850 [00:09<00:00, 80.66it/s]
 
512
  93%|█████████▎| 789/850 [00:09<00:00, 78.52it/s]
 
513
  94%|█████████▍| 798/850 [00:09<00:00, 81.16it/s]
 
514
  95%|█████████▍| 807/850 [00:09<00:00, 81.60it/s]
 
515
  96%|█████████▌| 816/850 [00:09<00:00, 80.79it/s]
 
516
  97%|█████████▋| 826/850 [00:09<00:00, 81.84it/s]
 
517
  98%|█████████▊| 835/850 [00:10<00:00, 82.47it/s]
 
518
  99%|█████████▉| 844/850 [00:10<00:00, 83.17it/s]
519
 
 
520
 
521
  10%|█ | 425/4250 [02:02<14:18, 4.46it/s]
 
 
522
  [INFO|trainer.py:3478] 2024-08-30 21:56:50,913 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-425
 
 
 
 
 
 
 
523
  10%|█ | 426/4250 [02:06<5:49:07, 5.48s/it]
524
  10%|█ | 427/4250 [02:06<4:08:55, 3.91s/it]
525
  10%|█ | 428/4250 [02:06<2:58:35, 2.80s/it]
526
  10%|█ | 429/4250 [02:07<2:09:47, 2.04s/it]
527
  10%|█ | 430/4250 [02:07<1:34:28, 1.48s/it]
528
  10%|█ | 431/4250 [02:07<1:10:13, 1.10s/it]
529
  10%|█ | 432/4250 [02:07<52:57, 1.20it/s]
530
  10%|█ | 433/4250 [02:08<44:00, 1.45it/s]
531
  10%|█ | 434/4250 [02:08<35:08, 1.81it/s]
532
  10%|█ | 435/4250 [02:08<29:02, 2.19it/s]
533
  10%|█ | 436/4250 [02:08<24:36, 2.58it/s]
534
  10%|█ | 437/4250 [02:08<21:08, 3.01it/s]
535
  10%|█ | 438/4250 [02:09<18:48, 3.38it/s]
536
  10%|█ | 439/4250 [02:09<17:50, 3.56it/s]
537
  10%|█ | 440/4250 [02:09<16:07, 3.94it/s]
538
  10%|█ | 441/4250 [02:09<15:15, 4.16it/s]
539
  10%|█ | 442/4250 [02:10<15:13, 4.17it/s]
540
  10%|█ | 443/4250 [02:10<13:33, 4.68it/s]
541
  10%|█ | 444/4250 [02:10<13:28, 4.71it/s]
542
  10%|█ | 445/4250 [02:10<14:04, 4.50it/s]
543
  10%|█ | 446/4250 [02:10<15:40, 4.04it/s]
544
  11%|█ | 447/4250 [02:11<15:52, 3.99it/s]
545
  11%|█ | 448/4250 [02:11<14:48, 4.28it/s]
546
  11%|█ | 449/4250 [02:11<16:28, 3.85it/s]
547
  11%|█ | 450/4250 [02:11<15:36, 4.06it/s]
548
  11%|█ | 451/4250 [02:12<16:05, 3.93it/s]
549
  11%|█ | 452/4250 [02:12<14:50, 4.27it/s]
550
  11%|█ | 453/4250 [02:12<16:32, 3.83it/s]
551
  11%|█ | 454/4250 [02:12<15:02, 4.20it/s]
552
  11%|█ | 455/4250 [02:13<14:32, 4.35it/s]
553
  11%|█ | 456/4250 [02:13<14:43, 4.29it/s]
554
  11%|█ | 457/4250 [02:13<14:25, 4.38it/s]
555
  11%|█ | 458/4250 [02:13<14:56, 4.23it/s]
556
  11%|█ | 459/4250 [02:14<15:10, 4.16it/s]
557
  11%|█ | 460/4250 [02:14<15:13, 4.15it/s]
558
  11%|█ | 461/4250 [02:14<14:27, 4.37it/s]
559
  11%|█ | 462/4250 [02:14<14:50, 4.25it/s]
560
  11%|█ | 463/4250 [02:14<14:23, 4.38it/s]
561
  11%|█ | 464/4250 [02:15<15:27, 4.08it/s]
562
  11%|█ | 465/4250 [02:15<15:11, 4.15it/s]
563
  11%|█ | 466/4250 [02:15<13:40, 4.61it/s]
564
  11%|█ | 467/4250 [02:15<14:22, 4.39it/s]
565
  11%|█ | 468/4250 [02:16<15:43, 4.01it/s]
566
  11%|█ | 469/4250 [02:16<15:14, 4.14it/s]
567
  11%|█ | 470/4250 [02:16<15:34, 4.05it/s]
568
  11%|█ | 471/4250 [02:16<15:31, 4.06it/s]
569
  11%|█ | 472/4250 [02:17<16:26, 3.83it/s]
570
  11%|█ | 473/4250 [02:17<19:00, 3.31it/s]
571
  11%|█ | 474/4250 [02:17<18:14, 3.45it/s]
572
  11%|█ | 475/4250 [02:18<20:13, 3.11it/s]
573
  11%|█ | 476/4250 [02:18<18:00, 3.49it/s]
574
  11%|█ | 477/4250 [02:18<16:28, 3.82it/s]
575
  11%|█ | 478/4250 [02:18<17:09, 3.66it/s]
576
  11%|█▏ | 479/4250 [02:19<15:33, 4.04it/s]
577
  11%|█▏ | 480/4250 [02:19<14:39, 4.29it/s]
578
  11%|█▏ | 481/4250 [02:19<16:24, 3.83it/s]
579
  11%|█▏ | 482/4250 [02:19<16:23, 3.83it/s]
580
  11%|█▏ | 483/4250 [02:20<16:34, 3.79it/s]
581
  11%|█▏ | 484/4250 [02:20<16:50, 3.73it/s]
582
  11%|█▏ | 485/4250 [02:20<16:14, 3.86it/s]
583
  11%|█▏ | 486/4250 [02:20<14:51, 4.22it/s]
584
  11%|█▏ | 487/4250 [02:21<14:57, 4.19it/s]
585
  11%|█▏ | 488/4250 [02:21<15:03, 4.16it/s]
 
1
+ 2024-08-30 21:54:12.390238: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2
+ 2024-08-30 21:54:12.408272: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
3
+ 2024-08-30 21:54:12.429605: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
4
+ 2024-08-30 21:54:12.436048: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
5
+ 2024-08-30 21:54:12.451309: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
6
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
7
+ 2024-08-30 21:54:13.743493: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
8
+ /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1494: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
9
+ warnings.warn(
10
+ 08/30/2024 21:54:15 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
11
+ 08/30/2024 21:54:15 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
12
+ _n_gpu=1,
13
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
14
+ adafactor=False,
15
+ adam_beta1=0.9,
16
+ adam_beta2=0.999,
17
+ adam_epsilon=1e-08,
18
+ auto_find_batch_size=False,
19
+ batch_eval_metrics=False,
20
+ bf16=False,
21
+ bf16_full_eval=False,
22
+ data_seed=None,
23
+ dataloader_drop_last=False,
24
+ dataloader_num_workers=0,
25
+ dataloader_persistent_workers=False,
26
+ dataloader_pin_memory=True,
27
+ dataloader_prefetch_factor=None,
28
+ ddp_backend=None,
29
+ ddp_broadcast_buffers=None,
30
+ ddp_bucket_cap_mb=None,
31
+ ddp_find_unused_parameters=None,
32
+ ddp_timeout=1800,
33
+ debug=[],
34
+ deepspeed=None,
35
+ disable_tqdm=False,
36
+ dispatch_batches=None,
37
+ do_eval=True,
38
+ do_predict=True,
39
+ do_train=True,
40
+ eval_accumulation_steps=None,
41
+ eval_delay=0,
42
+ eval_do_concat_batches=True,
43
+ eval_on_start=False,
44
+ eval_steps=None,
45
+ eval_strategy=epoch,
46
+ evaluation_strategy=epoch,
47
+ fp16=False,
48
+ fp16_backend=auto,
49
+ fp16_full_eval=False,
50
+ fp16_opt_level=O1,
51
+ fsdp=[],
52
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
53
+ fsdp_min_num_params=0,
54
+ fsdp_transformer_layer_cls_to_wrap=None,
55
+ full_determinism=False,
56
+ gradient_accumulation_steps=2,
57
+ gradient_checkpointing=False,
58
+ gradient_checkpointing_kwargs=None,
59
+ greater_is_better=True,
60
+ group_by_length=False,
61
+ half_precision_backend=auto,
62
+ hub_always_push=False,
63
+ hub_model_id=None,
64
+ hub_private_repo=False,
65
+ hub_strategy=every_save,
66
+ hub_token=<HUB_TOKEN>,
67
+ ignore_data_skip=False,
68
+ include_inputs_for_metrics=False,
69
+ include_num_input_tokens_seen=False,
70
+ include_tokens_per_second=False,
71
+ jit_mode_eval=False,
72
+ label_names=None,
73
+ label_smoothing_factor=0.0,
74
+ learning_rate=5e-05,
75
+ length_column_name=length,
76
+ load_best_model_at_end=True,
77
+ local_rank=0,
78
+ log_level=passive,
79
+ log_level_replica=warning,
80
+ log_on_each_node=True,
81
+ logging_dir=/content/dissertation/scripts/ner/output/tb,
82
+ logging_first_step=False,
83
+ logging_nan_inf_filter=True,
84
+ logging_steps=500,
85
+ logging_strategy=steps,
86
+ lr_scheduler_kwargs={},
87
+ lr_scheduler_type=linear,
88
+ max_grad_norm=1.0,
89
+ max_steps=-1,
90
+ metric_for_best_model=f1,
91
+ mp_parameters=,
92
+ neftune_noise_alpha=None,
93
+ no_cuda=False,
94
+ num_train_epochs=10.0,
95
+ optim=adamw_torch,
96
+ optim_args=None,
97
+ optim_target_modules=None,
98
+ output_dir=/content/dissertation/scripts/ner/output,
99
+ overwrite_output_dir=True,
100
+ past_index=-1,
101
+ per_device_eval_batch_size=8,
102
+ per_device_train_batch_size=32,
103
+ prediction_loss_only=False,
104
+ push_to_hub=True,
105
+ push_to_hub_model_id=None,
106
+ push_to_hub_organization=None,
107
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
108
+ ray_scope=last,
109
+ remove_unused_columns=True,
110
+ report_to=['tensorboard'],
111
+ restore_callback_states_from_checkpoint=False,
112
+ resume_from_checkpoint=None,
113
+ run_name=/content/dissertation/scripts/ner/output,
114
+ save_on_each_node=False,
115
+ save_only_model=False,
116
+ save_safetensors=True,
117
+ save_steps=500,
118
+ save_strategy=epoch,
119
+ save_total_limit=None,
120
+ seed=42,
121
+ skip_memory_metrics=True,
122
+ split_batches=None,
123
+ tf32=None,
124
+ torch_compile=False,
125
+ torch_compile_backend=None,
126
+ torch_compile_mode=None,
127
+ torchdynamo=None,
128
+ tpu_metrics_debug=False,
129
+ tpu_num_cores=None,
130
+ use_cpu=False,
131
+ use_ipex=False,
132
+ use_legacy_prediction_loop=False,
133
+ use_mps_device=False,
134
+ warmup_ratio=0.0,
135
+ warmup_steps=0,
136
+ weight_decay=0.0,
137
+ )
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+ [INFO|configuration_utils.py:733] 2024-08-30 21:54:27,962 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/config.json
146
+ [INFO|configuration_utils.py:800] 2024-08-30 21:54:27,966 >> Model config BertConfig {
147
+ "_name_or_path": "IVN-RIN/bioBIT",
148
+ "architectures": [
149
+ "BertForMaskedLM"
150
+ ],
151
+ "attention_probs_dropout_prob": 0.1,
152
+ "classifier_dropout": null,
153
+ "finetuning_task": "ner",
154
+ "hidden_act": "gelu",
155
+ "hidden_dropout_prob": 0.1,
156
+ "hidden_size": 768,
157
+ "id2label": {
158
+ "0": "O",
159
+ "1": "B-FARMACO",
160
+ "2": "I-FARMACO"
161
+ },
162
+ "initializer_range": 0.02,
163
+ "intermediate_size": 3072,
164
+ "label2id": {
165
+ "B-FARMACO": 1,
166
+ "I-FARMACO": 2,
167
+ "O": 0
168
+ },
169
+ "layer_norm_eps": 1e-12,
170
+ "max_position_embeddings": 512,
171
+ "model_type": "bert",
172
+ "num_attention_heads": 12,
173
+ "num_hidden_layers": 12,
174
+ "pad_token_id": 0,
175
+ "position_embedding_type": "absolute",
176
+ "torch_dtype": "float32",
177
+ "transformers_version": "4.42.4",
178
+ "type_vocab_size": 2,
179
+ "use_cache": true,
180
+ "vocab_size": 31102
181
+ }
182
+
183
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 21:54:29,333 >> loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/vocab.txt
184
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 21:54:29,334 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/tokenizer.json
185
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 21:54:29,334 >> loading file added_tokens.json from cache at None
186
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 21:54:29,334 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/special_tokens_map.json
187
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 21:54:29,334 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/tokenizer_config.json
188
+ [INFO|modeling_utils.py:3556] 2024-08-30 21:54:40,888 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/model.safetensors
189
+ [INFO|modeling_utils.py:4354] 2024-08-30 21:54:40,995 >> Some weights of the model checkpoint at IVN-RIN/bioBIT were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
190
+ - This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
191
+ - This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
192
+ [WARNING|modeling_utils.py:4366] 2024-08-30 21:54:40,995 >> Some weights of BertForTokenClassification were not initialized from the model checkpoint at IVN-RIN/bioBIT and are newly initialized: ['classifier.bias', 'classifier.weight']
193
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
194
+
195
+
196
+
197
+ /content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
198
+ metric = load_metric("seqeval", trust_remote_code=True)
199
+ [INFO|trainer.py:805] 2024-08-30 21:54:47,484 >> The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
200
+ [INFO|trainer.py:2128] 2024-08-30 21:54:48,041 >> ***** Running training *****
201
+ [INFO|trainer.py:2129] 2024-08-30 21:54:48,041 >> Num examples = 27,198
202
+ [INFO|trainer.py:2130] 2024-08-30 21:54:48,041 >> Num Epochs = 10
203
+ [INFO|trainer.py:2131] 2024-08-30 21:54:48,041 >> Instantaneous batch size per device = 32
204
+ [INFO|trainer.py:2134] 2024-08-30 21:54:48,041 >> Total train batch size (w. parallel, distributed & accumulation) = 64
205
+ [INFO|trainer.py:2135] 2024-08-30 21:54:48,041 >> Gradient Accumulation steps = 2
206
+ [INFO|trainer.py:2136] 2024-08-30 21:54:48,041 >> Total optimization steps = 4,250
207
+ [INFO|trainer.py:2137] 2024-08-30 21:54:48,042 >> Number of trainable parameters = 109,339,395
208
+
209
  0%| | 0/4250 [00:00<?, ?it/s]
210
  0%| | 1/4250 [00:01<1:16:38, 1.08s/it]
211
  0%| | 2/4250 [00:01<42:55, 1.65it/s]
212
  0%| | 3/4250 [00:01<31:14, 2.27it/s]
213
  0%| | 4/4250 [00:01<27:28, 2.58it/s]
214
  0%| | 5/4250 [00:02<22:59, 3.08it/s]
215
  0%| | 6/4250 [00:02<20:46, 3.40it/s]
216
  0%| | 7/4250 [00:02<19:37, 3.60it/s]
217
  0%| | 8/4250 [00:02<19:26, 3.64it/s]
218
  0%| | 9/4250 [00:03<19:00, 3.72it/s]
219
  0%| | 10/4250 [00:03<18:07, 3.90it/s]
220
  0%| | 11/4250 [00:03<19:02, 3.71it/s]
221
  0%| | 12/4250 [00:04<24:46, 2.85it/s]
222
  0%| | 13/4250 [00:04<21:06, 3.34it/s]
223
  0%| | 14/4250 [00:04<20:00, 3.53it/s]
224
  0%| | 15/4250 [00:04<18:30, 3.81it/s]
225
  0%| | 16/4250 [00:05<18:06, 3.90it/s]
226
  0%| | 17/4250 [00:05<17:34, 4.01it/s]
227
  0%| | 18/4250 [00:05<18:38, 3.78it/s]
228
  0%| | 19/4250 [00:05<17:09, 4.11it/s]
229
  0%| | 20/4250 [00:06<19:46, 3.56it/s]
230
  0%| | 21/4250 [00:06<18:46, 3.75it/s]
231
  1%| | 22/4250 [00:06<17:49, 3.95it/s]
232
  1%| | 23/4250 [00:06<16:35, 4.25it/s]
233
  1%| | 24/4250 [00:07<16:31, 4.26it/s]
234
  1%| | 25/4250 [00:07<16:19, 4.31it/s]
235
  1%| | 26/4250 [00:07<17:22, 4.05it/s]
236
  1%| | 27/4250 [00:07<17:01, 4.14it/s]
237
  1%| | 28/4250 [00:08<17:18, 4.07it/s]
238
  1%| | 29/4250 [00:08<17:59, 3.91it/s]
239
  1%| | 30/4250 [00:08<17:04, 4.12it/s]
240
  1%| | 31/4250 [00:08<16:47, 4.19it/s]
241
  1%| | 32/4250 [00:08<16:27, 4.27it/s]
242
  1%| | 33/4250 [00:09<17:11, 4.09it/s]
243
  1%| | 34/4250 [00:09<15:30, 4.53it/s]
244
  1%| | 35/4250 [00:09<15:15, 4.60it/s]
245
  1%| | 36/4250 [00:09<15:39, 4.49it/s]
246
  1%| | 37/4250 [00:10<15:49, 4.44it/s]
247
  1%| | 38/4250 [00:10<16:11, 4.34it/s]
248
  1%| | 39/4250 [00:10<15:41, 4.47it/s]
249
  1%| | 40/4250 [00:10<15:55, 4.41it/s]
250
  1%| | 41/4250 [00:10<15:07, 4.64it/s]
251
  1%| | 42/4250 [00:11<14:56, 4.69it/s]
252
  1%| | 43/4250 [00:11<15:59, 4.39it/s]
253
  1%| | 44/4250 [00:11<15:40, 4.47it/s]
254
  1%| | 45/4250 [00:11<15:41, 4.47it/s]
255
  1%| | 46/4250 [00:12<17:35, 3.98it/s]
256
  1%| | 47/4250 [00:12<17:45, 3.94it/s]
257
  1%| | 48/4250 [00:12<16:40, 4.20it/s]
258
  1%| | 49/4250 [00:12<15:58, 4.38it/s]
259
  1%| | 50/4250 [00:13<17:10, 4.08it/s]
260
  1%| | 51/4250 [00:13<18:57, 3.69it/s]
261
  1%| | 52/4250 [00:13<19:58, 3.50it/s]
262
  1%| | 53/4250 [00:14<18:49, 3.72it/s]
263
  1%|▏ | 54/4250 [00:14<17:34, 3.98it/s]
264
  1%|▏ | 55/4250 [00:14<16:51, 4.15it/s]
265
  1%|▏ | 56/4250 [00:14<17:23, 4.02it/s]
266
  1%|▏ | 57/4250 [00:15<21:46, 3.21it/s]
267
  1%|▏ | 58/4250 [00:15<20:01, 3.49it/s]
268
  1%|▏ | 59/4250 [00:15<18:11, 3.84it/s]
269
  1%|▏ | 60/4250 [00:15<20:27, 3.41it/s]
270
  1%|▏ | 61/4250 [00:16<18:31, 3.77it/s]
271
  1%|▏ | 62/4250 [00:16<19:20, 3.61it/s]
272
  1%|▏ | 63/4250 [00:16<18:01, 3.87it/s]
273
  2%|▏ | 64/4250 [00:16<18:11, 3.83it/s]
274
  2%|▏ | 65/4250 [00:17<16:45, 4.16it/s]
275
  2%|▏ | 66/4250 [00:17<17:26, 4.00it/s]
276
  2%|▏ | 67/4250 [00:17<17:29, 3.98it/s]
277
  2%|▏ | 68/4250 [00:17<16:14, 4.29it/s]
278
  2%|▏ | 69/4250 [00:18<16:41, 4.18it/s]
279
  2%|▏ | 70/4250 [00:18<15:52, 4.39it/s]
280
  2%|▏ | 71/4250 [00:18<15:10, 4.59it/s]
281
  2%|▏ | 72/4250 [00:18<15:05, 4.61it/s]
282
  2%|▏ | 73/4250 [00:19<16:51, 4.13it/s]
283
  2%|▏ | 74/4250 [00:19<17:50, 3.90it/s]
284
  2%|▏ | 75/4250 [00:19<16:49, 4.14it/s]
285
  2%|▏ | 76/4250 [00:19<16:36, 4.19it/s]
286
  2%|▏ | 77/4250 [00:19<16:32, 4.21it/s]
287
  2%|▏ | 78/4250 [00:20<16:37, 4.18it/s]
288
  2%|▏ | 79/4250 [00:20<20:19, 3.42it/s]
289
  2%|▏ | 80/4250 [00:21<21:51, 3.18it/s]
290
  2%|▏ | 81/4250 [00:21<20:01, 3.47it/s]
291
  2%|▏ | 82/4250 [00:21<18:23, 3.78it/s]
292
  2%|▏ | 83/4250 [00:21<17:42, 3.92it/s]
293
  2%|▏ | 84/4250 [00:21<17:39, 3.93it/s]
294
  2%|▏ | 85/4250 [00:22<20:45, 3.34it/s]
295
  2%|▏ | 86/4250 [00:22<20:38, 3.36it/s]
296
  2%|▏ | 87/4250 [00:22<19:30, 3.56it/s]
297
  2%|▏ | 88/4250 [00:23<18:15, 3.80it/s]
298
  2%|▏ | 89/4250 [00:23<21:52, 3.17it/s]
299
  2%|▏ | 90/4250 [00:23<19:46, 3.51it/s]
300
  2%|▏ | 91/4250 [00:24<20:54, 3.32it/s]
301
  2%|▏ | 92/4250 [00:24<19:20, 3.58it/s]
302
  2%|▏ | 93/4250 [00:24<19:18, 3.59it/s]
303
  2%|▏ | 94/4250 [00:24<18:39, 3.71it/s]
304
  2%|▏ | 95/4250 [00:25<18:39, 3.71it/s]
305
  2%|▏ | 96/4250 [00:25<18:11, 3.81it/s]
306
  2%|▏ | 97/4250 [00:25<17:50, 3.88it/s]
307
  2%|▏ | 98/4250 [00:25<17:37, 3.93it/s]
308
  2%|▏ | 99/4250 [00:26<15:45, 4.39it/s]
309
  2%|▏ | 100/4250 [00:26<18:10, 3.80it/s]
310
  2%|▏ | 101/4250 [00:26<17:23, 3.98it/s]
311
  2%|▏ | 102/4250 [00:26<17:23, 3.97it/s]
312
  2%|▏ | 103/4250 [00:27<18:38, 3.71it/s]
313
  2%|▏ | 104/4250 [00:27<17:19, 3.99it/s]
314
  2%|▏ | 105/4250 [00:27<17:35, 3.93it/s]
315
  2%|▏ | 106/4250 [00:27<16:50, 4.10it/s]
316
  3%|▎ | 107/4250 [00:28<19:37, 3.52it/s]
317
  3%|▎ | 108/4250 [00:28<17:52, 3.86it/s]
318
  3%|▎ | 109/4250 [00:28<17:14, 4.00it/s]
319
  3%|▎ | 110/4250 [00:28<17:47, 3.88it/s]
320
  3%|▎ | 111/4250 [00:29<16:36, 4.15it/s]
321
  3%|▎ | 112/4250 [00:29<17:14, 4.00it/s]
322
  3%|▎ | 113/4250 [00:29<18:29, 3.73it/s]
323
  3%|▎ | 114/4250 [00:29<17:31, 3.93it/s]
324
  3%|▎ | 115/4250 [00:30<19:28, 3.54it/s]
325
  3%|▎ | 116/4250 [00:30<17:27, 3.95it/s]
326
  3%|▎ | 117/4250 [00:30<16:28, 4.18it/s]
327
  3%|▎ | 118/4250 [00:30<16:04, 4.28it/s]
328
  3%|▎ | 119/4250 [00:31<16:06, 4.28it/s]
329
  3%|▎ | 120/4250 [00:31<16:11, 4.25it/s]
330
  3%|▎ | 121/4250 [00:31<16:50, 4.09it/s]
331
  3%|▎ | 122/4250 [00:31<16:30, 4.17it/s]
332
  3%|▎ | 123/4250 [00:32<15:33, 4.42it/s]
333
  3%|▎ | 124/4250 [00:32<16:36, 4.14it/s]
334
  3%|▎ | 125/4250 [00:32<16:22, 4.20it/s]
335
  3%|▎ | 126/4250 [00:32<15:56, 4.31it/s]
336
  3%|▎ | 127/4250 [00:33<17:35, 3.91it/s]
337
  3%|▎ | 128/4250 [00:33<17:48, 3.86it/s]
338
  3%|▎ | 129/4250 [00:33<18:55, 3.63it/s]
339
  3%|▎ | 130/4250 [00:33<19:53, 3.45it/s]
340
  3%|▎ | 131/4250 [00:34<21:30, 3.19it/s]
341
  3%|▎ | 132/4250 [00:34<18:47, 3.65it/s]
342
  3%|▎ | 133/4250 [00:34<19:03, 3.60it/s]
343
  3%|▎ | 134/4250 [00:35<18:17, 3.75it/s]
344
  3%|▎ | 135/4250 [00:35<18:36, 3.69it/s]
345
  3%|▎ | 136/4250 [00:35<17:18, 3.96it/s]
346
  3%|▎ | 137/4250 [00:35<17:23, 3.94it/s]
347
  3%|▎ | 138/4250 [00:36<16:21, 4.19it/s]
348
  3%|▎ | 139/4250 [00:36<18:04, 3.79it/s]
349
  3%|▎ | 140/4250 [00:36<17:20, 3.95it/s]
350
  3%|▎ | 141/4250 [00:36<17:03, 4.01it/s]
351
  3%|▎ | 142/4250 [00:37<16:21, 4.18it/s]
352
  3%|▎ | 143/4250 [00:37<17:08, 3.99it/s]
353
  3%|▎ | 144/4250 [00:37<16:12, 4.22it/s]
354
  3%|▎ | 145/4250 [00:37<16:06, 4.25it/s]
355
  3%|▎ | 146/4250 [00:37<16:05, 4.25it/s]
356
  3%|▎ | 147/4250 [00:38<16:17, 4.20it/s]
357
  3%|▎ | 148/4250 [00:38<15:01, 4.55it/s]
358
  4%|▎ | 149/4250 [00:38<14:40, 4.66it/s]
359
  4%|▎ | 150/4250 [00:38<15:20, 4.45it/s]
360
  4%|▎ | 151/4250 [00:39<15:14, 4.48it/s]
361
  4%|▎ | 152/4250 [00:39<14:33, 4.69it/s]
362
  4%|▎ | 153/4250 [00:39<14:51, 4.59it/s]
363
  4%|▎ | 154/4250 [00:39<15:37, 4.37it/s]
364
  4%|▎ | 155/4250 [00:39<15:52, 4.30it/s]
365
  4%|▎ | 156/4250 [00:40<16:36, 4.11it/s]
366
  4%|▎ | 157/4250 [00:40<18:32, 3.68it/s]
367
  4%|▎ | 158/4250 [00:40<19:13, 3.55it/s]
368
  4%|▎ | 159/4250 [00:41<18:27, 3.69it/s]
369
  4%|▍ | 160/4250 [00:41<20:59, 3.25it/s]
370
  4%|▍ | 161/4250 [00:41<20:13, 3.37it/s]
371
  4%|▍ | 162/4250 [00:42<18:44, 3.63it/s]
372
  4%|▍ | 163/4250 [00:42<17:18, 3.94it/s]
373
  4%|▍ | 164/4250 [00:42<16:43, 4.07it/s]
374
  4%|▍ | 165/4250 [00:42<17:07, 3.97it/s]
375
  4%|▍ | 166/4250 [00:42<15:52, 4.29it/s]
376
  4%|▍ | 167/4250 [00:43<15:46, 4.31it/s]
377
  4%|▍ | 168/4250 [00:43<15:42, 4.33it/s]
378
  4%|▍ | 169/4250 [00:43<15:19, 4.44it/s]
379
  4%|▍ | 170/4250 [00:43<15:05, 4.51it/s]
380
  4%|▍ | 171/4250 [00:43<14:22, 4.73it/s]
381
  4%|▍ | 172/4250 [00:44<17:40, 3.84it/s]
382
  4%|▍ | 173/4250 [00:44<17:04, 3.98it/s]
383
  4%|▍ | 174/4250 [00:44<16:34, 4.10it/s]
384
  4%|▍ | 175/4250 [00:45<16:43, 4.06it/s]
385
  4%|▍ | 176/4250 [00:45<17:02, 3.98it/s]
386
  4%|▍ | 177/4250 [00:45<15:29, 4.38it/s]
387
  4%|▍ | 178/4250 [00:45<16:23, 4.14it/s]
388
  4%|▍ | 179/4250 [00:46<22:29, 3.02it/s]
389
  4%|▍ | 180/4250 [00:46<21:27, 3.16it/s]
390
  4%|▍ | 181/4250 [00:46<21:39, 3.13it/s]
391
  4%|▍ | 182/4250 [00:47<19:25, 3.49it/s]
392
  4%|▍ | 183/4250 [00:47<17:34, 3.86it/s]
393
  4%|▍ | 184/4250 [00:47<15:59, 4.24it/s]
394
  4%|▍ | 185/4250 [00:47<17:30, 3.87it/s]
395
  4%|▍ | 186/4250 [00:48<16:32, 4.10it/s]
396
  4%|▍ | 187/4250 [00:48<15:54, 4.26it/s]
397
  4%|▍ | 188/4250 [00:48<15:03, 4.50it/s]
398
  4%|▍ | 189/4250 [00:48<16:32, 4.09it/s]
399
  4%|▍ | 190/4250 [00:48<16:20, 4.14it/s]
400
  4%|▍ | 191/4250 [00:49<16:00, 4.23it/s]
401
  5%|▍ | 192/4250 [00:49<20:25, 3.31it/s]
402
  5%|▍ | 193/4250 [00:49<19:38, 3.44it/s]
403
  5%|▍ | 194/4250 [00:50<19:04, 3.54it/s]
404
  5%|▍ | 195/4250 [00:50<19:26, 3.48it/s]
405
  5%|▍ | 196/4250 [00:50<19:43, 3.43it/s]
406
  5%|▍ | 197/4250 [00:51<22:32, 3.00it/s]
407
  5%|▍ | 198/4250 [00:51<20:17, 3.33it/s]
408
  5%|▍ | 199/4250 [00:51<19:04, 3.54it/s]
409
  5%|▍ | 200/4250 [00:51<19:10, 3.52it/s]
410
  5%|▍ | 201/4250 [00:52<17:57, 3.76it/s]
411
  5%|▍ | 202/4250 [00:52<16:58, 3.98it/s]
412
  5%|▍ | 203/4250 [00:52<17:50, 3.78it/s]
413
  5%|▍ | 204/4250 [00:53<18:23, 3.67it/s]
414
  5%|▍ | 205/4250 [00:53<20:11, 3.34it/s]
415
  5%|▍ | 206/4250 [00:54<29:04, 2.32it/s]
416
  5%|▍ | 207/4250 [00:54<25:49, 2.61it/s]
417
  5%|▍ | 208/4250 [00:54<23:49, 2.83it/s]
418
  5%|▍ | 209/4250 [00:54<20:34, 3.27it/s]
419
  5%|▍ | 210/4250 [00:55<22:38, 2.97it/s]
420
  5%|▍ | 211/4250 [00:55<20:50, 3.23it/s]
421
  5%|▍ | 212/4250 [00:56<27:58, 2.41it/s]
422
  5%|▌ | 213/4250 [00:56<23:54, 2.81it/s]
423
  5%|▌ | 214/4250 [00:56<21:29, 3.13it/s]
424
  5%|▌ | 215/4250 [00:56<19:16, 3.49it/s]
425
  5%|▌ | 216/4250 [00:57<17:35, 3.82it/s]
426
  5%|▌ | 217/4250 [00:57<17:54, 3.75it/s]
427
  5%|▌ | 218/4250 [00:57<17:15, 3.89it/s]
428
  5%|▌ | 219/4250 [00:57<21:03, 3.19it/s]
429
  5%|▌ | 220/4250 [00:58<21:20, 3.15it/s]
430
  5%|▌ | 221/4250 [00:58<19:17, 3.48it/s]
431
  5%|▌ | 222/4250 [00:58<18:58, 3.54it/s]
432
  5%|▌ | 223/4250 [00:59<18:23, 3.65it/s]
433
  5%|▌ | 224/4250 [00:59<19:19, 3.47it/s]
434
  5%|▌ | 225/4250 [00:59<17:44, 3.78it/s]
435
  5%|▌ | 226/4250 [00:59<16:41, 4.02it/s]
436
  5%|▌ | 227/4250 [00:59<14:48, 4.53it/s]
437
  5%|▌ | 228/4250 [01:00<14:53, 4.50it/s]
438
  5%|▌ | 229/4250 [01:00<15:46, 4.25it/s]
439
  5%|▌ | 230/4250 [01:00<16:31, 4.05it/s]
440
  5%|▌ | 231/4250 [01:00<15:56, 4.20it/s]
441
  5%|▌ | 232/4250 [01:01<16:04, 4.17it/s]
442
  5%|▌ | 233/4250 [01:01<17:08, 3.90it/s]
443
  6%|▌ | 234/4250 [01:01<16:18, 4.11it/s]
444
  6%|▌ | 235/4250 [01:01<15:32, 4.30it/s]
445
  6%|▌ | 236/4250 [01:02<18:03, 3.70it/s]
446
  6%|▌ | 237/4250 [01:02<17:39, 3.79it/s]
447
  6%|▌ | 238/4250 [01:02<17:04, 3.92it/s]
448
  6%|▌ | 239/4250 [01:02<16:28, 4.06it/s]
449
  6%|▌ | 240/4250 [01:03<16:27, 4.06it/s]
450
  6%|▌ | 241/4250 [01:03<15:43, 4.25it/s]
451
  6%|▌ | 242/4250 [01:03<15:13, 4.39it/s]
452
  6%|▌ | 243/4250 [01:03<16:49, 3.97it/s]
453
  6%|▌ | 244/4250 [01:04<16:32, 4.04it/s]
454
  6%|▌ | 245/4250 [01:04<15:08, 4.41it/s]
455
  6%|▌ | 246/4250 [01:04<15:58, 4.18it/s]
456
  6%|▌ | 247/4250 [01:05<18:58, 3.52it/s]
457
  6%|▌ | 248/4250 [01:05<17:18, 3.85it/s]
458
  6%|▌ | 249/4250 [01:05<17:01, 3.92it/s]
459
  6%|▌ | 250/4250 [01:05<17:03, 3.91it/s]
460
  6%|▌ | 251/4250 [01:05<17:23, 3.83it/s]
461
  6%|▌ | 252/4250 [01:06<16:16, 4.10it/s]
462
  6%|▌ | 253/4250 [01:06<16:01, 4.16it/s]
463
  6%|▌ | 254/4250 [01:06<18:43, 3.56it/s]
464
  6%|▌ | 255/4250 [01:07<17:11, 3.87it/s]
465
  6%|▌ | 256/4250 [01:07<15:48, 4.21it/s]
466
  6%|▌ | 257/4250 [01:07<15:45, 4.22it/s]
467
  6%|▌ | 258/4250 [01:07<15:28, 4.30it/s]
468
  6%|▌ | 259/4250 [01:07<15:16, 4.35it/s]
469
  6%|▌ | 260/4250 [01:08<15:46, 4.22it/s]
470
  6%|▌ | 261/4250 [01:08<16:22, 4.06it/s]
471
  6%|▌ | 262/4250 [01:08<16:58, 3.92it/s]
472
  6%|▌ | 263/4250 [01:08<16:51, 3.94it/s]
473
  6%|▌ | 264/4250 [01:09<17:22, 3.82it/s]
474
  6%|▌ | 265/4250 [01:09<18:33, 3.58it/s]
475
  6%|▋ | 266/4250 [01:09<17:10, 3.87it/s]
476
  6%|▋ | 267/4250 [01:09<16:28, 4.03it/s]
477
  6%|▋ | 268/4250 [01:10<15:55, 4.17it/s]
478
  6%|▋ | 269/4250 [01:10<15:05, 4.39it/s]
479
  6%|▋ | 270/4250 [01:10<15:59, 4.15it/s]
480
  6%|▋ | 271/4250 [01:10<15:23, 4.31it/s]
481
  6%|▋ | 272/4250 [01:11<17:34, 3.77it/s]
482
  6%|▋ | 273/4250 [01:11<17:16, 3.84it/s]
483
  6%|▋ | 274/4250 [01:11<16:56, 3.91it/s]
484
  6%|▋ | 275/4250 [01:11<17:35, 3.77it/s]
485
  6%|▋ | 276/4250 [01:12<16:27, 4.02it/s]
486
  7%|▋ | 277/4250 [01:12<16:17, 4.07it/s]
487
  7%|▋ | 278/4250 [01:12<15:37, 4.24it/s]
488
  7%|▋ | 279/4250 [01:12<14:53, 4.45it/s]
489
  7%|▋ | 280/4250 [01:13<14:28, 4.57it/s]
490
  7%|▋ | 281/4250 [01:13<16:13, 4.08it/s]
491
  7%|▋ | 282/4250 [01:13<15:35, 4.24it/s]
492
  7%|▋ | 283/4250 [01:13<18:01, 3.67it/s]
493
  7%|▋ | 284/4250 [01:14<17:47, 3.72it/s]
494
  7%|▋ | 285/4250 [01:14<15:44, 4.20it/s]
495
  7%|▋ | 286/4250 [01:14<14:33, 4.54it/s]
496
  7%|▋ | 287/4250 [01:14<13:57, 4.73it/s]
497
  7%|▋ | 288/4250 [01:14<14:34, 4.53it/s]
498
  7%|▋ | 289/4250 [01:15<14:38, 4.51it/s]
499
  7%|▋ | 290/4250 [01:15<16:00, 4.12it/s]
500
  7%|▋ | 291/4250 [01:15<16:28, 4.01it/s]
501
  7%|▋ | 292/4250 [01:15<16:07, 4.09it/s]
502
  7%|▋ | 293/4250 [01:16<15:33, 4.24it/s]
503
  7%|▋ | 294/4250 [01:16<17:41, 3.73it/s]
504
  7%|▋ | 295/4250 [01:16<18:00, 3.66it/s]
505
  7%|▋ | 296/4250 [01:17<18:14, 3.61it/s]
506
  7%|▋ | 297/4250 [01:17<16:55, 3.89it/s]
507
  7%|▋ | 298/4250 [01:17<16:34, 3.97it/s]
508
  7%|▋ | 299/4250 [01:17<16:28, 4.00it/s]
509
  7%|▋ | 300/4250 [01:18<16:29, 3.99it/s]
510
  7%|▋ | 301/4250 [01:18<15:50, 4.16it/s]
511
  7%|▋ | 302/4250 [01:18<17:24, 3.78it/s]
512
  7%|▋ | 303/4250 [01:18<16:36, 3.96it/s]
513
  7%|▋ | 304/4250 [01:19<16:51, 3.90it/s]
514
  7%|▋ | 305/4250 [01:19<15:48, 4.16it/s]
515
  7%|▋ | 306/4250 [01:19<16:32, 3.97it/s]
516
  7%|▋ | 307/4250 [01:19<16:51, 3.90it/s]
517
  7%|▋ | 308/4250 [01:20<15:55, 4.13it/s]
518
  7%|▋ | 309/4250 [01:20<17:03, 3.85it/s]
519
  7%|▋ | 310/4250 [01:20<15:39, 4.19it/s]
520
  7%|▋ | 311/4250 [01:20<17:13, 3.81it/s]
521
  7%|▋ | 312/4250 [01:21<16:25, 4.00it/s]
522
  7%|▋ | 313/4250 [01:21<15:07, 4.34it/s]
523
  7%|▋ | 314/4250 [01:21<14:13, 4.61it/s]
524
  7%|▋ | 315/4250 [01:21<14:42, 4.46it/s]
525
  7%|▋ | 316/4250 [01:21<14:47, 4.43it/s]
526
  7%|▋ | 317/4250 [01:22<17:53, 3.67it/s]
527
  7%|▋ | 318/4250 [01:22<16:43, 3.92it/s]
528
  8%|▊ | 319/4250 [01:22<16:30, 3.97it/s]
529
  8%|▊ | 320/4250 [01:23<17:33, 3.73it/s]
530
  8%|▊ | 321/4250 [01:23<16:49, 3.89it/s]
531
  8%|▊ | 322/4250 [01:23<19:39, 3.33it/s]
532
  8%|▊ | 323/4250 [01:23<19:31, 3.35it/s]
533
  8%|▊ | 324/4250 [01:24<20:40, 3.17it/s]
534
  8%|▊ | 325/4250 [01:24<18:45, 3.49it/s]
535
  8%|▊ | 326/4250 [01:24<16:12, 4.04it/s]
536
  8%|▊ | 327/4250 [01:24<15:22, 4.25it/s]
537
  8%|▊ | 328/4250 [01:25<14:59, 4.36it/s]
538
  8%|▊ | 329/4250 [01:25<16:05, 4.06it/s]
539
  8%|▊ | 330/4250 [01:25<15:15, 4.28it/s]
540
  8%|▊ | 331/4250 [01:25<15:50, 4.12it/s]
541
  8%|▊ | 332/4250 [01:26<15:59, 4.08it/s]
542
  8%|▊ | 333/4250 [01:26<15:12, 4.29it/s]
543
  8%|▊ | 334/4250 [01:26<15:06, 4.32it/s]
544
  8%|▊ | 335/4250 [01:26<15:42, 4.15it/s]
545
  8%|▊ | 336/4250 [01:27<16:53, 3.86it/s]
546
  8%|▊ | 337/4250 [01:27<16:14, 4.02it/s]
547
  8%|▊ | 338/4250 [01:27<16:49, 3.87it/s]
548
  8%|▊ | 339/4250 [01:27<16:21, 3.98it/s]
549
  8%|▊ | 340/4250 [01:28<16:10, 4.03it/s]
550
  8%|▊ | 341/4250 [01:28<15:41, 4.15it/s]
551
  8%|▊ | 342/4250 [01:28<14:19, 4.55it/s]
552
  8%|▊ | 343/4250 [01:28<14:23, 4.53it/s]
553
  8%|▊ | 344/4250 [01:28<14:04, 4.62it/s]
554
  8%|▊ | 345/4250 [01:29<14:13, 4.57it/s]
555
  8%|▊ | 346/4250 [01:29<13:44, 4.74it/s]
556
  8%|▊ | 347/4250 [01:29<15:11, 4.28it/s]
557
  8%|▊ | 348/4250 [01:29<15:11, 4.28it/s]
558
  8%|▊ | 349/4250 [01:30<15:11, 4.28it/s]
559
  8%|▊ | 350/4250 [01:30<16:12, 4.01it/s]
560
  8%|▊ | 351/4250 [01:30<15:47, 4.11it/s]
561
  8%|▊ | 352/4250 [01:30<14:58, 4.34it/s]
562
  8%|▊ | 353/4250 [01:31<15:26, 4.20it/s]
563
  8%|▊ | 354/4250 [01:31<15:15, 4.26it/s]
564
  8%|▊ | 355/4250 [01:31<15:28, 4.20it/s]
565
  8%|▊ | 356/4250 [01:31<14:47, 4.39it/s]
566
  8%|▊ | 357/4250 [01:32<15:03, 4.31it/s]
567
  8%|▊ | 358/4250 [01:32<14:13, 4.56it/s]
568
  8%|▊ | 359/4250 [01:32<13:48, 4.69it/s]
569
  8%|▊ | 360/4250 [01:32<17:16, 3.75it/s]
570
  8%|▊ | 361/4250 [01:33<16:19, 3.97it/s]
571
  9%|▊ | 362/4250 [01:33<14:31, 4.46it/s]
572
  9%|▊ | 363/4250 [01:33<13:47, 4.69it/s]
573
  9%|▊ | 364/4250 [01:33<16:00, 4.05it/s]
574
  9%|▊ | 365/4250 [01:33<17:03, 3.79it/s]
575
  9%|▊ | 366/4250 [01:34<15:39, 4.13it/s]
576
  9%|▊ | 367/4250 [01:34<14:24, 4.49it/s]
577
  9%|▊ | 368/4250 [01:34<13:35, 4.76it/s]
578
  9%|▊ | 369/4250 [01:34<14:18, 4.52it/s]
579
  9%|▊ | 370/4250 [01:35<16:38, 3.89it/s]
580
  9%|▊ | 371/4250 [01:35<15:10, 4.26it/s]
581
  9%|▉ | 372/4250 [01:35<14:18, 4.52it/s]
582
  9%|▉ | 373/4250 [01:35<15:04, 4.29it/s]
583
  9%|▉ | 374/4250 [01:35<14:30, 4.45it/s]
584
  9%|▉ | 375/4250 [01:36<17:11, 3.76it/s]
585
  9%|▉ | 376/4250 [01:36<16:01, 4.03it/s]
586
  9%|▉ | 377/4250 [01:36<16:17, 3.96it/s]
587
  9%|▉ | 378/4250 [01:36<14:37, 4.41it/s]
588
  9%|▉ | 379/4250 [01:37<14:14, 4.53it/s]
589
  9%|▉ | 380/4250 [01:37<15:00, 4.30it/s]
590
  9%|▉ | 381/4250 [01:37<15:22, 4.19it/s]
591
  9%|▉ | 382/4250 [01:37<16:12, 3.98it/s]
592
  9%|▉ | 383/4250 [01:38<16:00, 4.03it/s]
593
  9%|▉ | 384/4250 [01:38<15:05, 4.27it/s]
594
  9%|▉ | 385/4250 [01:38<15:35, 4.13it/s]
595
  9%|▉ | 386/4250 [01:38<15:09, 4.25it/s]
596
  9%|▉ | 387/4250 [01:39<14:50, 4.34it/s]
597
  9%|▉ | 388/4250 [01:39<15:02, 4.28it/s]
598
  9%|▉ | 389/4250 [01:39<14:20, 4.49it/s]
599
  9%|▉ | 390/4250 [01:39<13:57, 4.61it/s]
600
  9%|▉ | 391/4250 [01:39<14:27, 4.45it/s]
601
  9%|▉ | 392/4250 [01:40<13:49, 4.65it/s]
602
  9%|▉ | 393/4250 [01:40<13:18, 4.83it/s]
603
  9%|▉ | 394/4250 [01:40<12:53, 4.98it/s]
604
  9%|▉ | 395/4250 [01:40<13:55, 4.61it/s]
605
  9%|▉ | 396/4250 [01:41<15:28, 4.15it/s]
606
  9%|▉ | 397/4250 [01:41<16:24, 3.91it/s]
607
  9%|▉ | 398/4250 [01:41<15:34, 4.12it/s]
608
  9%|▉ | 399/4250 [01:41<16:41, 3.84it/s]
609
  9%|▉ | 400/4250 [01:42<15:57, 4.02it/s]
610
  9%|▉ | 401/4250 [01:42<15:42, 4.08it/s]
611
  9%|▉ | 402/4250 [01:42<14:43, 4.36it/s]
612
  9%|▉ | 403/4250 [01:42<15:24, 4.16it/s]
613
  10%|▉ | 404/4250 [01:43<15:47, 4.06it/s]
614
  10%|▉ | 405/4250 [01:43<15:24, 4.16it/s]
615
  10%|▉ | 406/4250 [01:43<15:58, 4.01it/s]
616
  10%|▉ | 407/4250 [01:43<15:39, 4.09it/s]
617
  10%|▉ | 408/4250 [01:44<14:33, 4.40it/s]
618
  10%|▉ | 409/4250 [01:44<17:09, 3.73it/s]
619
  10%|▉ | 410/4250 [01:44<16:43, 3.83it/s]
620
  10%|▉ | 411/4250 [01:44<17:14, 3.71it/s]
621
  10%|▉ | 412/4250 [01:45<17:18, 3.70it/s]
622
  10%|▉ | 413/4250 [01:45<17:05, 3.74it/s]
623
  10%|▉ | 414/4250 [01:45<17:29, 3.65it/s]
624
  10%|▉ | 415/4250 [01:46<18:02, 3.54it/s]
625
  10%|▉ | 416/4250 [01:46<19:17, 3.31it/s]
626
  10%|▉ | 417/4250 [01:46<20:16, 3.15it/s]
627
  10%|▉ | 418/4250 [01:46<17:55, 3.56it/s]
628
  10%|▉ | 419/4250 [01:47<19:14, 3.32it/s]
629
  10%|▉ | 420/4250 [01:47<18:01, 3.54it/s]
630
  10%|▉ | 421/4250 [01:47<16:54, 3.77it/s]
631
  10%|▉ | 422/4250 [01:47<16:11, 3.94it/s]
632
  10%|▉ | 423/4250 [01:48<16:08, 3.95it/s]
633
  10%|▉ | 424/4250 [01:48<14:54, 4.28it/s]
634
  10%|█ | 425/4250 [01:48<14:18, 4.46it/s][INFO|trainer.py:805] 2024-08-30 21:56:36,656 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
635
+ [INFO|trainer.py:3788] 2024-08-30 21:56:36,658 >>
636
+ ***** Running Evaluation *****
637
+ [INFO|trainer.py:3790] 2024-08-30 21:56:36,658 >> Num examples = 6798
638
+ [INFO|trainer.py:3793] 2024-08-30 21:56:36,658 >> Batch size = 8
639
+
640
+
641
  0%| | 0/850 [00:00<?, ?it/s]
642
+
643
  1%| | 10/850 [00:00<00:08, 98.30it/s]
644
+
645
  2%|▏ | 20/850 [00:00<00:09, 85.95it/s]
646
+
647
  3%|▎ | 29/850 [00:00<00:09, 85.06it/s]
648
+
649
  4%|▍ | 38/850 [00:00<00:09, 85.47it/s]
650
+
651
  6%|▌ | 47/850 [00:00<00:09, 84.58it/s]
652
+
653
  7%|▋ | 56/850 [00:00<00:10, 78.37it/s]
654
+
655
  8%|▊ | 64/850 [00:00<00:10, 77.70it/s]
656
+
657
  8%|▊ | 72/850 [00:00<00:10, 74.39it/s]
658
+
659
  9%|▉ | 80/850 [00:01<00:10, 74.24it/s]
660
+
661
  10%|█ | 89/850 [00:01<00:09, 76.30it/s]
662
+
663
  12%|█▏ | 98/850 [00:01<00:09, 78.01it/s]
664
+
665
  13%|█▎ | 108/850 [00:01<00:09, 81.89it/s]
666
+
667
  14%|█▍ | 117/850 [00:01<00:08, 81.96it/s]
668
+
669
  15%|█▍ | 127/850 [00:01<00:08, 84.21it/s]
670
+
671
  16%|█▌ | 136/850 [00:01<00:08, 83.16it/s]
672
+
673
  17%|█▋ | 145/850 [00:01<00:08, 79.13it/s]
674
+
675
  18%|█▊ | 153/850 [00:01<00:08, 78.99it/s]
676
+
677
  19%|█▉ | 162/850 [00:02<00:08, 81.23it/s]
678
+
679
  20%|██ | 171/850 [00:02<00:08, 82.29it/s]
680
+
681
  21%|██ | 180/850 [00:02<00:07, 83.94it/s]
682
+
683
  22%|██▏ | 189/850 [00:02<00:08, 81.23it/s]
684
+
685
  23%|██▎ | 198/850 [00:02<00:07, 82.48it/s]
686
+
687
  24%|██▍ | 207/850 [00:02<00:07, 81.00it/s]
688
+
689
  25%|██▌ | 216/850 [00:02<00:08, 78.97it/s]
690
+
691
  26%|██▋ | 225/850 [00:02<00:07, 81.69it/s]
692
+
693
  28%|██▊ | 234/850 [00:02<00:07, 83.51it/s]
694
+
695
  29%|██▊ | 243/850 [00:02<00:07, 81.58it/s]
696
+
697
  30%|██▉ | 252/850 [00:03<00:07, 82.44it/s]
698
+
699
  31%|███ | 262/850 [00:03<00:07, 83.56it/s]
700
+
701
  32%|███▏ | 271/850 [00:03<00:06, 84.66it/s]
702
+
703
  33%|███▎ | 280/850 [00:03<00:06, 85.74it/s]
704
+
705
  34%|███▍ | 289/850 [00:03<00:06, 86.37it/s]
706
+
707
  35%|███▌ | 298/850 [00:03<00:06, 86.47it/s]
708
+
709
  36%|███▌ | 307/850 [00:03<00:06, 84.84it/s]
710
+
711
  37%|███▋ | 316/850 [00:03<00:06, 83.69it/s]
712
+
713
  38%|███▊ | 325/850 [00:03<00:06, 84.26it/s]
714
+
715
  39%|███▉ | 334/850 [00:04<00:06, 84.48it/s]
716
+
717
  40%|████ | 343/850 [00:04<00:06, 84.34it/s]
718
+
719
  42%|████▏ | 353/850 [00:04<00:05, 86.48it/s]
720
+
721
  43%|████▎ | 362/850 [00:04<00:05, 83.81it/s]
722
+
723
  44%|████▎ | 371/850 [00:04<00:05, 84.14it/s]
724
+
725
  45%|████▍ | 380/850 [00:04<00:05, 80.65it/s]
726
+
727
  46%|████▌ | 389/850 [00:04<00:05, 82.30it/s]
728
+
729
  47%|████▋ | 398/850 [00:04<00:05, 79.21it/s]
730
+
731
  48%|████▊ | 408/850 [00:04<00:05, 83.80it/s]
732
+
733
  49%|████▉ | 418/850 [00:05<00:04, 86.99it/s]
734
+
735
  50%|█████ | 428/850 [00:05<00:04, 88.54it/s]
736
+
737
  51%|█████▏ | 437/850 [00:05<00:04, 86.84it/s]
738
+
739
  52%|█████▏ | 446/850 [00:05<00:04, 84.38it/s]
740
+
741
  54%|█████▎ | 455/850 [00:05<00:04, 84.52it/s]
742
+
743
  55%|█████▍ | 465/850 [00:05<00:04, 87.83it/s]
744
+
745
  56%|█████▌ | 475/850 [00:05<00:04, 90.15it/s]
746
+
747
  57%|█████▋ | 485/850 [00:05<00:04, 88.56it/s]
748
+
749
  58%|█████▊ | 494/850 [00:05<00:04, 88.49it/s]
750
+
751
  59%|█████▉ | 504/850 [00:06<00:03, 90.91it/s]
752
+
753
  60%|██████ | 514/850 [00:06<00:03, 90.17it/s]
754
+
755
  62%|██████▏ | 524/850 [00:06<00:03, 90.41it/s]
756
+
757
  63%|██████▎ | 534/850 [00:06<00:03, 86.47it/s]
758
+
759
  64%|██████▍ | 543/850 [00:06<00:03, 84.59it/s]
760
+
761
  65%|██████▍ | 552/850 [00:06<00:03, 83.56it/s]
762
+
763
  66%|██████▌ | 561/850 [00:06<00:03, 82.10it/s]
764
+
765
  67%|██████▋ | 570/850 [00:06<00:03, 80.03it/s]
766
+
767
  68%|██████▊ | 579/850 [00:06<00:03, 77.55it/s]
768
+
769
  69%|██████▉ | 588/850 [00:07<00:03, 78.59it/s]
770
+
771
  70%|███████ | 597/850 [00:07<00:03, 80.01it/s]
772
+
773
  71%|███████▏ | 606/850 [00:07<00:02, 81.43it/s]
774
+
775
  72%|███████▏ | 615/850 [00:07<00:02, 83.13it/s]
776
+
777
  74%|███████▎ | 625/850 [00:07<00:02, 85.27it/s]
778
+
779
  75%|███████▍ | 634/850 [00:07<00:02, 84.44it/s]
780
+
781
  76%|███████▌ | 643/850 [00:07<00:02, 85.83it/s]
782
+
783
  77%|███████▋ | 653/850 [00:07<00:02, 89.01it/s]
784
+
785
  78%|███████▊ | 662/850 [00:07<00:02, 88.83it/s]
786
+
787
  79%|███████▉ | 671/850 [00:08<00:02, 86.72it/s]
788
+
789
  80%|████████ | 680/850 [00:08<00:01, 85.40it/s]
790
+
791
  81%|████████ | 689/850 [00:08<00:01, 84.31it/s]
792
+
793
  82%|████████▏ | 698/850 [00:08<00:01, 84.10it/s]
794
+
795
  83%|████████▎ | 707/850 [00:08<00:01, 82.22it/s]
796
+
797
  84%|████████▍ | 716/850 [00:08<00:01, 82.16it/s]
798
+
799
  85%|████████▌ | 725/850 [00:08<00:01, 79.13it/s]
800
+
801
  86%|████████▋ | 735/850 [00:08<00:01, 82.74it/s]
802
+
803
  88%|████████▊ | 744/850 [00:08<00:01, 80.60it/s]
804
+
805
  89%|██���█████▊ | 753/850 [00:09<00:01, 80.30it/s]
806
+
807
  90%|████████▉ | 762/850 [00:09<00:01, 82.27it/s]
808
+
809
  91%|█████████ | 771/850 [00:09<00:00, 81.44it/s]
810
+
811
  92%|█████████▏| 780/850 [00:09<00:00, 80.66it/s]
812
+
813
  93%|█████████▎| 789/850 [00:09<00:00, 78.52it/s]
814
+
815
  94%|█████████▍| 798/850 [00:09<00:00, 81.16it/s]
816
+
817
  95%|█████████▍| 807/850 [00:09<00:00, 81.60it/s]
818
+
819
  96%|█████████▌| 816/850 [00:09<00:00, 80.79it/s]
820
+
821
  97%|█████████▋| 826/850 [00:09<00:00, 81.84it/s]
822
+
823
  98%|█████████▊| 835/850 [00:10<00:00, 82.47it/s]
824
+
825
  99%|█████████▉| 844/850 [00:10<00:00, 83.17it/s]
826
 
827
+
828
 
829
  10%|█ | 425/4250 [02:02<14:18, 4.46it/s]
830
+
831
+
832
  [INFO|trainer.py:3478] 2024-08-30 21:56:50,913 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-425
833
+ [INFO|configuration_utils.py:472] 2024-08-30 21:56:50,914 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-425/config.json
834
+ [INFO|modeling_utils.py:2690] 2024-08-30 21:56:52,125 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-425/model.safetensors
835
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 21:56:52,126 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-425/tokenizer_config.json
836
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 21:56:52,126 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-425/special_tokens_map.json
837
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 21:56:54,071 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
838
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 21:56:54,071 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
839
+
840
  10%|█ | 426/4250 [02:06<5:49:07, 5.48s/it]
841
  10%|█ | 427/4250 [02:06<4:08:55, 3.91s/it]
842
  10%|█ | 428/4250 [02:06<2:58:35, 2.80s/it]
843
  10%|█ | 429/4250 [02:07<2:09:47, 2.04s/it]
844
  10%|█ | 430/4250 [02:07<1:34:28, 1.48s/it]
845
  10%|█ | 431/4250 [02:07<1:10:13, 1.10s/it]
846
  10%|█ | 432/4250 [02:07<52:57, 1.20it/s]
847
  10%|█ | 433/4250 [02:08<44:00, 1.45it/s]
848
  10%|█ | 434/4250 [02:08<35:08, 1.81it/s]
849
  10%|█ | 435/4250 [02:08<29:02, 2.19it/s]
850
  10%|█ | 436/4250 [02:08<24:36, 2.58it/s]
851
  10%|█ | 437/4250 [02:08<21:08, 3.01it/s]
852
  10%|█ | 438/4250 [02:09<18:48, 3.38it/s]
853
  10%|█ | 439/4250 [02:09<17:50, 3.56it/s]
854
  10%|█ | 440/4250 [02:09<16:07, 3.94it/s]
855
  10%|█ | 441/4250 [02:09<15:15, 4.16it/s]
856
  10%|█ | 442/4250 [02:10<15:13, 4.17it/s]
857
  10%|█ | 443/4250 [02:10<13:33, 4.68it/s]
858
  10%|█ | 444/4250 [02:10<13:28, 4.71it/s]
859
  10%|█ | 445/4250 [02:10<14:04, 4.50it/s]
860
  10%|█ | 446/4250 [02:10<15:40, 4.04it/s]
861
  11%|█ | 447/4250 [02:11<15:52, 3.99it/s]
862
  11%|█ | 448/4250 [02:11<14:48, 4.28it/s]
863
  11%|█ | 449/4250 [02:11<16:28, 3.85it/s]
864
  11%|█ | 450/4250 [02:11<15:36, 4.06it/s]
865
  11%|█ | 451/4250 [02:12<16:05, 3.93it/s]
866
  11%|█ | 452/4250 [02:12<14:50, 4.27it/s]
867
  11%|█ | 453/4250 [02:12<16:32, 3.83it/s]
868
  11%|█ | 454/4250 [02:12<15:02, 4.20it/s]
869
  11%|█ | 455/4250 [02:13<14:32, 4.35it/s]
870
  11%|█ | 456/4250 [02:13<14:43, 4.29it/s]
871
  11%|█ | 457/4250 [02:13<14:25, 4.38it/s]
872
  11%|█ | 458/4250 [02:13<14:56, 4.23it/s]
873
  11%|█ | 459/4250 [02:14<15:10, 4.16it/s]
874
  11%|█ | 460/4250 [02:14<15:13, 4.15it/s]
875
  11%|█ | 461/4250 [02:14<14:27, 4.37it/s]
876
  11%|█ | 462/4250 [02:14<14:50, 4.25it/s]
877
  11%|█ | 463/4250 [02:14<14:23, 4.38it/s]
878
  11%|█ | 464/4250 [02:15<15:27, 4.08it/s]
879
  11%|█ | 465/4250 [02:15<15:11, 4.15it/s]
880
  11%|█ | 466/4250 [02:15<13:40, 4.61it/s]
881
  11%|█ | 467/4250 [02:15<14:22, 4.39it/s]
882
  11%|█ | 468/4250 [02:16<15:43, 4.01it/s]
883
  11%|█ | 469/4250 [02:16<15:14, 4.14it/s]
884
  11%|█ | 470/4250 [02:16<15:34, 4.05it/s]
885
  11%|█ | 471/4250 [02:16<15:31, 4.06it/s]
886
  11%|█ | 472/4250 [02:17<16:26, 3.83it/s]
887
  11%|█ | 473/4250 [02:17<19:00, 3.31it/s]
888
  11%|█ | 474/4250 [02:17<18:14, 3.45it/s]
889
  11%|█ | 475/4250 [02:18<20:13, 3.11it/s]
890
  11%|█ | 476/4250 [02:18<18:00, 3.49it/s]
891
  11%|█ | 477/4250 [02:18<16:28, 3.82it/s]
892
  11%|█ | 478/4250 [02:18<17:09, 3.66it/s]
893
  11%|█▏ | 479/4250 [02:19<15:33, 4.04it/s]
894
  11%|█▏ | 480/4250 [02:19<14:39, 4.29it/s]
895
  11%|█▏ | 481/4250 [02:19<16:24, 3.83it/s]
896
  11%|█▏ | 482/4250 [02:19<16:23, 3.83it/s]
897
  11%|█▏ | 483/4250 [02:20<16:34, 3.79it/s]
898
  11%|█▏ | 484/4250 [02:20<16:50, 3.73it/s]
899
  11%|█▏ | 485/4250 [02:20<16:14, 3.86it/s]
900
  11%|█▏ | 486/4250 [02:20<14:51, 4.22it/s]
901
  11%|█▏ | 487/4250 [02:21<14:57, 4.19it/s]
902
  11%|█▏ | 488/4250 [02:21<15:03, 4.16it/s]
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "total_flos": 1.0996932656642544e+16,
4
+ "train_loss": 0.003382195293697344,
5
+ "train_runtime": 1039.0596,
6
+ "train_samples": 27768,
7
+ "train_samples_per_second": 267.242,
8
+ "train_steps_per_second": 4.177
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9314045730284647,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3038",
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4340,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.9980813775011861,
14
+ "eval_f1": 0.8937558247903076,
15
+ "eval_loss": 0.005673006176948547,
16
+ "eval_precision": 0.8937558247903076,
17
+ "eval_recall": 0.8937558247903076,
18
+ "eval_runtime": 13.5502,
19
+ "eval_samples_per_second": 512.611,
20
+ "eval_steps_per_second": 64.132,
21
+ "step": 434
22
+ },
23
+ {
24
+ "epoch": 1.1520737327188941,
25
+ "grad_norm": 0.324382483959198,
26
+ "learning_rate": 4.423963133640553e-05,
27
+ "loss": 0.0182,
28
+ "step": 500
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_accuracy": 0.9985418469009014,
33
+ "eval_f1": 0.9160165213400643,
34
+ "eval_loss": 0.004419892560690641,
35
+ "eval_precision": 0.9023508137432188,
36
+ "eval_recall": 0.9301025163094129,
37
+ "eval_runtime": 13.4449,
38
+ "eval_samples_per_second": 516.626,
39
+ "eval_steps_per_second": 64.634,
40
+ "step": 868
41
+ },
42
+ {
43
+ "epoch": 2.3041474654377883,
44
+ "grad_norm": 0.0693276971578598,
45
+ "learning_rate": 3.847926267281106e-05,
46
+ "loss": 0.0039,
47
+ "step": 1000
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "eval_accuracy": 0.9986534758462869,
52
+ "eval_f1": 0.9205175600739371,
53
+ "eval_loss": 0.0044819144532084465,
54
+ "eval_precision": 0.9129239230064161,
55
+ "eval_recall": 0.9282385834109972,
56
+ "eval_runtime": 13.422,
57
+ "eval_samples_per_second": 517.51,
58
+ "eval_steps_per_second": 64.745,
59
+ "step": 1302
60
+ },
61
+ {
62
+ "epoch": 3.456221198156682,
63
+ "grad_norm": 0.06998981535434723,
64
+ "learning_rate": 3.271889400921659e-05,
65
+ "loss": 0.0024,
66
+ "step": 1500
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "eval_accuracy": 0.9983255658192169,
71
+ "eval_f1": 0.9076923076923078,
72
+ "eval_loss": 0.005129755008965731,
73
+ "eval_precision": 0.882145998240985,
74
+ "eval_recall": 0.934762348555452,
75
+ "eval_runtime": 13.6729,
76
+ "eval_samples_per_second": 508.013,
77
+ "eval_steps_per_second": 63.556,
78
+ "step": 1736
79
+ },
80
+ {
81
+ "epoch": 4.6082949308755765,
82
+ "grad_norm": 0.0027600331231951714,
83
+ "learning_rate": 2.6958525345622122e-05,
84
+ "loss": 0.0017,
85
+ "step": 2000
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "eval_accuracy": 0.9986464990372004,
90
+ "eval_f1": 0.9285051067780873,
91
+ "eval_loss": 0.0056963409297168255,
92
+ "eval_precision": 0.9250693802035153,
93
+ "eval_recall": 0.9319664492078286,
94
+ "eval_runtime": 13.4387,
95
+ "eval_samples_per_second": 516.864,
96
+ "eval_steps_per_second": 64.664,
97
+ "step": 2170
98
+ },
99
+ {
100
+ "epoch": 5.76036866359447,
101
+ "grad_norm": 0.018086101859807968,
102
+ "learning_rate": 2.1198156682027652e-05,
103
+ "loss": 0.0012,
104
+ "step": 2500
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "eval_accuracy": 0.9984023107191695,
109
+ "eval_f1": 0.9116835326586937,
110
+ "eval_loss": 0.006062328349798918,
111
+ "eval_precision": 0.9000908265213442,
112
+ "eval_recall": 0.923578751164958,
113
+ "eval_runtime": 13.3783,
114
+ "eval_samples_per_second": 519.199,
115
+ "eval_steps_per_second": 64.956,
116
+ "step": 2604
117
+ },
118
+ {
119
+ "epoch": 6.912442396313364,
120
+ "grad_norm": 0.009607589803636074,
121
+ "learning_rate": 1.543778801843318e-05,
122
+ "loss": 0.0009,
123
+ "step": 3000
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "eval_accuracy": 0.9986953367008066,
128
+ "eval_f1": 0.9314045730284647,
129
+ "eval_loss": 0.005624314770102501,
130
+ "eval_precision": 0.9327102803738317,
131
+ "eval_recall": 0.9301025163094129,
132
+ "eval_runtime": 13.399,
133
+ "eval_samples_per_second": 518.397,
134
+ "eval_steps_per_second": 64.856,
135
+ "step": 3038
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_accuracy": 0.9985837077554209,
140
+ "eval_f1": 0.9231477220432582,
141
+ "eval_loss": 0.006831143982708454,
142
+ "eval_precision": 0.9118181818181819,
143
+ "eval_recall": 0.934762348555452,
144
+ "eval_runtime": 13.3718,
145
+ "eval_samples_per_second": 519.45,
146
+ "eval_steps_per_second": 64.987,
147
+ "step": 3472
148
+ },
149
+ {
150
+ "epoch": 8.064516129032258,
151
+ "grad_norm": 0.0005392630700953305,
152
+ "learning_rate": 9.67741935483871e-06,
153
+ "loss": 0.0006,
154
+ "step": 3500
155
+ },
156
+ {
157
+ "epoch": 9.0,
158
+ "eval_accuracy": 0.9986813830826333,
159
+ "eval_f1": 0.9288702928870293,
160
+ "eval_loss": 0.0071532572619616985,
161
+ "eval_precision": 0.9267161410018553,
162
+ "eval_recall": 0.9310344827586207,
163
+ "eval_runtime": 13.5255,
164
+ "eval_samples_per_second": 513.548,
165
+ "eval_steps_per_second": 64.249,
166
+ "step": 3906
167
+ },
168
+ {
169
+ "epoch": 9.216589861751151,
170
+ "grad_norm": 0.0045371875166893005,
171
+ "learning_rate": 3.9170506912442395e-06,
172
+ "loss": 0.0004,
173
+ "step": 4000
174
+ },
175
+ {
176
+ "epoch": 10.0,
177
+ "eval_accuracy": 0.998618591800854,
178
+ "eval_f1": 0.9259944495837187,
179
+ "eval_loss": 0.007294897455722094,
180
+ "eval_precision": 0.9191919191919192,
181
+ "eval_recall": 0.9328984156570364,
182
+ "eval_runtime": 13.8041,
183
+ "eval_samples_per_second": 503.184,
184
+ "eval_steps_per_second": 62.952,
185
+ "step": 4340
186
+ },
187
+ {
188
+ "epoch": 10.0,
189
+ "step": 4340,
190
+ "total_flos": 1.0996932656642544e+16,
191
+ "train_loss": 0.003382195293697344,
192
+ "train_runtime": 1039.0596,
193
+ "train_samples_per_second": 267.242,
194
+ "train_steps_per_second": 4.177
195
+ }
196
+ ],
197
+ "logging_steps": 500,
198
+ "max_steps": 4340,
199
+ "num_input_tokens_seen": 0,
200
+ "num_train_epochs": 10,
201
+ "save_steps": 500,
202
+ "stateful_callbacks": {
203
+ "TrainerControl": {
204
+ "args": {
205
+ "should_epoch_stop": false,
206
+ "should_evaluate": false,
207
+ "should_log": false,
208
+ "should_save": true,
209
+ "should_training_stop": true
210
+ },
211
+ "attributes": {}
212
+ }
213
+ },
214
+ "total_flos": 1.0996932656642544e+16,
215
+ "train_batch_size": 32,
216
+ "trial_name": null,
217
+ "trial_params": null
218
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7fb15010252c8cbc6a884d820aedc076e55d4b9641d17f55264378c0fcf155
3
+ size 5176
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff