Rodrigo1771 committed on
Commit
f6b0742
·
verified ·
1 Parent(s): 513df21

Training in progress, epoch 1

Browse files
README.md ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model: IVN-RIN/bioBIT
4
+ tags:
5
+ - token-classification
6
+ - generated_from_trainer
7
+ datasets:
8
+ - Rodrigo1771/drugtemist-it-8-ner
9
+ metrics:
10
+ - precision
11
+ - recall
12
+ - f1
13
+ - accuracy
14
+ model-index:
15
+ - name: output
16
+ results:
17
+ - task:
18
+ name: Token Classification
19
+ type: token-classification
20
+ dataset:
21
+ name: Rodrigo1771/drugtemist-it-8-ner
22
+ type: Rodrigo1771/drugtemist-it-8-ner
23
+ config: DrugTEMIST Italian NER
24
+ split: validation
25
+ args: DrugTEMIST Italian NER
26
+ metrics:
27
+ - name: Precision
28
+ type: precision
29
+ value: 0.9122468659594986
30
+ - name: Recall
31
+ type: recall
32
+ value: 0.9157792836398838
33
+ - name: F1
34
+ type: f1
35
+ value: 0.9140096618357488
36
+ - name: Accuracy
37
+ type: accuracy
38
+ value: 0.9985198649701377
39
+ ---
40
+
41
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
42
+ should probably proofread and complete it, then remove this comment. -->
43
+
44
+ # output
45
+
46
+ This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the Rodrigo1771/drugtemist-it-8-ner dataset.
47
+ It achieves the following results on the evaluation set:
48
+ - Loss: 0.0085
49
+ - Precision: 0.9122
50
+ - Recall: 0.9158
51
+ - F1: 0.9140
52
+ - Accuracy: 0.9985
53
+
54
+ ## Model description
55
+
56
+ More information needed
57
+
58
+ ## Intended uses & limitations
59
+
60
+ More information needed
61
+
62
+ ## Training and evaluation data
63
+
64
+ More information needed
65
+
66
+ ## Training procedure
67
+
68
+ ### Training hyperparameters
69
+
70
+ The following hyperparameters were used during training:
71
+ - learning_rate: 5e-05
72
+ - train_batch_size: 32
73
+ - eval_batch_size: 8
74
+ - seed: 42
75
+ - gradient_accumulation_steps: 2
76
+ - total_train_batch_size: 64
77
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
78
+ - lr_scheduler_type: linear
79
+ - num_epochs: 10.0
80
+
81
+ ### Training results
82
+
83
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
84
+ |:-------------:|:------:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
85
+ | No log | 0.9990 | 494 | 0.0050 | 0.8807 | 0.9003 | 0.8904 | 0.9983 |
86
+ | 0.0131 | 2.0 | 989 | 0.0046 | 0.9034 | 0.9148 | 0.9091 | 0.9985 |
87
+ | 0.0037 | 2.9990 | 1483 | 0.0068 | 0.8972 | 0.9129 | 0.9050 | 0.9984 |
88
+ | 0.0021 | 4.0 | 1978 | 0.0069 | 0.8807 | 0.9293 | 0.9044 | 0.9983 |
89
+ | 0.0012 | 4.9990 | 2472 | 0.0073 | 0.8865 | 0.9226 | 0.9042 | 0.9984 |
90
+ | 0.0006 | 6.0 | 2967 | 0.0077 | 0.8932 | 0.9313 | 0.9118 | 0.9984 |
91
+ | 0.0004 | 6.9990 | 3461 | 0.0072 | 0.8978 | 0.9274 | 0.9124 | 0.9985 |
92
+ | 0.0004 | 8.0 | 3956 | 0.0078 | 0.9138 | 0.9129 | 0.9133 | 0.9986 |
93
+ | 0.0001 | 8.9990 | 4450 | 0.0084 | 0.9138 | 0.9138 | 0.9138 | 0.9986 |
94
+ | 0.0001 | 9.9899 | 4940 | 0.0085 | 0.9122 | 0.9158 | 0.9140 | 0.9985 |
95
+
96
+
97
+ ### Framework versions
98
+
99
+ - Transformers 4.44.2
100
+ - Pytorch 2.4.0+cu121
101
+ - Datasets 2.21.0
102
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.989888776541962,
3
+ "eval_accuracy": 0.9985198649701377,
4
+ "eval_f1": 0.9140096618357488,
5
+ "eval_loss": 0.008521749638020992,
6
+ "eval_precision": 0.9122468659594986,
7
+ "eval_recall": 0.9157792836398838,
8
+ "eval_runtime": 14.0315,
9
+ "eval_samples": 6798,
10
+ "eval_samples_per_second": 484.48,
11
+ "eval_steps_per_second": 60.578,
12
+ "predict_accuracy": 0.9982115932160268,
13
+ "predict_f1": 0.8982300884955752,
14
+ "predict_loss": 0.011793443001806736,
15
+ "predict_precision": 0.8874316939890711,
16
+ "predict_recall": 0.9092945128779395,
17
+ "predict_runtime": 27.4059,
18
+ "predict_samples_per_second": 532.915,
19
+ "predict_steps_per_second": 66.628,
20
+ "total_flos": 1.7928149517546354e+16,
21
+ "train_loss": 0.002201772875978276,
22
+ "train_runtime": 1511.1925,
23
+ "train_samples": 31619,
24
+ "train_samples_per_second": 209.232,
25
+ "train_steps_per_second": 3.269
26
+ }
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "IVN-RIN/bioBIT",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "finetuning_task": "ner",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "B-FARMACO",
15
+ "2": "I-FARMACO"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "B-FARMACO": 1,
21
+ "I-FARMACO": 2,
22
+ "O": 0
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "position_embedding_type": "absolute",
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.44.2",
33
+ "type_vocab_size": 2,
34
+ "use_cache": true,
35
+ "vocab_size": 31102
36
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.989888776541962,
3
+ "eval_accuracy": 0.9985198649701377,
4
+ "eval_f1": 0.9140096618357488,
5
+ "eval_loss": 0.008521749638020992,
6
+ "eval_precision": 0.9122468659594986,
7
+ "eval_recall": 0.9157792836398838,
8
+ "eval_runtime": 14.0315,
9
+ "eval_samples": 6798,
10
+ "eval_samples_per_second": 484.48,
11
+ "eval_steps_per_second": 60.578
12
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3daec2c688aa0a46d9db382fbf86ed161eca7c49c159572a0d4bad0e523defb
3
+ size 437380924
predict_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.9982115932160268,
3
+ "predict_f1": 0.8982300884955752,
4
+ "predict_loss": 0.011793443001806736,
5
+ "predict_precision": 0.8874316939890711,
6
+ "predict_recall": 0.9092945128779395,
7
+ "predict_runtime": 27.4059,
8
+ "predict_samples_per_second": 532.915,
9
+ "predict_steps_per_second": 66.628
10
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tb/events.out.tfevents.1725539834.da2ff10c1388.1139.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c63f332f72d5e8ce062a3570a23e513552485e35f9c3e717f676a61770d4de
3
+ size 12077
tb/events.out.tfevents.1725541385.da2ff10c1388.1139.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a6b4673a2b54281137b214f6b38c5f460343279bcdc24f6bbcb40b41533804b
3
+ size 560
tb/events.out.tfevents.1725541614.da2ff10c1388.8786.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4584b4d5777e5b7f96447a5414d6b2ef6ff8feef93772e32125e6e667e4a082c
3
+ size 12077
tb/events.out.tfevents.1725543147.da2ff10c1388.8786.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:271b965f32982eddbd96116b6a0e21c0c303d7829ea9fa6ebdbd89621c357117
3
+ size 560
tb/events.out.tfevents.1725543309.da2ff10c1388.16078.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15f77f4dbef1c3beb3875e5f7a213fc0182c8d07213f227ef8289a8f2a3570cf
3
+ size 5576
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "101": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "102": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "103": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "mask_token": "[MASK]",
49
+ "max_len": 512,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "truncation": true,
58
+ "unk_token": "[UNK]"
59
+ }
train.log ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/4790 [00:00<?, ?it/s]
1
  0%| | 1/4790 [00:01<1:20:56, 1.01s/it]
2
  0%| | 2/4790 [00:01<50:37, 1.58it/s]
3
  0%| | 3/4790 [00:01<37:18, 2.14it/s]
4
  0%| | 4/4790 [00:01<29:46, 2.68it/s]
5
  0%| | 5/4790 [00:02<25:48, 3.09it/s]
6
  0%| | 6/4790 [00:02<26:14, 3.04it/s]
7
  0%| | 7/4790 [00:02<24:55, 3.20it/s]
8
  0%| | 8/4790 [00:02<22:17, 3.57it/s]
9
  0%| | 9/4790 [00:03<21:39, 3.68it/s]
10
  0%| | 10/4790 [00:03<22:52, 3.48it/s]
11
  0%| | 11/4790 [00:03<21:15, 3.75it/s]
12
  0%| | 12/4790 [00:04<22:03, 3.61it/s]
13
  0%| | 13/4790 [00:04<20:44, 3.84it/s]
14
  0%| | 14/4790 [00:04<19:51, 4.01it/s]
15
  0%| | 15/4790 [00:04<18:24, 4.32it/s]
16
  0%| | 16/4790 [00:04<18:34, 4.28it/s]
17
  0%| | 17/4790 [00:05<18:30, 4.30it/s]
18
  0%| | 18/4790 [00:05<18:13, 4.37it/s]
19
  0%| | 19/4790 [00:05<20:34, 3.86it/s]
20
  0%| | 20/4790 [00:05<19:30, 4.07it/s]
21
  0%| | 21/4790 [00:06<19:47, 4.02it/s]
22
  0%| | 22/4790 [00:06<19:42, 4.03it/s]
23
  0%| | 23/4790 [00:06<19:29, 4.08it/s]
24
  1%| | 24/4790 [00:06<20:18, 3.91it/s]
25
  1%| | 25/4790 [00:07<20:17, 3.91it/s]
26
  1%| | 26/4790 [00:07<19:08, 4.15it/s]
27
  1%| | 27/4790 [00:07<18:37, 4.26it/s]
28
  1%| | 28/4790 [00:07<20:30, 3.87it/s]
29
  1%| | 29/4790 [00:08<20:31, 3.86it/s]
30
  1%| | 30/4790 [00:08<21:02, 3.77it/s]
31
  1%| | 31/4790 [00:08<20:09, 3.93it/s]
32
  1%| | 32/4790 [00:09<22:20, 3.55it/s]
33
  1%| | 33/4790 [00:09<20:31, 3.86it/s]
34
  1%| | 34/4790 [00:09<21:36, 3.67it/s]
35
  1%| | 35/4790 [00:09<22:11, 3.57it/s]
36
  1%| | 36/4790 [00:10<20:46, 3.81it/s]
37
  1%| | 37/4790 [00:10<23:52, 3.32it/s]
38
  1%| | 38/4790 [00:10<21:50, 3.63it/s]
39
  1%| | 39/4790 [00:10<22:49, 3.47it/s]
40
  1%| | 40/4790 [00:11<23:36, 3.35it/s]
41
  1%| | 41/4790 [00:11<21:06, 3.75it/s]
42
  1%| | 42/4790 [00:11<20:37, 3.84it/s]
43
  1%| | 43/4790 [00:12<20:36, 3.84it/s]
44
  1%| | 44/4790 [00:12<30:21, 2.61it/s]
45
  1%| | 45/4790 [00:12<26:51, 2.94it/s]
46
  1%| | 46/4790 [00:13<24:36, 3.21it/s]
47
  1%| | 47/4790 [00:13<26:25, 2.99it/s]
48
  1%| | 48/4790 [00:13<24:41, 3.20it/s]
49
  1%| | 49/4790 [00:14<23:30, 3.36it/s]
50
  1%| | 50/4790 [00:14<23:01, 3.43it/s]
51
  1%| | 51/4790 [00:14<22:17, 3.54it/s]
52
  1%| | 52/4790 [00:14<21:00, 3.76it/s]
53
  1%| | 53/4790 [00:15<19:53, 3.97it/s]
54
  1%| | 54/4790 [00:15<19:08, 4.12it/s]
55
  1%| | 55/4790 [00:15<21:09, 3.73it/s]
56
  1%| | 56/4790 [00:15<19:04, 4.14it/s]
57
  1%| | 57/4790 [00:16<20:34, 3.84it/s]
58
  1%| | 58/4790 [00:16<20:14, 3.90it/s]
59
  1%| | 59/4790 [00:16<19:47, 3.98it/s]
60
  1%|▏ | 60/4790 [00:16<22:20, 3.53it/s]
61
  1%|▏ | 61/4790 [00:17<21:23, 3.68it/s]
62
  1%|▏ | 62/4790 [00:17<21:38, 3.64it/s]
63
  1%|▏ | 63/4790 [00:17<22:00, 3.58it/s]
64
  1%|▏ | 64/4790 [00:18<21:29, 3.67it/s]
65
  1%|▏ | 65/4790 [00:18<21:47, 3.61it/s]
66
  1%|▏ | 66/4790 [00:18<20:36, 3.82it/s]
67
  1%|▏ | 67/4790 [00:19<27:23, 2.87it/s]
68
  1%|▏ | 68/4790 [00:19<26:32, 2.97it/s]
69
  1%|▏ | 69/4790 [00:19<24:31, 3.21it/s]
70
  1%|▏ | 70/4790 [00:19<23:00, 3.42it/s]
71
  1%|▏ | 71/4790 [00:20<23:36, 3.33it/s]
72
  2%|▏ | 72/4790 [00:20<22:01, 3.57it/s]
73
  2%|▏ | 73/4790 [00:20<20:12, 3.89it/s]
74
  2%|▏ | 74/4790 [00:20<18:49, 4.17it/s]
75
  2%|▏ | 75/4790 [00:21<20:11, 3.89it/s]
76
  2%|▏ | 76/4790 [00:21<19:23, 4.05it/s]
77
  2%|▏ | 77/4790 [00:21<21:03, 3.73it/s]
78
  2%|▏ | 78/4790 [00:21<20:42, 3.79it/s]
79
  2%|▏ | 79/4790 [00:22<20:11, 3.89it/s]
80
  2%|▏ | 80/4790 [00:22<20:50, 3.77it/s]
81
  2%|▏ | 81/4790 [00:22<19:14, 4.08it/s]
82
  2%|▏ | 82/4790 [00:22<21:12, 3.70it/s]
83
  2%|▏ | 83/4790 [00:23<20:43, 3.78it/s]
84
  2%|▏ | 84/4790 [00:23<21:25, 3.66it/s]
85
  2%|▏ | 85/4790 [00:23<22:19, 3.51it/s]
86
  2%|▏ | 86/4790 [00:24<21:02, 3.72it/s]
87
  2%|▏ | 87/4790 [00:24<19:44, 3.97it/s]
88
  2%|▏ | 88/4790 [00:24<18:39, 4.20it/s]
89
  2%|▏ | 89/4790 [00:24<19:58, 3.92it/s]
90
  2%|▏ | 90/4790 [00:25<20:11, 3.88it/s]
91
  2%|▏ | 91/4790 [00:25<22:09, 3.53it/s]
92
  2%|▏ | 92/4790 [00:25<21:43, 3.60it/s]
93
  2%|▏ | 93/4790 [00:25<20:35, 3.80it/s]
94
  2%|▏ | 94/4790 [00:26<20:45, 3.77it/s]
95
  2%|▏ | 95/4790 [00:26<18:51, 4.15it/s]
96
  2%|▏ | 96/4790 [00:26<23:08, 3.38it/s]
97
  2%|▏ | 97/4790 [00:26<21:22, 3.66it/s]
98
  2%|▏ | 98/4790 [00:27<21:11, 3.69it/s]
99
  2%|▏ | 99/4790 [00:27<23:06, 3.38it/s]
100
  2%|▏ | 100/4790 [00:27<22:34, 3.46it/s]
101
  2%|▏ | 101/4790 [00:28<22:19, 3.50it/s]
102
  2%|▏ | 102/4790 [00:28<22:22, 3.49it/s]
103
  2%|▏ | 103/4790 [00:28<21:22, 3.65it/s]
104
  2%|▏ | 104/4790 [00:28<20:50, 3.75it/s]
105
  2%|▏ | 105/4790 [00:29<19:16, 4.05it/s]
106
  2%|▏ | 106/4790 [00:29<19:27, 4.01it/s]
107
  2%|▏ | 107/4790 [00:29<20:03, 3.89it/s]
108
  2%|▏ | 108/4790 [00:29<21:03, 3.70it/s]
109
  2%|▏ | 109/4790 [00:30<19:57, 3.91it/s]
110
  2%|▏ | 110/4790 [00:30<18:51, 4.14it/s]
111
  2%|▏ | 111/4790 [00:30<19:09, 4.07it/s]
112
  2%|▏ | 112/4790 [00:30<19:05, 4.08it/s]
113
  2%|▏ | 113/4790 [00:31<18:14, 4.27it/s]
114
  2%|▏ | 114/4790 [00:31<18:41, 4.17it/s]
115
  2%|▏ | 115/4790 [00:31<17:37, 4.42it/s]
116
  2%|▏ | 116/4790 [00:31<18:21, 4.24it/s]
117
  2%|▏ | 117/4790 [00:32<20:53, 3.73it/s]
118
  2%|▏ | 118/4790 [00:32<20:04, 3.88it/s]
119
  2%|▏ | 119/4790 [00:32<21:01, 3.70it/s]
120
  3%|▎ | 120/4790 [00:32<19:06, 4.07it/s]
121
  3%|▎ | 121/4790 [00:33<20:35, 3.78it/s]
122
  3%|▎ | 122/4790 [00:33<21:02, 3.70it/s]
123
  3%|▎ | 123/4790 [00:33<24:04, 3.23it/s]
124
  3%|▎ | 124/4790 [00:34<21:34, 3.60it/s]
125
  3%|▎ | 125/4790 [00:34<21:45, 3.57it/s]
126
  3%|▎ | 126/4790 [00:34<21:13, 3.66it/s]
127
  3%|▎ | 127/4790 [00:34<22:31, 3.45it/s]
128
  3%|▎ | 128/4790 [00:35<21:58, 3.54it/s]
129
  3%|▎ | 129/4790 [00:35<21:51, 3.55it/s]
130
  3%|▎ | 130/4790 [00:35<22:45, 3.41it/s]
131
  3%|▎ | 131/4790 [00:36<21:18, 3.64it/s]
132
  3%|▎ | 132/4790 [00:36<20:34, 3.77it/s]
133
  3%|▎ | 133/4790 [00:36<20:01, 3.88it/s]
134
  3%|▎ | 134/4790 [00:36<22:14, 3.49it/s]
135
  3%|▎ | 135/4790 [00:37<21:41, 3.58it/s]
136
  3%|▎ | 136/4790 [00:37<25:05, 3.09it/s]
137
  3%|▎ | 137/4790 [00:37<23:27, 3.31it/s]
138
  3%|▎ | 138/4790 [00:38<24:07, 3.21it/s]
139
  3%|▎ | 139/4790 [00:38<24:49, 3.12it/s]
140
  3%|▎ | 140/4790 [00:38<22:58, 3.37it/s]
141
  3%|▎ | 141/4790 [00:38<21:06, 3.67it/s]
142
  3%|▎ | 142/4790 [00:39<20:26, 3.79it/s]
143
  3%|▎ | 143/4790 [00:39<21:11, 3.65it/s]
144
  3%|▎ | 144/4790 [00:39<19:58, 3.88it/s]
145
  3%|▎ | 145/4790 [00:39<18:59, 4.08it/s]
146
  3%|▎ | 146/4790 [00:40<18:44, 4.13it/s]
147
  3%|▎ | 147/4790 [00:40<20:34, 3.76it/s]
148
  3%|▎ | 148/4790 [00:40<19:30, 3.97it/s]
149
  3%|▎ | 149/4790 [00:41<21:12, 3.65it/s]
150
  3%|▎ | 150/4790 [00:41<21:36, 3.58it/s]
151
  3%|▎ | 151/4790 [00:41<20:19, 3.80it/s]
152
  3%|▎ | 152/4790 [00:41<20:08, 3.84it/s]
153
  3%|▎ | 153/4790 [00:42<21:57, 3.52it/s]
154
  3%|▎ | 154/4790 [00:42<22:08, 3.49it/s]
155
  3%|▎ | 155/4790 [00:42<20:34, 3.76it/s]
156
  3%|▎ | 156/4790 [00:42<22:06, 3.49it/s]
157
  3%|▎ | 157/4790 [00:43<20:52, 3.70it/s]
158
  3%|▎ | 158/4790 [00:43<19:31, 3.95it/s]
159
  3%|▎ | 159/4790 [00:43<21:04, 3.66it/s]
160
  3%|▎ | 160/4790 [00:44<22:25, 3.44it/s]
161
  3%|▎ | 161/4790 [00:44<21:51, 3.53it/s]
162
  3%|▎ | 162/4790 [00:44<20:32, 3.75it/s]
163
  3%|▎ | 163/4790 [00:44<19:43, 3.91it/s]
164
  3%|▎ | 164/4790 [00:45<20:41, 3.72it/s]
165
  3%|▎ | 165/4790 [00:45<19:02, 4.05it/s]
166
  3%|▎ | 166/4790 [00:45<20:47, 3.71it/s]
167
  3%|▎ | 167/4790 [00:45<22:56, 3.36it/s]
168
  4%|▎ | 168/4790 [00:46<23:05, 3.34it/s]
169
  4%|▎ | 169/4790 [00:46<21:23, 3.60it/s]
170
  4%|▎ | 170/4790 [00:46<21:05, 3.65it/s]
171
  4%|▎ | 171/4790 [00:47<20:26, 3.76it/s]
172
  4%|▎ | 172/4790 [00:47<19:43, 3.90it/s]
173
  4%|▎ | 173/4790 [00:47<21:36, 3.56it/s]
174
  4%|▎ | 174/4790 [00:47<21:23, 3.60it/s]
175
  4%|▎ | 175/4790 [00:48<20:53, 3.68it/s]
176
  4%|▎ | 176/4790 [00:48<20:12, 3.81it/s]
177
  4%|▎ | 177/4790 [00:48<19:38, 3.92it/s]
178
  4%|▎ | 178/4790 [00:48<19:02, 4.04it/s]
179
  4%|▎ | 179/4790 [00:49<18:51, 4.08it/s]
180
  4%|▍ | 180/4790 [00:49<17:55, 4.29it/s]
181
  4%|▍ | 181/4790 [00:49<20:31, 3.74it/s]
182
  4%|▍ | 182/4790 [00:49<19:12, 4.00it/s]
183
  4%|▍ | 183/4790 [00:50<19:09, 4.01it/s]
184
  4%|▍ | 184/4790 [00:50<17:15, 4.45it/s]
185
  4%|▍ | 185/4790 [00:50<21:16, 3.61it/s]
186
  4%|▍ | 186/4790 [00:50<21:56, 3.50it/s]
187
  4%|▍ | 187/4790 [00:51<21:13, 3.61it/s]
188
  4%|▍ | 188/4790 [00:51<19:37, 3.91it/s]
189
  4%|▍ | 189/4790 [00:51<19:49, 3.87it/s]
190
  4%|▍ | 190/4790 [00:51<19:27, 3.94it/s]
191
  4%|▍ | 191/4790 [00:52<18:22, 4.17it/s]
192
  4%|▍ | 192/4790 [00:52<19:45, 3.88it/s]
193
  4%|▍ | 193/4790 [00:52<19:55, 3.85it/s]
194
  4%|▍ | 194/4790 [00:52<19:56, 3.84it/s]
195
  4%|▍ | 195/4790 [00:53<19:34, 3.91it/s]
196
  4%|▍ | 196/4790 [00:53<18:31, 4.13it/s]
197
  4%|▍ | 197/4790 [00:53<17:44, 4.32it/s]
198
  4%|▍ | 198/4790 [00:53<19:39, 3.89it/s]
199
  4%|▍ | 199/4790 [00:54<19:52, 3.85it/s]
200
  4%|▍ | 200/4790 [00:54<19:50, 3.86it/s]
201
  4%|▍ | 201/4790 [00:54<20:10, 3.79it/s]
202
  4%|▍ | 202/4790 [00:54<18:09, 4.21it/s]
203
  4%|▍ | 203/4790 [00:55<18:39, 4.10it/s]
204
  4%|▍ | 204/4790 [00:55<23:44, 3.22it/s]
205
  4%|▍ | 205/4790 [00:55<21:02, 3.63it/s]
206
  4%|▍ | 206/4790 [00:56<20:28, 3.73it/s]
207
  4%|▍ | 207/4790 [00:56<19:35, 3.90it/s]
208
  4%|▍ | 208/4790 [00:56<19:23, 3.94it/s]
209
  4%|▍ | 209/4790 [00:56<20:38, 3.70it/s]
210
  4%|▍ | 210/4790 [00:57<20:28, 3.73it/s]
211
  4%|▍ | 211/4790 [00:57<21:45, 3.51it/s]
212
  4%|▍ | 212/4790 [00:57<19:52, 3.84it/s]
213
  4%|▍ | 213/4790 [00:57<19:25, 3.93it/s]
214
  4%|▍ | 214/4790 [00:58<18:19, 4.16it/s]
215
  4%|▍ | 215/4790 [00:58<19:07, 3.99it/s]
216
  5%|▍ | 216/4790 [00:58<19:16, 3.96it/s]
217
  5%|▍ | 217/4790 [00:58<20:18, 3.75it/s]
218
  5%|▍ | 218/4790 [00:59<21:08, 3.61it/s]
219
  5%|▍ | 219/4790 [00:59<19:39, 3.88it/s]
220
  5%|▍ | 220/4790 [00:59<19:09, 3.97it/s]
221
  5%|▍ | 221/4790 [00:59<18:31, 4.11it/s]
222
  5%|▍ | 222/4790 [01:00<17:59, 4.23it/s]
223
  5%|▍ | 223/4790 [01:00<18:22, 4.14it/s]
224
  5%|▍ | 224/4790 [01:00<18:34, 4.10it/s]
225
  5%|▍ | 225/4790 [01:00<18:29, 4.12it/s]
226
  5%|▍ | 226/4790 [01:01<22:07, 3.44it/s]
227
  5%|▍ | 227/4790 [01:01<21:26, 3.55it/s]
228
  5%|▍ | 228/4790 [01:01<20:48, 3.65it/s]
229
  5%|▍ | 229/4790 [01:02<20:04, 3.79it/s]
230
  5%|▍ | 230/4790 [01:02<19:58, 3.80it/s]
231
  5%|▍ | 231/4790 [01:02<23:39, 3.21it/s]
232
  5%|▍ | 232/4790 [01:02<22:14, 3.42it/s]
233
  5%|▍ | 233/4790 [01:03<22:43, 3.34it/s]
234
  5%|▍ | 234/4790 [01:03<22:02, 3.45it/s]
235
  5%|▍ | 235/4790 [01:03<20:39, 3.67it/s]
236
  5%|▍ | 236/4790 [01:04<23:49, 3.18it/s]
237
  5%|▍ | 237/4790 [01:04<24:54, 3.05it/s]
238
  5%|▍ | 238/4790 [01:04<24:56, 3.04it/s]
239
  5%|▍ | 239/4790 [01:05<24:16, 3.12it/s]
240
  5%|▌ | 240/4790 [01:05<22:37, 3.35it/s]
241
  5%|▌ | 241/4790 [01:05<26:11, 2.89it/s]
242
  5%|▌ | 242/4790 [01:06<23:40, 3.20it/s]
243
  5%|▌ | 243/4790 [01:06<24:51, 3.05it/s]
244
  5%|▌ | 244/4790 [01:06<24:47, 3.06it/s]
245
  5%|▌ | 245/4790 [01:07<22:59, 3.29it/s]
246
  5%|▌ | 246/4790 [01:07<22:32, 3.36it/s]
247
  5%|▌ | 247/4790 [01:07<20:16, 3.73it/s]
248
  5%|▌ | 248/4790 [01:07<19:43, 3.84it/s]
249
  5%|▌ | 249/4790 [01:08<21:41, 3.49it/s]
250
  5%|▌ | 250/4790 [01:08<25:08, 3.01it/s]
251
  5%|▌ | 251/4790 [01:08<25:17, 2.99it/s]
252
  5%|▌ | 252/4790 [01:09<22:38, 3.34it/s]
253
  5%|▌ | 253/4790 [01:09<20:45, 3.64it/s]
254
  5%|▌ | 254/4790 [01:09<19:55, 3.79it/s]
255
  5%|▌ | 255/4790 [01:09<19:58, 3.78it/s]
256
  5%|▌ | 256/4790 [01:10<20:54, 3.61it/s]
257
  5%|▌ | 257/4790 [01:10<22:47, 3.32it/s]
258
  5%|▌ | 258/4790 [01:10<20:47, 3.63it/s]
259
  5%|▌ | 259/4790 [01:10<18:58, 3.98it/s]
260
  5%|▌ | 260/4790 [01:11<21:14, 3.55it/s]
261
  5%|▌ | 261/4790 [01:11<20:03, 3.76it/s]
262
  5%|▌ | 262/4790 [01:11<21:05, 3.58it/s]
263
  5%|▌ | 263/4790 [01:12<21:12, 3.56it/s]
264
  6%|▌ | 264/4790 [01:12<20:51, 3.62it/s]
265
  6%|▌ | 265/4790 [01:12<22:23, 3.37it/s]
266
  6%|▌ | 266/4790 [01:13<21:56, 3.44it/s]
267
  6%|▌ | 267/4790 [01:13<22:31, 3.35it/s]
268
  6%|▌ | 268/4790 [01:13<21:57, 3.43it/s]
269
  6%|▌ | 269/4790 [01:13<22:48, 3.30it/s]
270
  6%|▌ | 270/4790 [01:14<22:38, 3.33it/s]
271
  6%|▌ | 271/4790 [01:14<21:37, 3.48it/s]
272
  6%|▌ | 272/4790 [01:14<19:41, 3.82it/s]
273
  6%|▌ | 273/4790 [01:14<18:42, 4.02it/s]
274
  6%|▌ | 274/4790 [01:15<18:50, 3.99it/s]
275
  6%|▌ | 275/4790 [01:15<19:49, 3.79it/s]
276
  6%|▌ | 276/4790 [01:15<17:32, 4.29it/s]
277
  6%|▌ | 277/4790 [01:15<16:46, 4.48it/s]
278
  6%|▌ | 278/4790 [01:16<16:39, 4.51it/s]
279
  6%|▌ | 279/4790 [01:16<18:49, 3.99it/s]
280
  6%|▌ | 280/4790 [01:16<17:38, 4.26it/s]
281
  6%|▌ | 281/4790 [01:16<16:33, 4.54it/s]
282
  6%|▌ | 282/4790 [01:17<20:05, 3.74it/s]
283
  6%|▌ | 283/4790 [01:17<18:27, 4.07it/s]
284
  6%|▌ | 284/4790 [01:17<18:32, 4.05it/s]
285
  6%|▌ | 285/4790 [01:17<18:28, 4.07it/s]
286
  6%|▌ | 286/4790 [01:18<18:14, 4.12it/s]
287
  6%|▌ | 287/4790 [01:18<21:27, 3.50it/s]
288
  6%|▌ | 288/4790 [01:18<21:52, 3.43it/s]
289
  6%|▌ | 289/4790 [01:18<20:33, 3.65it/s]
290
  6%|▌ | 290/4790 [01:19<21:24, 3.50it/s]
291
  6%|▌ | 291/4790 [01:19<20:36, 3.64it/s]
292
  6%|▌ | 292/4790 [01:19<19:57, 3.76it/s]
293
  6%|▌ | 293/4790 [01:20<24:15, 3.09it/s]
294
  6%|▌ | 294/4790 [01:20<22:18, 3.36it/s]
295
  6%|▌ | 295/4790 [01:20<23:18, 3.21it/s]
296
  6%|▌ | 296/4790 [01:21<25:14, 2.97it/s]
297
  6%|▌ | 297/4790 [01:21<24:18, 3.08it/s]
298
  6%|▌ | 298/4790 [01:21<21:32, 3.48it/s]
299
  6%|▌ | 299/4790 [01:21<20:51, 3.59it/s]
300
  6%|▋ | 300/4790 [01:22<19:20, 3.87it/s]
301
  6%|▋ | 301/4790 [01:22<19:39, 3.81it/s]
302
  6%|▋ | 302/4790 [01:22<19:31, 3.83it/s]
303
  6%|▋ | 303/4790 [01:22<20:12, 3.70it/s]
304
  6%|▋ | 304/4790 [01:23<19:28, 3.84it/s]
305
  6%|▋ | 305/4790 [01:23<19:17, 3.88it/s]
306
  6%|▋ | 306/4790 [01:23<17:51, 4.18it/s]
307
  6%|▋ | 307/4790 [01:23<18:10, 4.11it/s]
308
  6%|▋ | 308/4790 [01:24<23:32, 3.17it/s]
309
  6%|▋ | 309/4790 [01:24<21:51, 3.42it/s]
310
  6%|▋ | 310/4790 [01:24<20:53, 3.57it/s]
311
  6%|▋ | 311/4790 [01:25<20:05, 3.72it/s]
312
  7%|▋ | 312/4790 [01:25<19:17, 3.87it/s]
313
  7%|▋ | 313/4790 [01:25<19:03, 3.91it/s]
314
  7%|▋ | 314/4790 [01:25<21:26, 3.48it/s]
315
  7%|▋ | 315/4790 [01:26<20:37, 3.61it/s]
316
  7%|▋ | 316/4790 [01:26<20:04, 3.71it/s]
317
  7%|▋ | 317/4790 [01:26<19:26, 3.83it/s]
318
  7%|▋ | 318/4790 [01:27<20:44, 3.59it/s]
319
  7%|▋ | 319/4790 [01:27<19:54, 3.74it/s]
320
  7%|▋ | 320/4790 [01:27<22:32, 3.30it/s]
321
  7%|▋ | 321/4790 [01:27<21:02, 3.54it/s]
322
  7%|▋ | 322/4790 [01:28<21:39, 3.44it/s]
323
  7%|▋ | 323/4790 [01:28<21:36, 3.45it/s]
324
  7%|▋ | 324/4790 [01:28<20:08, 3.70it/s]
325
  7%|▋ | 325/4790 [01:29<20:12, 3.68it/s]
326
  7%|▋ | 326/4790 [01:29<18:55, 3.93it/s]
327
  7%|▋ | 327/4790 [01:29<18:47, 3.96it/s]
328
  7%|▋ | 328/4790 [01:29<17:11, 4.33it/s]
329
  7%|▋ | 329/4790 [01:29<19:41, 3.78it/s]
330
  7%|▋ | 330/4790 [01:30<19:27, 3.82it/s]
331
  7%|▋ | 331/4790 [01:30<19:14, 3.86it/s]
332
  7%|▋ | 332/4790 [01:30<19:45, 3.76it/s]
333
  7%|▋ | 333/4790 [01:31<19:15, 3.86it/s]
334
  7%|▋ | 334/4790 [01:31<19:12, 3.86it/s]
335
  7%|▋ | 335/4790 [01:31<19:52, 3.74it/s]
336
  7%|▋ | 336/4790 [01:31<21:35, 3.44it/s]
337
  7%|▋ | 337/4790 [01:32<25:21, 2.93it/s]
338
  7%|▋ | 338/4790 [01:32<24:09, 3.07it/s]
339
  7%|▋ | 339/4790 [01:32<21:21, 3.47it/s]
340
  7%|▋ | 340/4790 [01:33<21:04, 3.52it/s]
341
  7%|▋ | 341/4790 [01:33<21:03, 3.52it/s]
342
  7%|▋ | 342/4790 [01:33<18:26, 4.02it/s]
343
  7%|▋ | 343/4790 [01:33<17:48, 4.16it/s]
344
  7%|▋ | 344/4790 [01:34<16:38, 4.45it/s]
345
  7%|▋ | 345/4790 [01:34<18:20, 4.04it/s]
346
  7%|▋ | 346/4790 [01:34<18:51, 3.93it/s]
347
  7%|▋ | 347/4790 [01:34<18:24, 4.02it/s]
348
  7%|▋ | 348/4790 [01:35<18:23, 4.02it/s]
349
  7%|▋ | 349/4790 [01:35<19:05, 3.88it/s]
350
  7%|▋ | 350/4790 [01:35<18:45, 3.94it/s]
351
  7%|▋ | 351/4790 [01:35<19:40, 3.76it/s]
352
  7%|▋ | 352/4790 [01:36<20:27, 3.62it/s]
353
  7%|▋ | 353/4790 [01:36<20:13, 3.66it/s]
354
  7%|▋ | 354/4790 [01:36<21:46, 3.39it/s]
355
  7%|▋ | 355/4790 [01:37<20:23, 3.62it/s]
356
  7%|▋ | 356/4790 [01:37<20:12, 3.66it/s]
357
  7%|▋ | 357/4790 [01:37<20:34, 3.59it/s]
358
  7%|▋ | 358/4790 [01:37<20:09, 3.66it/s]
359
  7%|▋ | 359/4790 [01:38<21:14, 3.48it/s]
360
  8%|▊ | 360/4790 [01:38<20:40, 3.57it/s]
361
  8%|▊ | 361/4790 [01:38<19:59, 3.69it/s]
362
  8%|▊ | 362/4790 [01:39<23:39, 3.12it/s]
363
  8%|▊ | 363/4790 [01:39<23:06, 3.19it/s]
364
  8%|▊ | 364/4790 [01:39<21:44, 3.39it/s]
365
  8%|▊ | 365/4790 [01:39<20:51, 3.54it/s]
366
  8%|▊ | 366/4790 [01:40<20:37, 3.57it/s]
367
  8%|▊ | 367/4790 [01:40<20:49, 3.54it/s]
368
  8%|▊ | 368/4790 [01:40<19:58, 3.69it/s]
369
  8%|▊ | 369/4790 [01:41<23:59, 3.07it/s]
370
  8%|▊ | 370/4790 [01:41<24:54, 2.96it/s]
371
  8%|▊ | 371/4790 [01:41<23:32, 3.13it/s]
372
  8%|▊ | 372/4790 [01:42<24:14, 3.04it/s]
373
  8%|▊ | 373/4790 [01:42<21:57, 3.35it/s]
374
  8%|▊ | 374/4790 [01:42<20:08, 3.65it/s]
375
  8%|▊ | 375/4790 [01:43<23:55, 3.08it/s]
376
  8%|▊ | 376/4790 [01:43<23:20, 3.15it/s]
377
  8%|▊ | 377/4790 [01:43<20:48, 3.53it/s]
378
  8%|▊ | 378/4790 [01:43<20:24, 3.60it/s]
379
  8%|▊ | 379/4790 [01:44<19:31, 3.77it/s]
380
  8%|▊ | 380/4790 [01:44<19:40, 3.74it/s]
381
  8%|▊ | 381/4790 [01:44<18:52, 3.89it/s]
382
  8%|▊ | 382/4790 [01:44<17:12, 4.27it/s]
383
  8%|▊ | 383/4790 [01:44<16:23, 4.48it/s]
384
  8%|▊ | 384/4790 [01:45<16:58, 4.32it/s]
385
  8%|▊ | 385/4790 [01:45<19:00, 3.86it/s]
386
  8%|▊ | 386/4790 [01:45<20:09, 3.64it/s]
387
  8%|▊ | 387/4790 [01:46<21:31, 3.41it/s]
388
  8%|▊ | 388/4790 [01:46<20:03, 3.66it/s]
389
  8%|▊ | 389/4790 [01:46<22:50, 3.21it/s]
390
  8%|▊ | 390/4790 [01:46<20:16, 3.62it/s]
391
  8%|▊ | 391/4790 [01:47<21:09, 3.47it/s]
392
  8%|▊ | 392/4790 [01:47<19:30, 3.76it/s]
393
  8%|▊ | 393/4790 [01:47<17:51, 4.10it/s]
394
  8%|▊ | 394/4790 [01:47<17:49, 4.11it/s]
395
  8%|▊ | 395/4790 [01:48<18:08, 4.04it/s]
396
  8%|▊ | 396/4790 [01:48<17:36, 4.16it/s]
397
  8%|▊ | 397/4790 [01:48<19:35, 3.74it/s]
398
  8%|▊ | 398/4790 [01:49<20:26, 3.58it/s]
399
  8%|▊ | 399/4790 [01:49<20:14, 3.61it/s]
400
  8%|▊ | 400/4790 [01:49<18:53, 3.87it/s]
401
  8%|▊ | 401/4790 [01:49<19:57, 3.67it/s]
402
  8%|▊ | 402/4790 [01:50<18:32, 3.94it/s]
403
  8%|▊ | 403/4790 [01:50<18:04, 4.05it/s]
404
  8%|▊ | 404/4790 [01:50<17:58, 4.07it/s]
405
  8%|▊ | 405/4790 [01:50<18:34, 3.94it/s]
406
  8%|▊ | 406/4790 [01:51<19:41, 3.71it/s]
407
  8%|▊ | 407/4790 [01:51<18:06, 4.03it/s]
408
  9%|▊ | 408/4790 [01:51<19:27, 3.75it/s]
409
  9%|▊ | 409/4790 [01:51<18:26, 3.96it/s]
410
  9%|▊ | 410/4790 [01:52<18:19, 3.98it/s]
411
  9%|▊ | 411/4790 [01:52<19:04, 3.83it/s]
412
  9%|▊ | 412/4790 [01:52<22:31, 3.24it/s]
413
  9%|▊ | 413/4790 [01:53<22:04, 3.30it/s]
414
  9%|▊ | 414/4790 [01:53<20:44, 3.52it/s]
415
  9%|▊ | 415/4790 [01:53<19:50, 3.67it/s]
416
  9%|▊ | 416/4790 [01:53<20:30, 3.55it/s]
417
  9%|▊ | 417/4790 [01:54<19:17, 3.78it/s]
418
  9%|▊ | 418/4790 [01:54<17:48, 4.09it/s]
419
  9%|▊ | 419/4790 [01:54<17:43, 4.11it/s]
420
  9%|▉ | 420/4790 [01:54<17:57, 4.06it/s]
421
  9%|▉ | 421/4790 [01:55<18:55, 3.85it/s]
422
  9%|▉ | 422/4790 [01:55<18:49, 3.87it/s]
423
  9%|▉ | 423/4790 [01:55<18:09, 4.01it/s]
424
  9%|▉ | 424/4790 [01:55<18:01, 4.04it/s]
425
  9%|▉ | 425/4790 [01:56<18:29, 3.93it/s]
426
  9%|▉ | 426/4790 [01:56<18:20, 3.97it/s]
427
  9%|▉ | 427/4790 [01:56<17:21, 4.19it/s]
428
  9%|▉ | 428/4790 [01:56<17:53, 4.06it/s]
429
  9%|▉ | 429/4790 [01:57<17:32, 4.14it/s]
430
  9%|▉ | 430/4790 [01:57<16:50, 4.32it/s]
431
  9%|▉ | 431/4790 [01:57<16:49, 4.32it/s]
432
  9%|▉ | 432/4790 [01:57<16:52, 4.30it/s]
433
  9%|▉ | 433/4790 [01:57<16:16, 4.46it/s]
434
  9%|▉ | 434/4790 [01:58<17:52, 4.06it/s]
435
  9%|▉ | 435/4790 [01:58<17:23, 4.17it/s]
436
  9%|▉ | 436/4790 [01:58<16:56, 4.28it/s]
437
  9%|▉ | 437/4790 [01:58<19:06, 3.80it/s]
438
  9%|▉ | 438/4790 [01:59<19:18, 3.76it/s]
439
  9%|▉ | 439/4790 [01:59<18:51, 3.84it/s]
440
  9%|▉ | 440/4790 [01:59<19:20, 3.75it/s]
441
  9%|▉ | 441/4790 [02:00<20:58, 3.46it/s]
442
  9%|▉ | 442/4790 [02:00<20:20, 3.56it/s]
443
  9%|▉ | 443/4790 [02:00<20:12, 3.58it/s]
444
  9%|▉ | 444/4790 [02:00<18:59, 3.81it/s]
445
  9%|▉ | 445/4790 [02:01<17:37, 4.11it/s]
446
  9%|▉ | 446/4790 [02:01<19:02, 3.80it/s]
447
  9%|▉ | 447/4790 [02:01<17:38, 4.10it/s]
448
  9%|▉ | 448/4790 [02:01<17:54, 4.04it/s]
449
  9%|▉ | 449/4790 [02:02<16:45, 4.32it/s]
450
  9%|▉ | 450/4790 [02:02<17:07, 4.22it/s]
451
  9%|▉ | 451/4790 [02:02<16:06, 4.49it/s]
452
  9%|▉ | 452/4790 [02:02<19:32, 3.70it/s]
453
  9%|▉ | 453/4790 [02:03<19:30, 3.70it/s]
454
  9%|▉ | 454/4790 [02:03<18:34, 3.89it/s]
455
  9%|▉ | 455/4790 [02:03<17:23, 4.16it/s]
456
  10%|▉ | 456/4790 [02:03<18:24, 3.92it/s]
457
  10%|▉ | 457/4790 [02:04<25:31, 2.83it/s]
458
  10%|▉ | 458/4790 [02:04<23:07, 3.12it/s]
459
  10%|▉ | 459/4790 [02:05<23:20, 3.09it/s]
460
  10%|▉ | 460/4790 [02:05<22:18, 3.24it/s]
461
  10%|▉ | 461/4790 [02:05<21:41, 3.33it/s]
462
  10%|▉ | 462/4790 [02:05<20:15, 3.56it/s]
463
  10%|▉ | 463/4790 [02:06<24:21, 2.96it/s]
464
  10%|▉ | 464/4790 [02:06<21:39, 3.33it/s]
465
  10%|▉ | 465/4790 [02:06<19:06, 3.77it/s]
466
  10%|▉ | 466/4790 [02:06<18:17, 3.94it/s]
467
  10%|▉ | 467/4790 [02:07<18:49, 3.83it/s]
468
  10%|▉ | 468/4790 [02:07<18:49, 3.83it/s]
469
  10%|▉ | 469/4790 [02:07<18:00, 4.00it/s]
470
  10%|▉ | 470/4790 [02:07<18:25, 3.91it/s]
471
  10%|▉ | 471/4790 [02:08<20:39, 3.49it/s]
472
  10%|▉ | 472/4790 [02:08<20:58, 3.43it/s]
473
  10%|▉ | 473/4790 [02:08<20:46, 3.46it/s]
474
  10%|▉ | 474/4790 [02:09<19:24, 3.71it/s]
475
  10%|▉ | 475/4790 [02:09<17:54, 4.02it/s]
476
  10%|▉ | 476/4790 [02:09<18:35, 3.87it/s]
477
  10%|▉ | 477/4790 [02:09<17:33, 4.09it/s]
478
  10%|▉ | 478/4790 [02:10<20:27, 3.51it/s]
479
  10%|█ | 479/4790 [02:10<19:05, 3.76it/s][INFO|trainer.py:811] 2024-09-05 13:37:19,924 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
 
 
 
 
 
 
480
  0%| | 0/850 [00:00<?, ?it/s]
 
481
  1%| | 9/850 [00:00<00:11, 75.89it/s]
 
482
  2%|▏ | 18/850 [00:00<00:10, 78.45it/s]
 
483
  3%|▎ | 27/850 [00:00<00:10, 80.37it/s]
 
484
  4%|▍ | 37/850 [00:00<00:09, 84.54it/s]
 
485
  5%|▌ | 46/850 [00:00<00:09, 85.60it/s]
 
486
  6%|▋ | 55/850 [00:00<00:10, 78.60it/s]
 
487
  8%|▊ | 64/850 [00:00<00:09, 79.44it/s]
 
488
  9%|▊ | 73/850 [00:00<00:10, 75.52it/s]
 
489
  10%|▉ | 81/850 [00:01<00:10, 76.28it/s]
 
490
  11%|█ | 90/850 [00:01<00:09, 80.04it/s]
 
491
  12%|█▏ | 99/850 [00:01<00:09, 82.32it/s]
 
492
  13%|█▎ | 109/850 [00:01<00:08, 84.86it/s]
 
493
  14%|█▍ | 118/850 [00:01<00:08, 85.25it/s]
 
494
  15%|█▍ | 127/850 [00:01<00:08, 86.48it/s]
 
495
  16%|█▌ | 136/850 [00:01<00:08, 85.42it/s]
 
496
  17%|█▋ | 145/850 [00:01<00:08, 81.53it/s]
 
497
  18%|█▊ | 154/850 [00:01<00:08, 82.60it/s]
 
498
  19%|█▉ | 163/850 [00:01<00:08, 84.65it/s]
 
499
  20%|██ | 172/850 [00:02<00:07, 85.43it/s]
 
500
  21%|██▏ | 181/850 [00:02<00:07, 85.66it/s]
 
501
  22%|██▏ | 190/850 [00:02<00:07, 84.23it/s]
 
502
  23%|██▎ | 199/850 [00:02<00:07, 85.01it/s]
 
503
  24%|██▍ | 208/850 [00:02<00:07, 83.44it/s]
 
504
  26%|██▌ | 217/850 [00:02<00:07, 81.88it/s]
 
505
  27%|██▋ | 227/850 [00:02<00:07, 84.16it/s]
 
506
  28%|██▊ | 237/850 [00:02<00:07, 86.15it/s]
 
507
  29%|██▉ | 246/850 [00:02<00:07, 84.46it/s]
 
508
  30%|███ | 256/850 [00:03<00:06, 88.24it/s]
 
509
  31%|███ | 265/850 [00:03<00:06, 87.70it/s]
 
510
  32%|███▏ | 275/850 [00:03<00:06, 88.16it/s]
 
511
  33%|███▎ | 284/850 [00:03<00:06, 87.65it/s]
 
512
  35%|███▍ | 294/850 [00:03<00:06, 88.09it/s]
 
513
  36%|███▌ | 303/850 [00:03<00:06, 86.09it/s]
 
514
  37%|███▋ | 313/850 [00:03<00:06, 87.89it/s]
 
515
  38%|███▊ | 322/850 [00:03<00:06, 87.79it/s]
 
516
  39%|███▉ | 331/850 [00:03<00:05, 87.25it/s]
 
517
  40%|████ | 340/850 [00:04<00:05, 87.67it/s]
 
518
  41%|████ | 349/850 [00:04<00:05, 86.98it/s]
 
519
  42%|████▏ | 358/850 [00:04<00:05, 85.62it/s]
 
520
  43%|████▎ | 367/850 [00:04<00:05, 85.83it/s]
 
521
  44%|████▍ | 376/850 [00:04<00:05, 84.60it/s]
 
522
  45%|████▌ | 385/850 [00:04<00:05, 82.20it/s]
 
523
  46%|████▋ | 394/850 [00:04<00:05, 80.28it/s]
 
524
  47%|████▋ | 403/850 [00:04<00:05, 81.93it/s]
 
525
  49%|████▊ | 413/850 [00:04<00:05, 85.88it/s]
 
526
  50%|████▉ | 423/850 [00:05<00:04, 87.54it/s]
 
527
  51%|█████ | 432/850 [00:05<00:04, 87.55it/s]
 
528
  52%|█████▏ | 441/850 [00:05<00:04, 86.85it/s]
 
529
  53%|█████▎ | 450/850 [00:05<00:04, 83.69it/s]
 
530
  54%|█████▍ | 460/850 [00:05<00:04, 86.80it/s]
 
531
  55%|█████▌ | 470/850 [00:05<00:04, 88.96it/s]
 
532
  56%|█████▋ | 480/850 [00:05<00:04, 90.23it/s]
 
533
  58%|█████▊ | 490/850 [00:05<00:04, 88.23it/s]
 
534
  59%|█████▉ | 500/850 [00:05<00:03, 89.90it/s]
 
535
  60%|██████ | 510/850 [00:05<00:03, 89.99it/s]
 
536
  61%|██████ | 520/850 [00:06<00:03, 90.30it/s]
 
537
  62%|██████▏ | 530/850 [00:06<00:03, 86.55it/s]
 
538
  63%|██████▎ | 539/850 [00:06<00:03, 85.36it/s]
 
539
  64%|██████▍ | 548/850 [00:06<00:03, 83.42it/s]
 
540
  66%|██████▌ | 557/850 [00:06<00:03, 83.02it/s]
 
541
  67%|██████▋ | 566/850 [00:06<00:03, 80.41it/s]
 
542
  68%|██████▊ | 575/850 [00:06<00:03, 82.97it/s]
 
543
  69%|██████▊ | 584/850 [00:06<00:03, 83.09it/s]
 
544
  70%|██████▉ | 593/850 [00:06<00:03, 84.13it/s]
 
545
  71%|███████ | 602/850 [00:07<00:02, 83.90it/s]
 
546
  72%|███████▏ | 611/850 [00:07<00:02, 84.78it/s]
 
547
  73%|███████▎ | 620/850 [00:07<00:02, 84.83it/s]
 
548
  74%|███████▍ | 629/850 [00:07<00:02, 84.00it/s]
 
549
  75%|███████▌ | 638/850 [00:07<00:02, 84.53it/s]
 
550
  76%|███████▌ | 647/850 [00:07<00:02, 85.90it/s]
 
551
  77%|███████▋ | 656/850 [00:07<00:02, 86.93it/s]
 
552
  78%|███████▊ | 666/850 [00:07<00:02, 88.25it/s]
 
553
  79%|███████▉ | 675/850 [00:07<00:02, 85.00it/s]
 
554
  80%|████████ | 684/850 [00:08<00:01, 85.36it/s]
 
555
  82%|████████▏ | 693/850 [00:08<00:01, 83.16it/s]
 
556
  83%|████████▎ | 702/850 [00:08<00:01, 83.49it/s]
 
557
  84%|████████▎ | 711/850 [00:08<00:01, 82.74it/s]
 
558
  85%|████████▍ | 720/850 [00:08<00:01, 83.13it/s]
 
559
  86%|████████▌ | 729/850 [00:08<00:01, 80.00it/s]
 
560
  87%|████████▋ | 738/850 [00:08<00:01, 80.48it/s]
 
561
  88%|████████▊ | 747/850 [00:08<00:01, 80.29it/s]
 
562
  89%|████████▉ | 756/850 [00:08<00:01, 80.87it/s]
 
563
  90%|█████████ | 765/850 [00:09<00:01, 82.87it/s]
 
564
  91%|█████████ | 774/850 [00:09<00:00, 82.26it/s]
 
565
  92%|█████████▏| 783/850 [00:09<00:00, 80.12it/s]
 
566
  93%|█████████▎| 792/850 [00:09<00:00, 79.96it/s]
 
567
  94%|█████████▍| 802/850 [00:09<00:00, 83.67it/s]
 
568
  95%|█████████▌| 811/850 [00:09<00:00, 81.61it/s]
 
569
  97%|█████████▋| 821/850 [00:09<00:00, 85.11it/s]
 
570
  98%|█████████▊| 830/850 [00:09<00:00, 84.40it/s]
 
571
  99%|█████████▊| 839/850 [00:09<00:00, 84.53it/s]
 
572
 
 
573
 
574
  10%|█ | 479/4790 [02:24<19:05, 3.76it/s]
 
 
575
  [INFO|trainer.py:3503] 2024-09-05 13:37:33,956 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-479
 
 
 
 
 
 
 
576
  10%|█ | 480/4790 [02:27<6:21:25, 5.31s/it]
577
  10%|█ | 481/4790 [02:27<4:31:30, 3.78s/it]
578
  10%|█ | 482/4790 [02:27<3:16:31, 2.74s/it]
579
  10%|█ | 483/4790 [02:28<2:25:49, 2.03s/it]
580
  10%|█ | 484/4790 [02:28<1:46:37, 1.49s/it]
581
  10%|█ | 485/4790 [02:28<1:22:39, 1.15s/it]
582
  10%|█ | 486/4790 [02:29<1:02:21, 1.15it/s]
583
  10%|█ | 487/4790 [02:29<50:31, 1.42it/s]
584
  10%|█ | 488/4790 [02:29<41:55, 1.71it/s]
585
  10%|█ | 489/4790 [02:29<33:13, 2.16it/s]
586
  10%|█ | 490/4790 [02:30<29:22, 2.44it/s]
587
  10%|█ | 491/4790 [02:30<24:58, 2.87it/s]
588
  10%|█ | 492/4790 [02:30<22:15, 3.22it/s]
589
  10%|█ | 493/4790 [02:30<20:11, 3.55it/s]
590
  10%|█ | 494/4790 [02:31<20:24, 3.51it/s]
591
  10%|█ | 495/4790 [02:31<19:16, 3.71it/s]
592
  10%|█ | 496/4790 [02:31<19:33, 3.66it/s]
593
  10%|█ | 497/4790 [02:32<21:30, 3.33it/s]
594
  10%|█ | 498/4790 [02:32<22:35, 3.17it/s]
595
  10%|█ | 499/4790 [02:32<20:38, 3.46it/s]
596
  10%|█ | 500/4790 [02:32<19:03, 3.75it/s]
597
 
598
  10%|█ | 500/4790 [02:32<19:03, 3.75it/s]
599
  10%|█ | 501/4790 [02:33<19:03, 3.75it/s]
600
  10%|█ | 502/4790 [02:33<19:37, 3.64it/s]
601
  11%|█ | 503/4790 [02:33<18:13, 3.92it/s]
602
  11%|█ | 504/4790 [02:34<21:59, 3.25it/s]
603
  11%|█ | 505/4790 [02:34<20:52, 3.42it/s]
604
  11%|█ | 506/4790 [02:34<20:21, 3.51it/s]
605
  11%|█ | 507/4790 [02:34<18:42, 3.82it/s]
606
  11%|█ | 508/4790 [02:35<22:26, 3.18it/s]
607
  11%|█ | 509/4790 [02:35<22:30, 3.17it/s]
608
  11%|█ | 510/4790 [02:35<21:05, 3.38it/s]
609
  11%|█ | 511/4790 [02:36<19:19, 3.69it/s]
610
  11%|█ | 512/4790 [02:36<19:44, 3.61it/s]
611
  11%|█ | 513/4790 [02:36<20:22, 3.50it/s]
612
  11%|█ | 514/4790 [02:36<19:52, 3.59it/s]
613
  11%|█ | 515/4790 [02:37<21:09, 3.37it/s]
614
  11%|█ | 516/4790 [02:37<20:38, 3.45it/s]
615
  11%|█ | 517/4790 [02:37<20:57, 3.40it/s]
616
  11%|█ | 518/4790 [02:38<19:47, 3.60it/s]
617
  11%|█ | 519/4790 [02:38<18:21, 3.88it/s]
 
1
+ 2024-09-05 13:34:48.258223: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2
+ 2024-09-05 13:34:48.275126: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
3
+ 2024-09-05 13:34:48.295825: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
4
+ 2024-09-05 13:34:48.302058: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
5
+ 2024-09-05 13:34:48.316617: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
6
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
7
+ 2024-09-05 13:34:49.554072: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
8
+ /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
9
+ warnings.warn(
10
+ 09/05/2024 13:34:51 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
11
+ 09/05/2024 13:34:51 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
12
+ _n_gpu=1,
13
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
14
+ adafactor=False,
15
+ adam_beta1=0.9,
16
+ adam_beta2=0.999,
17
+ adam_epsilon=1e-08,
18
+ auto_find_batch_size=False,
19
+ batch_eval_metrics=False,
20
+ bf16=False,
21
+ bf16_full_eval=False,
22
+ data_seed=None,
23
+ dataloader_drop_last=False,
24
+ dataloader_num_workers=0,
25
+ dataloader_persistent_workers=False,
26
+ dataloader_pin_memory=True,
27
+ dataloader_prefetch_factor=None,
28
+ ddp_backend=None,
29
+ ddp_broadcast_buffers=None,
30
+ ddp_bucket_cap_mb=None,
31
+ ddp_find_unused_parameters=None,
32
+ ddp_timeout=1800,
33
+ debug=[],
34
+ deepspeed=None,
35
+ disable_tqdm=False,
36
+ dispatch_batches=None,
37
+ do_eval=True,
38
+ do_predict=True,
39
+ do_train=True,
40
+ eval_accumulation_steps=None,
41
+ eval_delay=0,
42
+ eval_do_concat_batches=True,
43
+ eval_on_start=False,
44
+ eval_steps=None,
45
+ eval_strategy=epoch,
46
+ eval_use_gather_object=False,
47
+ evaluation_strategy=epoch,
48
+ fp16=False,
49
+ fp16_backend=auto,
50
+ fp16_full_eval=False,
51
+ fp16_opt_level=O1,
52
+ fsdp=[],
53
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
54
+ fsdp_min_num_params=0,
55
+ fsdp_transformer_layer_cls_to_wrap=None,
56
+ full_determinism=False,
57
+ gradient_accumulation_steps=2,
58
+ gradient_checkpointing=False,
59
+ gradient_checkpointing_kwargs=None,
60
+ greater_is_better=True,
61
+ group_by_length=False,
62
+ half_precision_backend=auto,
63
+ hub_always_push=False,
64
+ hub_model_id=None,
65
+ hub_private_repo=False,
66
+ hub_strategy=every_save,
67
+ hub_token=<HUB_TOKEN>,
68
+ ignore_data_skip=False,
69
+ include_inputs_for_metrics=False,
70
+ include_num_input_tokens_seen=False,
71
+ include_tokens_per_second=False,
72
+ jit_mode_eval=False,
73
+ label_names=None,
74
+ label_smoothing_factor=0.0,
75
+ learning_rate=5e-05,
76
+ length_column_name=length,
77
+ load_best_model_at_end=True,
78
+ local_rank=0,
79
+ log_level=passive,
80
+ log_level_replica=warning,
81
+ log_on_each_node=True,
82
+ logging_dir=/content/dissertation/scripts/ner/output/tb,
83
+ logging_first_step=False,
84
+ logging_nan_inf_filter=True,
85
+ logging_steps=500,
86
+ logging_strategy=steps,
87
+ lr_scheduler_kwargs={},
88
+ lr_scheduler_type=linear,
89
+ max_grad_norm=1.0,
90
+ max_steps=-1,
91
+ metric_for_best_model=f1,
92
+ mp_parameters=,
93
+ neftune_noise_alpha=None,
94
+ no_cuda=False,
95
+ num_train_epochs=10.0,
96
+ optim=adamw_torch,
97
+ optim_args=None,
98
+ optim_target_modules=None,
99
+ output_dir=/content/dissertation/scripts/ner/output,
100
+ overwrite_output_dir=True,
101
+ past_index=-1,
102
+ per_device_eval_batch_size=8,
103
+ per_device_train_batch_size=32,
104
+ prediction_loss_only=False,
105
+ push_to_hub=True,
106
+ push_to_hub_model_id=None,
107
+ push_to_hub_organization=None,
108
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
109
+ ray_scope=last,
110
+ remove_unused_columns=True,
111
+ report_to=['tensorboard'],
112
+ restore_callback_states_from_checkpoint=False,
113
+ resume_from_checkpoint=None,
114
+ run_name=/content/dissertation/scripts/ner/output,
115
+ save_on_each_node=False,
116
+ save_only_model=False,
117
+ save_safetensors=True,
118
+ save_steps=500,
119
+ save_strategy=epoch,
120
+ save_total_limit=None,
121
+ seed=42,
122
+ skip_memory_metrics=True,
123
+ split_batches=None,
124
+ tf32=None,
125
+ torch_compile=False,
126
+ torch_compile_backend=None,
127
+ torch_compile_mode=None,
128
+ torch_empty_cache_steps=None,
129
+ torchdynamo=None,
130
+ tpu_metrics_debug=False,
131
+ tpu_num_cores=None,
132
+ use_cpu=False,
133
+ use_ipex=False,
134
+ use_legacy_prediction_loop=False,
135
+ use_mps_device=False,
136
+ warmup_ratio=0.0,
137
+ warmup_steps=0,
138
+ weight_decay=0.0,
139
+ )
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+ [INFO|configuration_utils.py:733] 2024-09-05 13:35:02,046 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/config.json
148
+ [INFO|configuration_utils.py:800] 2024-09-05 13:35:02,050 >> Model config BertConfig {
149
+ "_name_or_path": "IVN-RIN/bioBIT",
150
+ "architectures": [
151
+ "BertForMaskedLM"
152
+ ],
153
+ "attention_probs_dropout_prob": 0.1,
154
+ "classifier_dropout": null,
155
+ "finetuning_task": "ner",
156
+ "hidden_act": "gelu",
157
+ "hidden_dropout_prob": 0.1,
158
+ "hidden_size": 768,
159
+ "id2label": {
160
+ "0": "O",
161
+ "1": "B-FARMACO",
162
+ "2": "I-FARMACO"
163
+ },
164
+ "initializer_range": 0.02,
165
+ "intermediate_size": 3072,
166
+ "label2id": {
167
+ "B-FARMACO": 1,
168
+ "I-FARMACO": 2,
169
+ "O": 0
170
+ },
171
+ "layer_norm_eps": 1e-12,
172
+ "max_position_embeddings": 512,
173
+ "model_type": "bert",
174
+ "num_attention_heads": 12,
175
+ "num_hidden_layers": 12,
176
+ "pad_token_id": 0,
177
+ "position_embedding_type": "absolute",
178
+ "torch_dtype": "float32",
179
+ "transformers_version": "4.44.2",
180
+ "type_vocab_size": 2,
181
+ "use_cache": true,
182
+ "vocab_size": 31102
183
+ }
184
+
185
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 13:35:02,109 >> loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/vocab.txt
186
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 13:35:02,109 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/tokenizer.json
187
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 13:35:02,109 >> loading file added_tokens.json from cache at None
188
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 13:35:02,109 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/special_tokens_map.json
189
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 13:35:02,109 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/tokenizer_config.json
190
+ /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
191
+ warnings.warn(
192
+ [INFO|modeling_utils.py:3678] 2024-09-05 13:35:02,174 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--IVN-RIN--bioBIT/snapshots/83755ed79ee254c11854e9f54a53679557271018/model.safetensors
193
+ [INFO|modeling_utils.py:4497] 2024-09-05 13:35:02,231 >> Some weights of the model checkpoint at IVN-RIN/bioBIT were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
194
+ - This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
195
+ - This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
196
+ [WARNING|modeling_utils.py:4509] 2024-09-05 13:35:02,231 >> Some weights of BertForTokenClassification were not initialized from the model checkpoint at IVN-RIN/bioBIT and are newly initialized: ['classifier.bias', 'classifier.weight']
197
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
198
+
199
+
200
+
201
+ /content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
202
+ metric = load_metric("seqeval", trust_remote_code=True)
203
+ [INFO|trainer.py:811] 2024-09-05 13:35:08,966 >> The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
204
+ [INFO|trainer.py:2134] 2024-09-05 13:35:09,528 >> ***** Running training *****
205
+ [INFO|trainer.py:2135] 2024-09-05 13:35:09,528 >> Num examples = 30,642
206
+ [INFO|trainer.py:2136] 2024-09-05 13:35:09,528 >> Num Epochs = 10
207
+ [INFO|trainer.py:2137] 2024-09-05 13:35:09,528 >> Instantaneous batch size per device = 32
208
+ [INFO|trainer.py:2140] 2024-09-05 13:35:09,528 >> Total train batch size (w. parallel, distributed & accumulation) = 64
209
+ [INFO|trainer.py:2141] 2024-09-05 13:35:09,528 >> Gradient Accumulation steps = 2
210
+ [INFO|trainer.py:2142] 2024-09-05 13:35:09,528 >> Total optimization steps = 4,790
211
+ [INFO|trainer.py:2143] 2024-09-05 13:35:09,529 >> Number of trainable parameters = 109,339,395
212
+
213
  0%| | 0/4790 [00:00<?, ?it/s]
214
  0%| | 1/4790 [00:01<1:20:56, 1.01s/it]
215
  0%| | 2/4790 [00:01<50:37, 1.58it/s]
216
  0%| | 3/4790 [00:01<37:18, 2.14it/s]
217
  0%| | 4/4790 [00:01<29:46, 2.68it/s]
218
  0%| | 5/4790 [00:02<25:48, 3.09it/s]
219
  0%| | 6/4790 [00:02<26:14, 3.04it/s]
220
  0%| | 7/4790 [00:02<24:55, 3.20it/s]
221
  0%| | 8/4790 [00:02<22:17, 3.57it/s]
222
  0%| | 9/4790 [00:03<21:39, 3.68it/s]
223
  0%| | 10/4790 [00:03<22:52, 3.48it/s]
224
  0%| | 11/4790 [00:03<21:15, 3.75it/s]
225
  0%| | 12/4790 [00:04<22:03, 3.61it/s]
226
  0%| | 13/4790 [00:04<20:44, 3.84it/s]
227
  0%| | 14/4790 [00:04<19:51, 4.01it/s]
228
  0%| | 15/4790 [00:04<18:24, 4.32it/s]
229
  0%| | 16/4790 [00:04<18:34, 4.28it/s]
230
  0%| | 17/4790 [00:05<18:30, 4.30it/s]
231
  0%| | 18/4790 [00:05<18:13, 4.37it/s]
232
  0%| | 19/4790 [00:05<20:34, 3.86it/s]
233
  0%| | 20/4790 [00:05<19:30, 4.07it/s]
234
  0%| | 21/4790 [00:06<19:47, 4.02it/s]
235
  0%| | 22/4790 [00:06<19:42, 4.03it/s]
236
  0%| | 23/4790 [00:06<19:29, 4.08it/s]
237
  1%| | 24/4790 [00:06<20:18, 3.91it/s]
238
  1%| | 25/4790 [00:07<20:17, 3.91it/s]
239
  1%| | 26/4790 [00:07<19:08, 4.15it/s]
240
  1%| | 27/4790 [00:07<18:37, 4.26it/s]
241
  1%| | 28/4790 [00:07<20:30, 3.87it/s]
242
  1%| | 29/4790 [00:08<20:31, 3.86it/s]
243
  1%| | 30/4790 [00:08<21:02, 3.77it/s]
244
  1%| | 31/4790 [00:08<20:09, 3.93it/s]
245
  1%| | 32/4790 [00:09<22:20, 3.55it/s]
246
  1%| | 33/4790 [00:09<20:31, 3.86it/s]
247
  1%| | 34/4790 [00:09<21:36, 3.67it/s]
248
  1%| | 35/4790 [00:09<22:11, 3.57it/s]
249
  1%| | 36/4790 [00:10<20:46, 3.81it/s]
250
  1%| | 37/4790 [00:10<23:52, 3.32it/s]
251
  1%| | 38/4790 [00:10<21:50, 3.63it/s]
252
  1%| | 39/4790 [00:10<22:49, 3.47it/s]
253
  1%| | 40/4790 [00:11<23:36, 3.35it/s]
254
  1%| | 41/4790 [00:11<21:06, 3.75it/s]
255
  1%| | 42/4790 [00:11<20:37, 3.84it/s]
256
  1%| | 43/4790 [00:12<20:36, 3.84it/s]
257
  1%| | 44/4790 [00:12<30:21, 2.61it/s]
258
  1%| | 45/4790 [00:12<26:51, 2.94it/s]
259
  1%| | 46/4790 [00:13<24:36, 3.21it/s]
260
  1%| | 47/4790 [00:13<26:25, 2.99it/s]
261
  1%| | 48/4790 [00:13<24:41, 3.20it/s]
262
  1%| | 49/4790 [00:14<23:30, 3.36it/s]
263
  1%| | 50/4790 [00:14<23:01, 3.43it/s]
264
  1%| | 51/4790 [00:14<22:17, 3.54it/s]
265
  1%| | 52/4790 [00:14<21:00, 3.76it/s]
266
  1%| | 53/4790 [00:15<19:53, 3.97it/s]
267
  1%| | 54/4790 [00:15<19:08, 4.12it/s]
268
  1%| | 55/4790 [00:15<21:09, 3.73it/s]
269
  1%| | 56/4790 [00:15<19:04, 4.14it/s]
270
  1%| | 57/4790 [00:16<20:34, 3.84it/s]
271
  1%| | 58/4790 [00:16<20:14, 3.90it/s]
272
  1%| | 59/4790 [00:16<19:47, 3.98it/s]
273
  1%|▏ | 60/4790 [00:16<22:20, 3.53it/s]
274
  1%|▏ | 61/4790 [00:17<21:23, 3.68it/s]
275
  1%|▏ | 62/4790 [00:17<21:38, 3.64it/s]
276
  1%|▏ | 63/4790 [00:17<22:00, 3.58it/s]
277
  1%|▏ | 64/4790 [00:18<21:29, 3.67it/s]
278
  1%|▏ | 65/4790 [00:18<21:47, 3.61it/s]
279
  1%|▏ | 66/4790 [00:18<20:36, 3.82it/s]
280
  1%|▏ | 67/4790 [00:19<27:23, 2.87it/s]
281
  1%|▏ | 68/4790 [00:19<26:32, 2.97it/s]
282
  1%|▏ | 69/4790 [00:19<24:31, 3.21it/s]
283
  1%|▏ | 70/4790 [00:19<23:00, 3.42it/s]
284
  1%|▏ | 71/4790 [00:20<23:36, 3.33it/s]
285
  2%|▏ | 72/4790 [00:20<22:01, 3.57it/s]
286
  2%|▏ | 73/4790 [00:20<20:12, 3.89it/s]
287
  2%|▏ | 74/4790 [00:20<18:49, 4.17it/s]
288
  2%|▏ | 75/4790 [00:21<20:11, 3.89it/s]
289
  2%|▏ | 76/4790 [00:21<19:23, 4.05it/s]
290
  2%|▏ | 77/4790 [00:21<21:03, 3.73it/s]
291
  2%|▏ | 78/4790 [00:21<20:42, 3.79it/s]
292
  2%|▏ | 79/4790 [00:22<20:11, 3.89it/s]
293
  2%|▏ | 80/4790 [00:22<20:50, 3.77it/s]
294
  2%|▏ | 81/4790 [00:22<19:14, 4.08it/s]
295
  2%|▏ | 82/4790 [00:22<21:12, 3.70it/s]
296
  2%|▏ | 83/4790 [00:23<20:43, 3.78it/s]
297
  2%|▏ | 84/4790 [00:23<21:25, 3.66it/s]
298
  2%|▏ | 85/4790 [00:23<22:19, 3.51it/s]
299
  2%|▏ | 86/4790 [00:24<21:02, 3.72it/s]
300
  2%|▏ | 87/4790 [00:24<19:44, 3.97it/s]
301
  2%|▏ | 88/4790 [00:24<18:39, 4.20it/s]
302
  2%|▏ | 89/4790 [00:24<19:58, 3.92it/s]
303
  2%|▏ | 90/4790 [00:25<20:11, 3.88it/s]
304
  2%|▏ | 91/4790 [00:25<22:09, 3.53it/s]
305
  2%|▏ | 92/4790 [00:25<21:43, 3.60it/s]
306
  2%|▏ | 93/4790 [00:25<20:35, 3.80it/s]
307
  2%|▏ | 94/4790 [00:26<20:45, 3.77it/s]
308
  2%|▏ | 95/4790 [00:26<18:51, 4.15it/s]
309
  2%|▏ | 96/4790 [00:26<23:08, 3.38it/s]
310
  2%|▏ | 97/4790 [00:26<21:22, 3.66it/s]
311
  2%|▏ | 98/4790 [00:27<21:11, 3.69it/s]
312
  2%|▏ | 99/4790 [00:27<23:06, 3.38it/s]
313
  2%|▏ | 100/4790 [00:27<22:34, 3.46it/s]
314
  2%|▏ | 101/4790 [00:28<22:19, 3.50it/s]
315
  2%|▏ | 102/4790 [00:28<22:22, 3.49it/s]
316
  2%|▏ | 103/4790 [00:28<21:22, 3.65it/s]
317
  2%|▏ | 104/4790 [00:28<20:50, 3.75it/s]
318
  2%|▏ | 105/4790 [00:29<19:16, 4.05it/s]
319
  2%|▏ | 106/4790 [00:29<19:27, 4.01it/s]
320
  2%|▏ | 107/4790 [00:29<20:03, 3.89it/s]
321
  2%|▏ | 108/4790 [00:29<21:03, 3.70it/s]
322
  2%|▏ | 109/4790 [00:30<19:57, 3.91it/s]
323
  2%|▏ | 110/4790 [00:30<18:51, 4.14it/s]
324
  2%|▏ | 111/4790 [00:30<19:09, 4.07it/s]
325
  2%|▏ | 112/4790 [00:30<19:05, 4.08it/s]
326
  2%|▏ | 113/4790 [00:31<18:14, 4.27it/s]
327
  2%|▏ | 114/4790 [00:31<18:41, 4.17it/s]
328
  2%|▏ | 115/4790 [00:31<17:37, 4.42it/s]
329
  2%|▏ | 116/4790 [00:31<18:21, 4.24it/s]
330
  2%|▏ | 117/4790 [00:32<20:53, 3.73it/s]
331
  2%|▏ | 118/4790 [00:32<20:04, 3.88it/s]
332
  2%|▏ | 119/4790 [00:32<21:01, 3.70it/s]
333
  3%|▎ | 120/4790 [00:32<19:06, 4.07it/s]
334
  3%|▎ | 121/4790 [00:33<20:35, 3.78it/s]
335
  3%|▎ | 122/4790 [00:33<21:02, 3.70it/s]
336
  3%|▎ | 123/4790 [00:33<24:04, 3.23it/s]
337
  3%|▎ | 124/4790 [00:34<21:34, 3.60it/s]
338
  3%|▎ | 125/4790 [00:34<21:45, 3.57it/s]
339
  3%|▎ | 126/4790 [00:34<21:13, 3.66it/s]
340
  3%|▎ | 127/4790 [00:34<22:31, 3.45it/s]
341
  3%|▎ | 128/4790 [00:35<21:58, 3.54it/s]
342
  3%|▎ | 129/4790 [00:35<21:51, 3.55it/s]
343
  3%|▎ | 130/4790 [00:35<22:45, 3.41it/s]
344
  3%|▎ | 131/4790 [00:36<21:18, 3.64it/s]
345
  3%|▎ | 132/4790 [00:36<20:34, 3.77it/s]
346
  3%|▎ | 133/4790 [00:36<20:01, 3.88it/s]
347
  3%|▎ | 134/4790 [00:36<22:14, 3.49it/s]
348
  3%|▎ | 135/4790 [00:37<21:41, 3.58it/s]
349
  3%|▎ | 136/4790 [00:37<25:05, 3.09it/s]
350
  3%|▎ | 137/4790 [00:37<23:27, 3.31it/s]
351
  3%|▎ | 138/4790 [00:38<24:07, 3.21it/s]
352
  3%|▎ | 139/4790 [00:38<24:49, 3.12it/s]
353
  3%|▎ | 140/4790 [00:38<22:58, 3.37it/s]
354
  3%|▎ | 141/4790 [00:38<21:06, 3.67it/s]
355
  3%|▎ | 142/4790 [00:39<20:26, 3.79it/s]
356
  3%|▎ | 143/4790 [00:39<21:11, 3.65it/s]
357
  3%|▎ | 144/4790 [00:39<19:58, 3.88it/s]
358
  3%|▎ | 145/4790 [00:39<18:59, 4.08it/s]
359
  3%|▎ | 146/4790 [00:40<18:44, 4.13it/s]
360
  3%|▎ | 147/4790 [00:40<20:34, 3.76it/s]
361
  3%|▎ | 148/4790 [00:40<19:30, 3.97it/s]
362
  3%|▎ | 149/4790 [00:41<21:12, 3.65it/s]
363
  3%|▎ | 150/4790 [00:41<21:36, 3.58it/s]
364
  3%|▎ | 151/4790 [00:41<20:19, 3.80it/s]
365
  3%|▎ | 152/4790 [00:41<20:08, 3.84it/s]
366
  3%|▎ | 153/4790 [00:42<21:57, 3.52it/s]
367
  3%|▎ | 154/4790 [00:42<22:08, 3.49it/s]
368
  3%|▎ | 155/4790 [00:42<20:34, 3.76it/s]
369
  3%|▎ | 156/4790 [00:42<22:06, 3.49it/s]
370
  3%|▎ | 157/4790 [00:43<20:52, 3.70it/s]
371
  3%|▎ | 158/4790 [00:43<19:31, 3.95it/s]
372
  3%|▎ | 159/4790 [00:43<21:04, 3.66it/s]
373
  3%|▎ | 160/4790 [00:44<22:25, 3.44it/s]
374
  3%|▎ | 161/4790 [00:44<21:51, 3.53it/s]
375
  3%|▎ | 162/4790 [00:44<20:32, 3.75it/s]
376
  3%|▎ | 163/4790 [00:44<19:43, 3.91it/s]
377
  3%|▎ | 164/4790 [00:45<20:41, 3.72it/s]
378
  3%|▎ | 165/4790 [00:45<19:02, 4.05it/s]
379
  3%|▎ | 166/4790 [00:45<20:47, 3.71it/s]
380
  3%|▎ | 167/4790 [00:45<22:56, 3.36it/s]
381
  4%|▎ | 168/4790 [00:46<23:05, 3.34it/s]
382
  4%|▎ | 169/4790 [00:46<21:23, 3.60it/s]
383
  4%|▎ | 170/4790 [00:46<21:05, 3.65it/s]
384
  4%|▎ | 171/4790 [00:47<20:26, 3.76it/s]
385
  4%|▎ | 172/4790 [00:47<19:43, 3.90it/s]
386
  4%|▎ | 173/4790 [00:47<21:36, 3.56it/s]
387
  4%|▎ | 174/4790 [00:47<21:23, 3.60it/s]
388
  4%|▎ | 175/4790 [00:48<20:53, 3.68it/s]
389
  4%|▎ | 176/4790 [00:48<20:12, 3.81it/s]
390
  4%|▎ | 177/4790 [00:48<19:38, 3.92it/s]
391
  4%|▎ | 178/4790 [00:48<19:02, 4.04it/s]
392
  4%|▎ | 179/4790 [00:49<18:51, 4.08it/s]
393
  4%|▍ | 180/4790 [00:49<17:55, 4.29it/s]
394
  4%|▍ | 181/4790 [00:49<20:31, 3.74it/s]
395
  4%|▍ | 182/4790 [00:49<19:12, 4.00it/s]
396
  4%|▍ | 183/4790 [00:50<19:09, 4.01it/s]
397
  4%|▍ | 184/4790 [00:50<17:15, 4.45it/s]
398
  4%|▍ | 185/4790 [00:50<21:16, 3.61it/s]
399
  4%|▍ | 186/4790 [00:50<21:56, 3.50it/s]
400
  4%|▍ | 187/4790 [00:51<21:13, 3.61it/s]
401
  4%|▍ | 188/4790 [00:51<19:37, 3.91it/s]
402
  4%|▍ | 189/4790 [00:51<19:49, 3.87it/s]
403
  4%|▍ | 190/4790 [00:51<19:27, 3.94it/s]
404
  4%|▍ | 191/4790 [00:52<18:22, 4.17it/s]
405
  4%|▍ | 192/4790 [00:52<19:45, 3.88it/s]
406
  4%|▍ | 193/4790 [00:52<19:55, 3.85it/s]
407
  4%|▍ | 194/4790 [00:52<19:56, 3.84it/s]
408
  4%|▍ | 195/4790 [00:53<19:34, 3.91it/s]
409
  4%|▍ | 196/4790 [00:53<18:31, 4.13it/s]
410
  4%|▍ | 197/4790 [00:53<17:44, 4.32it/s]
411
  4%|▍ | 198/4790 [00:53<19:39, 3.89it/s]
412
  4%|▍ | 199/4790 [00:54<19:52, 3.85it/s]
413
  4%|▍ | 200/4790 [00:54<19:50, 3.86it/s]
414
  4%|▍ | 201/4790 [00:54<20:10, 3.79it/s]
415
  4%|▍ | 202/4790 [00:54<18:09, 4.21it/s]
416
  4%|▍ | 203/4790 [00:55<18:39, 4.10it/s]
417
  4%|▍ | 204/4790 [00:55<23:44, 3.22it/s]
418
  4%|▍ | 205/4790 [00:55<21:02, 3.63it/s]
419
  4%|▍ | 206/4790 [00:56<20:28, 3.73it/s]
420
  4%|▍ | 207/4790 [00:56<19:35, 3.90it/s]
421
  4%|▍ | 208/4790 [00:56<19:23, 3.94it/s]
422
  4%|▍ | 209/4790 [00:56<20:38, 3.70it/s]
423
  4%|▍ | 210/4790 [00:57<20:28, 3.73it/s]
424
  4%|▍ | 211/4790 [00:57<21:45, 3.51it/s]
425
  4%|▍ | 212/4790 [00:57<19:52, 3.84it/s]
426
  4%|▍ | 213/4790 [00:57<19:25, 3.93it/s]
427
  4%|▍ | 214/4790 [00:58<18:19, 4.16it/s]
428
  4%|▍ | 215/4790 [00:58<19:07, 3.99it/s]
429
  5%|▍ | 216/4790 [00:58<19:16, 3.96it/s]
430
  5%|▍ | 217/4790 [00:58<20:18, 3.75it/s]
431
  5%|▍ | 218/4790 [00:59<21:08, 3.61it/s]
432
  5%|▍ | 219/4790 [00:59<19:39, 3.88it/s]
433
  5%|▍ | 220/4790 [00:59<19:09, 3.97it/s]
434
  5%|▍ | 221/4790 [00:59<18:31, 4.11it/s]
435
  5%|▍ | 222/4790 [01:00<17:59, 4.23it/s]
436
  5%|▍ | 223/4790 [01:00<18:22, 4.14it/s]
437
  5%|▍ | 224/4790 [01:00<18:34, 4.10it/s]
438
  5%|▍ | 225/4790 [01:00<18:29, 4.12it/s]
439
  5%|▍ | 226/4790 [01:01<22:07, 3.44it/s]
440
  5%|▍ | 227/4790 [01:01<21:26, 3.55it/s]
441
  5%|▍ | 228/4790 [01:01<20:48, 3.65it/s]
442
  5%|▍ | 229/4790 [01:02<20:04, 3.79it/s]
443
  5%|▍ | 230/4790 [01:02<19:58, 3.80it/s]
444
  5%|▍ | 231/4790 [01:02<23:39, 3.21it/s]
445
  5%|▍ | 232/4790 [01:02<22:14, 3.42it/s]
446
  5%|▍ | 233/4790 [01:03<22:43, 3.34it/s]
447
  5%|▍ | 234/4790 [01:03<22:02, 3.45it/s]
448
  5%|▍ | 235/4790 [01:03<20:39, 3.67it/s]
449
  5%|▍ | 236/4790 [01:04<23:49, 3.18it/s]
450
  5%|▍ | 237/4790 [01:04<24:54, 3.05it/s]
451
  5%|▍ | 238/4790 [01:04<24:56, 3.04it/s]
452
  5%|▍ | 239/4790 [01:05<24:16, 3.12it/s]
453
  5%|▌ | 240/4790 [01:05<22:37, 3.35it/s]
454
  5%|▌ | 241/4790 [01:05<26:11, 2.89it/s]
455
  5%|▌ | 242/4790 [01:06<23:40, 3.20it/s]
456
  5%|▌ | 243/4790 [01:06<24:51, 3.05it/s]
457
  5%|▌ | 244/4790 [01:06<24:47, 3.06it/s]
458
  5%|▌ | 245/4790 [01:07<22:59, 3.29it/s]
459
  5%|▌ | 246/4790 [01:07<22:32, 3.36it/s]
460
  5%|▌ | 247/4790 [01:07<20:16, 3.73it/s]
461
  5%|▌ | 248/4790 [01:07<19:43, 3.84it/s]
462
  5%|▌ | 249/4790 [01:08<21:41, 3.49it/s]
463
  5%|▌ | 250/4790 [01:08<25:08, 3.01it/s]
464
  5%|▌ | 251/4790 [01:08<25:17, 2.99it/s]
465
  5%|▌ | 252/4790 [01:09<22:38, 3.34it/s]
466
  5%|▌ | 253/4790 [01:09<20:45, 3.64it/s]
467
  5%|▌ | 254/4790 [01:09<19:55, 3.79it/s]
468
  5%|▌ | 255/4790 [01:09<19:58, 3.78it/s]
469
  5%|▌ | 256/4790 [01:10<20:54, 3.61it/s]
470
  5%|▌ | 257/4790 [01:10<22:47, 3.32it/s]
471
  5%|▌ | 258/4790 [01:10<20:47, 3.63it/s]
472
  5%|▌ | 259/4790 [01:10<18:58, 3.98it/s]
473
  5%|▌ | 260/4790 [01:11<21:14, 3.55it/s]
474
  5%|▌ | 261/4790 [01:11<20:03, 3.76it/s]
475
  5%|▌ | 262/4790 [01:11<21:05, 3.58it/s]
476
  5%|▌ | 263/4790 [01:12<21:12, 3.56it/s]
477
  6%|▌ | 264/4790 [01:12<20:51, 3.62it/s]
478
  6%|▌ | 265/4790 [01:12<22:23, 3.37it/s]
479
  6%|▌ | 266/4790 [01:13<21:56, 3.44it/s]
480
  6%|▌ | 267/4790 [01:13<22:31, 3.35it/s]
481
  6%|▌ | 268/4790 [01:13<21:57, 3.43it/s]
482
  6%|▌ | 269/4790 [01:13<22:48, 3.30it/s]
483
  6%|▌ | 270/4790 [01:14<22:38, 3.33it/s]
484
  6%|▌ | 271/4790 [01:14<21:37, 3.48it/s]
485
  6%|▌ | 272/4790 [01:14<19:41, 3.82it/s]
486
  6%|▌ | 273/4790 [01:14<18:42, 4.02it/s]
487
  6%|▌ | 274/4790 [01:15<18:50, 3.99it/s]
488
  6%|▌ | 275/4790 [01:15<19:49, 3.79it/s]
489
  6%|▌ | 276/4790 [01:15<17:32, 4.29it/s]
490
  6%|▌ | 277/4790 [01:15<16:46, 4.48it/s]
491
  6%|▌ | 278/4790 [01:16<16:39, 4.51it/s]
492
  6%|▌ | 279/4790 [01:16<18:49, 3.99it/s]
493
  6%|▌ | 280/4790 [01:16<17:38, 4.26it/s]
494
  6%|▌ | 281/4790 [01:16<16:33, 4.54it/s]
495
  6%|▌ | 282/4790 [01:17<20:05, 3.74it/s]
496
  6%|▌ | 283/4790 [01:17<18:27, 4.07it/s]
497
  6%|▌ | 284/4790 [01:17<18:32, 4.05it/s]
498
  6%|▌ | 285/4790 [01:17<18:28, 4.07it/s]
499
  6%|▌ | 286/4790 [01:18<18:14, 4.12it/s]
500
  6%|▌ | 287/4790 [01:18<21:27, 3.50it/s]
501
  6%|▌ | 288/4790 [01:18<21:52, 3.43it/s]
502
  6%|▌ | 289/4790 [01:18<20:33, 3.65it/s]
503
  6%|▌ | 290/4790 [01:19<21:24, 3.50it/s]
504
  6%|▌ | 291/4790 [01:19<20:36, 3.64it/s]
505
  6%|▌ | 292/4790 [01:19<19:57, 3.76it/s]
506
  6%|▌ | 293/4790 [01:20<24:15, 3.09it/s]
507
  6%|▌ | 294/4790 [01:20<22:18, 3.36it/s]
508
  6%|▌ | 295/4790 [01:20<23:18, 3.21it/s]
509
  6%|▌ | 296/4790 [01:21<25:14, 2.97it/s]
510
  6%|▌ | 297/4790 [01:21<24:18, 3.08it/s]
511
  6%|▌ | 298/4790 [01:21<21:32, 3.48it/s]
512
  6%|▌ | 299/4790 [01:21<20:51, 3.59it/s]
513
  6%|▋ | 300/4790 [01:22<19:20, 3.87it/s]
514
  6%|▋ | 301/4790 [01:22<19:39, 3.81it/s]
515
  6%|▋ | 302/4790 [01:22<19:31, 3.83it/s]
516
  6%|▋ | 303/4790 [01:22<20:12, 3.70it/s]
517
  6%|▋ | 304/4790 [01:23<19:28, 3.84it/s]
518
  6%|▋ | 305/4790 [01:23<19:17, 3.88it/s]
519
  6%|▋ | 306/4790 [01:23<17:51, 4.18it/s]
520
  6%|▋ | 307/4790 [01:23<18:10, 4.11it/s]
521
  6%|▋ | 308/4790 [01:24<23:32, 3.17it/s]
522
  6%|▋ | 309/4790 [01:24<21:51, 3.42it/s]
523
  6%|▋ | 310/4790 [01:24<20:53, 3.57it/s]
524
  6%|▋ | 311/4790 [01:25<20:05, 3.72it/s]
525
  7%|▋ | 312/4790 [01:25<19:17, 3.87it/s]
526
  7%|▋ | 313/4790 [01:25<19:03, 3.91it/s]
527
  7%|��� | 314/4790 [01:25<21:26, 3.48it/s]
528
  7%|▋ | 315/4790 [01:26<20:37, 3.61it/s]
529
  7%|▋ | 316/4790 [01:26<20:04, 3.71it/s]
530
  7%|▋ | 317/4790 [01:26<19:26, 3.83it/s]
531
  7%|▋ | 318/4790 [01:27<20:44, 3.59it/s]
532
  7%|▋ | 319/4790 [01:27<19:54, 3.74it/s]
533
  7%|▋ | 320/4790 [01:27<22:32, 3.30it/s]
534
  7%|▋ | 321/4790 [01:27<21:02, 3.54it/s]
535
  7%|▋ | 322/4790 [01:28<21:39, 3.44it/s]
536
  7%|▋ | 323/4790 [01:28<21:36, 3.45it/s]
537
  7%|▋ | 324/4790 [01:28<20:08, 3.70it/s]
538
  7%|▋ | 325/4790 [01:29<20:12, 3.68it/s]
539
  7%|▋ | 326/4790 [01:29<18:55, 3.93it/s]
540
  7%|▋ | 327/4790 [01:29<18:47, 3.96it/s]
541
  7%|▋ | 328/4790 [01:29<17:11, 4.33it/s]
542
  7%|▋ | 329/4790 [01:29<19:41, 3.78it/s]
543
  7%|▋ | 330/4790 [01:30<19:27, 3.82it/s]
544
  7%|▋ | 331/4790 [01:30<19:14, 3.86it/s]
545
  7%|▋ | 332/4790 [01:30<19:45, 3.76it/s]
546
  7%|▋ | 333/4790 [01:31<19:15, 3.86it/s]
547
  7%|▋ | 334/4790 [01:31<19:12, 3.86it/s]
548
  7%|▋ | 335/4790 [01:31<19:52, 3.74it/s]
549
  7%|▋ | 336/4790 [01:31<21:35, 3.44it/s]
550
  7%|▋ | 337/4790 [01:32<25:21, 2.93it/s]
551
  7%|▋ | 338/4790 [01:32<24:09, 3.07it/s]
552
  7%|▋ | 339/4790 [01:32<21:21, 3.47it/s]
553
  7%|▋ | 340/4790 [01:33<21:04, 3.52it/s]
554
  7%|▋ | 341/4790 [01:33<21:03, 3.52it/s]
555
  7%|▋ | 342/4790 [01:33<18:26, 4.02it/s]
556
  7%|▋ | 343/4790 [01:33<17:48, 4.16it/s]
557
  7%|▋ | 344/4790 [01:34<16:38, 4.45it/s]
558
  7%|▋ | 345/4790 [01:34<18:20, 4.04it/s]
559
  7%|▋ | 346/4790 [01:34<18:51, 3.93it/s]
560
  7%|▋ | 347/4790 [01:34<18:24, 4.02it/s]
561
  7%|▋ | 348/4790 [01:35<18:23, 4.02it/s]
562
  7%|▋ | 349/4790 [01:35<19:05, 3.88it/s]
563
  7%|▋ | 350/4790 [01:35<18:45, 3.94it/s]
564
  7%|▋ | 351/4790 [01:35<19:40, 3.76it/s]
565
  7%|▋ | 352/4790 [01:36<20:27, 3.62it/s]
566
  7%|▋ | 353/4790 [01:36<20:13, 3.66it/s]
567
  7%|▋ | 354/4790 [01:36<21:46, 3.39it/s]
568
  7%|▋ | 355/4790 [01:37<20:23, 3.62it/s]
569
  7%|▋ | 356/4790 [01:37<20:12, 3.66it/s]
570
  7%|▋ | 357/4790 [01:37<20:34, 3.59it/s]
571
  7%|▋ | 358/4790 [01:37<20:09, 3.66it/s]
572
  7%|▋ | 359/4790 [01:38<21:14, 3.48it/s]
573
  8%|▊ | 360/4790 [01:38<20:40, 3.57it/s]
574
  8%|▊ | 361/4790 [01:38<19:59, 3.69it/s]
575
  8%|▊ | 362/4790 [01:39<23:39, 3.12it/s]
576
  8%|▊ | 363/4790 [01:39<23:06, 3.19it/s]
577
  8%|▊ | 364/4790 [01:39<21:44, 3.39it/s]
578
  8%|▊ | 365/4790 [01:39<20:51, 3.54it/s]
579
  8%|▊ | 366/4790 [01:40<20:37, 3.57it/s]
580
  8%|▊ | 367/4790 [01:40<20:49, 3.54it/s]
581
  8%|▊ | 368/4790 [01:40<19:58, 3.69it/s]
582
  8%|▊ | 369/4790 [01:41<23:59, 3.07it/s]
583
  8%|▊ | 370/4790 [01:41<24:54, 2.96it/s]
584
  8%|▊ | 371/4790 [01:41<23:32, 3.13it/s]
585
  8%|▊ | 372/4790 [01:42<24:14, 3.04it/s]
586
  8%|▊ | 373/4790 [01:42<21:57, 3.35it/s]
587
  8%|▊ | 374/4790 [01:42<20:08, 3.65it/s]
588
  8%|▊ | 375/4790 [01:43<23:55, 3.08it/s]
589
  8%|▊ | 376/4790 [01:43<23:20, 3.15it/s]
590
  8%|▊ | 377/4790 [01:43<20:48, 3.53it/s]
591
  8%|▊ | 378/4790 [01:43<20:24, 3.60it/s]
592
  8%|▊ | 379/4790 [01:44<19:31, 3.77it/s]
593
  8%|▊ | 380/4790 [01:44<19:40, 3.74it/s]
594
  8%|▊ | 381/4790 [01:44<18:52, 3.89it/s]
595
  8%|▊ | 382/4790 [01:44<17:12, 4.27it/s]
596
  8%|▊ | 383/4790 [01:44<16:23, 4.48it/s]
597
  8%|▊ | 384/4790 [01:45<16:58, 4.32it/s]
598
  8%|▊ | 385/4790 [01:45<19:00, 3.86it/s]
599
  8%|▊ | 386/4790 [01:45<20:09, 3.64it/s]
600
  8%|▊ | 387/4790 [01:46<21:31, 3.41it/s]
601
  8%|▊ | 388/4790 [01:46<20:03, 3.66it/s]
602
  8%|▊ | 389/4790 [01:46<22:50, 3.21it/s]
603
  8%|▊ | 390/4790 [01:46<20:16, 3.62it/s]
604
  8%|▊ | 391/4790 [01:47<21:09, 3.47it/s]
605
  8%|▊ | 392/4790 [01:47<19:30, 3.76it/s]
606
  8%|▊ | 393/4790 [01:47<17:51, 4.10it/s]
607
  8%|▊ | 394/4790 [01:47<17:49, 4.11it/s]
608
  8%|▊ | 395/4790 [01:48<18:08, 4.04it/s]
609
  8%|▊ | 396/4790 [01:48<17:36, 4.16it/s]
610
  8%|▊ | 397/4790 [01:48<19:35, 3.74it/s]
611
  8%|▊ | 398/4790 [01:49<20:26, 3.58it/s]
612
  8%|▊ | 399/4790 [01:49<20:14, 3.61it/s]
613
  8%|▊ | 400/4790 [01:49<18:53, 3.87it/s]
614
  8%|▊ | 401/4790 [01:49<19:57, 3.67it/s]
615
  8%|▊ | 402/4790 [01:50<18:32, 3.94it/s]
616
  8%|▊ | 403/4790 [01:50<18:04, 4.05it/s]
617
  8%|▊ | 404/4790 [01:50<17:58, 4.07it/s]
618
  8%|▊ | 405/4790 [01:50<18:34, 3.94it/s]
619
  8%|▊ | 406/4790 [01:51<19:41, 3.71it/s]
620
  8%|▊ | 407/4790 [01:51<18:06, 4.03it/s]
621
  9%|▊ | 408/4790 [01:51<19:27, 3.75it/s]
622
  9%|▊ | 409/4790 [01:51<18:26, 3.96it/s]
623
  9%|▊ | 410/4790 [01:52<18:19, 3.98it/s]
624
  9%|▊ | 411/4790 [01:52<19:04, 3.83it/s]
625
  9%|▊ | 412/4790 [01:52<22:31, 3.24it/s]
626
  9%|▊ | 413/4790 [01:53<22:04, 3.30it/s]
627
  9%|▊ | 414/4790 [01:53<20:44, 3.52it/s]
628
  9%|▊ | 415/4790 [01:53<19:50, 3.67it/s]
629
  9%|▊ | 416/4790 [01:53<20:30, 3.55it/s]
630
  9%|▊ | 417/4790 [01:54<19:17, 3.78it/s]
631
  9%|▊ | 418/4790 [01:54<17:48, 4.09it/s]
632
  9%|▊ | 419/4790 [01:54<17:43, 4.11it/s]
633
  9%|▉ | 420/4790 [01:54<17:57, 4.06it/s]
634
  9%|▉ | 421/4790 [01:55<18:55, 3.85it/s]
635
  9%|▉ | 422/4790 [01:55<18:49, 3.87it/s]
636
  9%|▉ | 423/4790 [01:55<18:09, 4.01it/s]
637
  9%|▉ | 424/4790 [01:55<18:01, 4.04it/s]
638
  9%|▉ | 425/4790 [01:56<18:29, 3.93it/s]
639
  9%|▉ | 426/4790 [01:56<18:20, 3.97it/s]
640
  9%|▉ | 427/4790 [01:56<17:21, 4.19it/s]
641
  9%|▉ | 428/4790 [01:56<17:53, 4.06it/s]
642
  9%|▉ | 429/4790 [01:57<17:32, 4.14it/s]
643
  9%|▉ | 430/4790 [01:57<16:50, 4.32it/s]
644
  9%|▉ | 431/4790 [01:57<16:49, 4.32it/s]
645
  9%|▉ | 432/4790 [01:57<16:52, 4.30it/s]
646
  9%|▉ | 433/4790 [01:57<16:16, 4.46it/s]
647
  9%|▉ | 434/4790 [01:58<17:52, 4.06it/s]
648
  9%|▉ | 435/4790 [01:58<17:23, 4.17it/s]
649
  9%|▉ | 436/4790 [01:58<16:56, 4.28it/s]
650
  9%|▉ | 437/4790 [01:58<19:06, 3.80it/s]
651
  9%|▉ | 438/4790 [01:59<19:18, 3.76it/s]
652
  9%|▉ | 439/4790 [01:59<18:51, 3.84it/s]
653
  9%|▉ | 440/4790 [01:59<19:20, 3.75it/s]
654
  9%|▉ | 441/4790 [02:00<20:58, 3.46it/s]
655
  9%|▉ | 442/4790 [02:00<20:20, 3.56it/s]
656
  9%|▉ | 443/4790 [02:00<20:12, 3.58it/s]
657
  9%|▉ | 444/4790 [02:00<18:59, 3.81it/s]
658
  9%|▉ | 445/4790 [02:01<17:37, 4.11it/s]
659
  9%|▉ | 446/4790 [02:01<19:02, 3.80it/s]
660
  9%|▉ | 447/4790 [02:01<17:38, 4.10it/s]
661
  9%|▉ | 448/4790 [02:01<17:54, 4.04it/s]
662
  9%|▉ | 449/4790 [02:02<16:45, 4.32it/s]
663
  9%|▉ | 450/4790 [02:02<17:07, 4.22it/s]
664
  9%|▉ | 451/4790 [02:02<16:06, 4.49it/s]
665
  9%|▉ | 452/4790 [02:02<19:32, 3.70it/s]
666
  9%|▉ | 453/4790 [02:03<19:30, 3.70it/s]
667
  9%|▉ | 454/4790 [02:03<18:34, 3.89it/s]
668
  9%|▉ | 455/4790 [02:03<17:23, 4.16it/s]
669
  10%|▉ | 456/4790 [02:03<18:24, 3.92it/s]
670
  10%|▉ | 457/4790 [02:04<25:31, 2.83it/s]
671
  10%|▉ | 458/4790 [02:04<23:07, 3.12it/s]
672
  10%|▉ | 459/4790 [02:05<23:20, 3.09it/s]
673
  10%|▉ | 460/4790 [02:05<22:18, 3.24it/s]
674
  10%|▉ | 461/4790 [02:05<21:41, 3.33it/s]
675
  10%|▉ | 462/4790 [02:05<20:15, 3.56it/s]
676
  10%|▉ | 463/4790 [02:06<24:21, 2.96it/s]
677
  10%|▉ | 464/4790 [02:06<21:39, 3.33it/s]
678
  10%|▉ | 465/4790 [02:06<19:06, 3.77it/s]
679
  10%|▉ | 466/4790 [02:06<18:17, 3.94it/s]
680
  10%|▉ | 467/4790 [02:07<18:49, 3.83it/s]
681
  10%|▉ | 468/4790 [02:07<18:49, 3.83it/s]
682
  10%|▉ | 469/4790 [02:07<18:00, 4.00it/s]
683
  10%|▉ | 470/4790 [02:07<18:25, 3.91it/s]
684
  10%|▉ | 471/4790 [02:08<20:39, 3.49it/s]
685
  10%|▉ | 472/4790 [02:08<20:58, 3.43it/s]
686
  10%|▉ | 473/4790 [02:08<20:46, 3.46it/s]
687
  10%|▉ | 474/4790 [02:09<19:24, 3.71it/s]
688
  10%|▉ | 475/4790 [02:09<17:54, 4.02it/s]
689
  10%|▉ | 476/4790 [02:09<18:35, 3.87it/s]
690
  10%|▉ | 477/4790 [02:09<17:33, 4.09it/s]
691
  10%|▉ | 478/4790 [02:10<20:27, 3.51it/s]
692
  10%|█ | 479/4790 [02:10<19:05, 3.76it/s][INFO|trainer.py:811] 2024-09-05 13:37:19,924 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
693
+ [INFO|trainer.py:3819] 2024-09-05 13:37:19,927 >>
694
+ ***** Running Evaluation *****
695
+ [INFO|trainer.py:3821] 2024-09-05 13:37:19,927 >> Num examples = 6798
696
+ [INFO|trainer.py:3824] 2024-09-05 13:37:19,927 >> Batch size = 8
697
+
698
+
699
  0%| | 0/850 [00:00<?, ?it/s]
700
+
701
  1%| | 9/850 [00:00<00:11, 75.89it/s]
702
+
703
  2%|▏ | 18/850 [00:00<00:10, 78.45it/s]
704
+
705
  3%|▎ | 27/850 [00:00<00:10, 80.37it/s]
706
+
707
  4%|▍ | 37/850 [00:00<00:09, 84.54it/s]
708
+
709
  5%|▌ | 46/850 [00:00<00:09, 85.60it/s]
710
+
711
  6%|▋ | 55/850 [00:00<00:10, 78.60it/s]
712
+
713
  8%|▊ | 64/850 [00:00<00:09, 79.44it/s]
714
+
715
  9%|▊ | 73/850 [00:00<00:10, 75.52it/s]
716
+
717
  10%|▉ | 81/850 [00:01<00:10, 76.28it/s]
718
+
719
  11%|█ | 90/850 [00:01<00:09, 80.04it/s]
720
+
721
  12%|█▏ | 99/850 [00:01<00:09, 82.32it/s]
722
+
723
  13%|█▎ | 109/850 [00:01<00:08, 84.86it/s]
724
+
725
  14%|█▍ | 118/850 [00:01<00:08, 85.25it/s]
726
+
727
  15%|█▍ | 127/850 [00:01<00:08, 86.48it/s]
728
+
729
  16%|█▌ | 136/850 [00:01<00:08, 85.42it/s]
730
+
731
  17%|█▋ | 145/850 [00:01<00:08, 81.53it/s]
732
+
733
  18%|█▊ | 154/850 [00:01<00:08, 82.60it/s]
734
+
735
  19%|█▉ | 163/850 [00:01<00:08, 84.65it/s]
736
+
737
  20%|██ | 172/850 [00:02<00:07, 85.43it/s]
738
+
739
  21%|██▏ | 181/850 [00:02<00:07, 85.66it/s]
740
+
741
  22%|██▏ | 190/850 [00:02<00:07, 84.23it/s]
742
+
743
  23%|██▎ | 199/850 [00:02<00:07, 85.01it/s]
744
+
745
  24%|██▍ | 208/850 [00:02<00:07, 83.44it/s]
746
+
747
  26%|██▌ | 217/850 [00:02<00:07, 81.88it/s]
748
+
749
  27%|██▋ | 227/850 [00:02<00:07, 84.16it/s]
750
+
751
  28%|██▊ | 237/850 [00:02<00:07, 86.15it/s]
752
+
753
  29%|██▉ | 246/850 [00:02<00:07, 84.46it/s]
754
+
755
  30%|███ | 256/850 [00:03<00:06, 88.24it/s]
756
+
757
  31%|███ | 265/850 [00:03<00:06, 87.70it/s]
758
+
759
  32%|███▏ | 275/850 [00:03<00:06, 88.16it/s]
760
+
761
  33%|███▎ | 284/850 [00:03<00:06, 87.65it/s]
762
+
763
  35%|███▍ | 294/850 [00:03<00:06, 88.09it/s]
764
+
765
  36%|███▌ | 303/850 [00:03<00:06, 86.09it/s]
766
+
767
  37%|███▋ | 313/850 [00:03<00:06, 87.89it/s]
768
+
769
  38%|███▊ | 322/850 [00:03<00:06, 87.79it/s]
770
+
771
  39%|███▉ | 331/850 [00:03<00:05, 87.25it/s]
772
+
773
  40%|████ | 340/850 [00:04<00:05, 87.67it/s]
774
+
775
  41%|████ | 349/850 [00:04<00:05, 86.98it/s]
776
+
777
  42%|████▏ | 358/850 [00:04<00:05, 85.62it/s]
778
+
779
  43%|████▎ | 367/850 [00:04<00:05, 85.83it/s]
780
+
781
  44%|████▍ | 376/850 [00:04<00:05, 84.60it/s]
782
+
783
  45%|████▌ | 385/850 [00:04<00:05, 82.20it/s]
784
+
785
  46%|████▋ | 394/850 [00:04<00:05, 80.28it/s]
786
+
787
  47%|████▋ | 403/850 [00:04<00:05, 81.93it/s]
788
+
789
  49%|████▊ | 413/850 [00:04<00:05, 85.88it/s]
790
+
791
  50%|████▉ | 423/850 [00:05<00:04, 87.54it/s]
792
+
793
  51%|█████ | 432/850 [00:05<00:04, 87.55it/s]
794
+
795
  52%|█████▏ | 441/850 [00:05<00:04, 86.85it/s]
796
+
797
  53%|█████▎ | 450/850 [00:05<00:04, 83.69it/s]
798
+
799
  54%|█████▍ | 460/850 [00:05<00:04, 86.80it/s]
800
+
801
  55%|█████▌ | 470/850 [00:05<00:04, 88.96it/s]
802
+
803
  56%|█████▋ | 480/850 [00:05<00:04, 90.23it/s]
804
+
805
  58%|█████▊ | 490/850 [00:05<00:04, 88.23it/s]
806
+
807
  59%|█████▉ | 500/850 [00:05<00:03, 89.90it/s]
808
+
809
  60%|██████ | 510/850 [00:05<00:03, 89.99it/s]
810
+
811
  61%|██████ | 520/850 [00:06<00:03, 90.30it/s]
812
+
813
  62%|██████▏ | 530/850 [00:06<00:03, 86.55it/s]
814
+
815
  63%|██████▎ | 539/850 [00:06<00:03, 85.36it/s]
816
+
817
  64%|██████▍ | 548/850 [00:06<00:03, 83.42it/s]
818
+
819
  66%|██████▌ | 557/850 [00:06<00:03, 83.02it/s]
820
+
821
  67%|██████▋ | 566/850 [00:06<00:03, 80.41it/s]
822
+
823
  68%|██████▊ | 575/850 [00:06<00:03, 82.97it/s]
824
+
825
  69%|██████▊ | 584/850 [00:06<00:03, 83.09it/s]
826
+
827
  70%|██████▉ | 593/850 [00:06<00:03, 84.13it/s]
828
+
829
  71%|███████ | 602/850 [00:07<00:02, 83.90it/s]
830
+
831
  72%|███████▏ | 611/850 [00:07<00:02, 84.78it/s]
832
+
833
  73%|███████▎ | 620/850 [00:07<00:02, 84.83it/s]
834
+
835
  74%|███████▍ | 629/850 [00:07<00:02, 84.00it/s]
836
+
837
  75%|███████▌ | 638/850 [00:07<00:02, 84.53it/s]
838
+
839
  76%|███████▌ | 647/850 [00:07<00:02, 85.90it/s]
840
+
841
  77%|███████▋ | 656/850 [00:07<00:02, 86.93it/s]
842
+
843
  78%|███████▊ | 666/850 [00:07<00:02, 88.25it/s]
844
+
845
  79%|███████▉ | 675/850 [00:07<00:02, 85.00it/s]
846
+
847
  80%|████████ | 684/850 [00:08<00:01, 85.36it/s]
848
+
849
  82%|████████▏ | 693/850 [00:08<00:01, 83.16it/s]
850
+
851
  83%|████████▎ | 702/850 [00:08<00:01, 83.49it/s]
852
+
853
  84%|████████▎ | 711/850 [00:08<00:01, 82.74it/s]
854
+
855
  85%|████████▍ | 720/850 [00:08<00:01, 83.13it/s]
856
+
857
  86%|████████▌ | 729/850 [00:08<00:01, 80.00it/s]
858
+
859
  87%|████████▋ | 738/850 [00:08<00:01, 80.48it/s]
860
+
861
  88%|████████▊ | 747/850 [00:08<00:01, 80.29it/s]
862
+
863
  89%|████████▉ | 756/850 [00:08<00:01, 80.87it/s]
864
+
865
  90%|█████████ | 765/850 [00:09<00:01, 82.87it/s]
866
+
867
  91%|█████████ | 774/850 [00:09<00:00, 82.26it/s]
868
+
869
  92%|█████████▏| 783/850 [00:09<00:00, 80.12it/s]
870
+
871
  93%|█████████▎| 792/850 [00:09<00:00, 79.96it/s]
872
+
873
  94%|█████████▍| 802/850 [00:09<00:00, 83.67it/s]
874
+
875
  95%|█████████▌| 811/850 [00:09<00:00, 81.61it/s]
876
+
877
  97%|█████████▋| 821/850 [00:09<00:00, 85.11it/s]
878
+
879
  98%|█████████▊| 830/850 [00:09<00:00, 84.40it/s]
880
+
881
  99%|█████████▊| 839/850 [00:09<00:00, 84.53it/s]
882
+
883
 
884
+
885
 
886
  10%|█ | 479/4790 [02:24<19:05, 3.76it/s]
887
+
888
+
889
  [INFO|trainer.py:3503] 2024-09-05 13:37:33,956 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-479
890
+ [INFO|configuration_utils.py:472] 2024-09-05 13:37:33,957 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-479/config.json
891
+ [INFO|modeling_utils.py:2799] 2024-09-05 13:37:34,851 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-479/model.safetensors
892
+ [INFO|tokenization_utils_base.py:2684] 2024-09-05 13:37:34,852 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-479/tokenizer_config.json
893
+ [INFO|tokenization_utils_base.py:2693] 2024-09-05 13:37:34,852 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-479/special_tokens_map.json
894
+ [INFO|tokenization_utils_base.py:2684] 2024-09-05 13:37:36,650 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
895
+ [INFO|tokenization_utils_base.py:2693] 2024-09-05 13:37:36,650 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
896
+
897
  10%|█ | 480/4790 [02:27<6:21:25, 5.31s/it]
898
  10%|█ | 481/4790 [02:27<4:31:30, 3.78s/it]
899
  10%|█ | 482/4790 [02:27<3:16:31, 2.74s/it]
900
  10%|█ | 483/4790 [02:28<2:25:49, 2.03s/it]
901
  10%|█ | 484/4790 [02:28<1:46:37, 1.49s/it]
902
  10%|█ | 485/4790 [02:28<1:22:39, 1.15s/it]
903
  10%|█ | 486/4790 [02:29<1:02:21, 1.15it/s]
904
  10%|█ | 487/4790 [02:29<50:31, 1.42it/s]
905
  10%|█ | 488/4790 [02:29<41:55, 1.71it/s]
906
  10%|█ | 489/4790 [02:29<33:13, 2.16it/s]
907
  10%|█ | 490/4790 [02:30<29:22, 2.44it/s]
908
  10%|█ | 491/4790 [02:30<24:58, 2.87it/s]
909
  10%|█ | 492/4790 [02:30<22:15, 3.22it/s]
910
  10%|█ | 493/4790 [02:30<20:11, 3.55it/s]
911
  10%|█ | 494/4790 [02:31<20:24, 3.51it/s]
912
  10%|█ | 495/4790 [02:31<19:16, 3.71it/s]
913
  10%|█ | 496/4790 [02:31<19:33, 3.66it/s]
914
  10%|█ | 497/4790 [02:32<21:30, 3.33it/s]
915
  10%|█ | 498/4790 [02:32<22:35, 3.17it/s]
916
  10%|█ | 499/4790 [02:32<20:38, 3.46it/s]
917
  10%|█ | 500/4790 [02:32<19:03, 3.75it/s]
918
 
919
  10%|█ | 500/4790 [02:32<19:03, 3.75it/s]
920
  10%|█ | 501/4790 [02:33<19:03, 3.75it/s]
921
  10%|█ | 502/4790 [02:33<19:37, 3.64it/s]
922
  11%|█ | 503/4790 [02:33<18:13, 3.92it/s]
923
  11%|█ | 504/4790 [02:34<21:59, 3.25it/s]
924
  11%|█ | 505/4790 [02:34<20:52, 3.42it/s]
925
  11%|█ | 506/4790 [02:34<20:21, 3.51it/s]
926
  11%|█ | 507/4790 [02:34<18:42, 3.82it/s]
927
  11%|█ | 508/4790 [02:35<22:26, 3.18it/s]
928
  11%|█ | 509/4790 [02:35<22:30, 3.17it/s]
929
  11%|█ | 510/4790 [02:35<21:05, 3.38it/s]
930
  11%|█ | 511/4790 [02:36<19:19, 3.69it/s]
931
  11%|█ | 512/4790 [02:36<19:44, 3.61it/s]
932
  11%|█ | 513/4790 [02:36<20:22, 3.50it/s]
933
  11%|█ | 514/4790 [02:36<19:52, 3.59it/s]
934
  11%|█ | 515/4790 [02:37<21:09, 3.37it/s]
935
  11%|█ | 516/4790 [02:37<20:38, 3.45it/s]
936
  11%|█ | 517/4790 [02:37<20:57, 3.40it/s]
937
  11%|█ | 518/4790 [02:38<19:47, 3.60it/s]
938
  11%|█ | 519/4790 [02:38<18:21, 3.88it/s]
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.989888776541962,
3
+ "total_flos": 1.7928149517546354e+16,
4
+ "train_loss": 0.002201772875978276,
5
+ "train_runtime": 1511.1925,
6
+ "train_samples": 31619,
7
+ "train_samples_per_second": 209.232,
8
+ "train_steps_per_second": 3.269
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9140096618357488,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4940",
4
+ "epoch": 9.989888776541962,
5
+ "eval_steps": 500,
6
+ "global_step": 4940,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9989888776541962,
13
+ "eval_accuracy": 0.9982926512594131,
14
+ "eval_f1": 0.8903781713738632,
15
+ "eval_loss": 0.004970682319253683,
16
+ "eval_precision": 0.8806818181818182,
17
+ "eval_recall": 0.9002904162633107,
18
+ "eval_runtime": 13.9779,
19
+ "eval_samples_per_second": 486.339,
20
+ "eval_steps_per_second": 60.81,
21
+ "step": 494
22
+ },
23
+ {
24
+ "epoch": 1.0111223458038423,
25
+ "grad_norm": 0.07247231900691986,
26
+ "learning_rate": 4.4939271255060735e-05,
27
+ "loss": 0.0131,
28
+ "step": 500
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_accuracy": 0.9985328486107504,
33
+ "eval_f1": 0.9090909090909091,
34
+ "eval_loss": 0.004644014406949282,
35
+ "eval_precision": 0.9034416826003824,
36
+ "eval_recall": 0.914811229428848,
37
+ "eval_runtime": 14.1023,
38
+ "eval_samples_per_second": 482.05,
39
+ "eval_steps_per_second": 60.274,
40
+ "step": 989
41
+ },
42
+ {
43
+ "epoch": 2.0222446916076846,
44
+ "grad_norm": 0.2395874410867691,
45
+ "learning_rate": 3.9878542510121455e-05,
46
+ "loss": 0.0037,
47
+ "step": 1000
48
+ },
49
+ {
50
+ "epoch": 2.998988877654196,
51
+ "eval_accuracy": 0.9983575694624772,
52
+ "eval_f1": 0.9049904030710172,
53
+ "eval_loss": 0.006752336397767067,
54
+ "eval_precision": 0.8972407231208372,
55
+ "eval_recall": 0.9128751210067764,
56
+ "eval_runtime": 13.9972,
57
+ "eval_samples_per_second": 485.67,
58
+ "eval_steps_per_second": 60.727,
59
+ "step": 1483
60
+ },
61
+ {
62
+ "epoch": 3.033367037411527,
63
+ "grad_norm": 0.0013774348190054297,
64
+ "learning_rate": 3.481781376518219e-05,
65
+ "loss": 0.0021,
66
+ "step": 1500
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "eval_accuracy": 0.998338094001558,
71
+ "eval_f1": 0.9043805934997644,
72
+ "eval_loss": 0.006901361979544163,
73
+ "eval_precision": 0.8807339449541285,
74
+ "eval_recall": 0.9293320425943853,
75
+ "eval_runtime": 14.3886,
76
+ "eval_samples_per_second": 472.458,
77
+ "eval_steps_per_second": 59.075,
78
+ "step": 1978
79
+ },
80
+ {
81
+ "epoch": 4.044489383215369,
82
+ "grad_norm": 0.13544714450836182,
83
+ "learning_rate": 2.9757085020242914e-05,
84
+ "loss": 0.0012,
85
+ "step": 2000
86
+ },
87
+ {
88
+ "epoch": 4.998988877654196,
89
+ "eval_accuracy": 0.9984224876655414,
90
+ "eval_f1": 0.9041745730550285,
91
+ "eval_loss": 0.007279807701706886,
92
+ "eval_precision": 0.8865116279069768,
93
+ "eval_recall": 0.9225556631171346,
94
+ "eval_runtime": 14.0239,
95
+ "eval_samples_per_second": 484.743,
96
+ "eval_steps_per_second": 60.611,
97
+ "step": 2472
98
+ },
99
+ {
100
+ "epoch": 5.055611729019211,
101
+ "grad_norm": 0.001985458889976144,
102
+ "learning_rate": 2.4696356275303644e-05,
103
+ "loss": 0.0006,
104
+ "step": 2500
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "eval_accuracy": 0.9984354713061543,
109
+ "eval_f1": 0.9118483412322275,
110
+ "eval_loss": 0.0077171181328594685,
111
+ "eval_precision": 0.89322191272052,
112
+ "eval_recall": 0.9312681510164569,
113
+ "eval_runtime": 14.0472,
114
+ "eval_samples_per_second": 483.939,
115
+ "eval_steps_per_second": 60.51,
116
+ "step": 2967
117
+ },
118
+ {
119
+ "epoch": 6.066734074823054,
120
+ "grad_norm": 0.015543761663138866,
121
+ "learning_rate": 1.9635627530364373e-05,
122
+ "loss": 0.0004,
123
+ "step": 3000
124
+ },
125
+ {
126
+ "epoch": 6.998988877654196,
127
+ "eval_accuracy": 0.998526356790444,
128
+ "eval_f1": 0.9123809523809524,
129
+ "eval_loss": 0.0071807485073804855,
130
+ "eval_precision": 0.8978444236176195,
131
+ "eval_recall": 0.9273959341723137,
132
+ "eval_runtime": 14.0553,
133
+ "eval_samples_per_second": 483.66,
134
+ "eval_steps_per_second": 60.475,
135
+ "step": 3461
136
+ },
137
+ {
138
+ "epoch": 7.077856420626896,
139
+ "grad_norm": 0.0003612766449805349,
140
+ "learning_rate": 1.4574898785425101e-05,
141
+ "loss": 0.0004,
142
+ "step": 3500
143
+ },
144
+ {
145
+ "epoch": 8.0,
146
+ "eval_accuracy": 0.9985847831732018,
147
+ "eval_f1": 0.9133171912832929,
148
+ "eval_loss": 0.007767966017127037,
149
+ "eval_precision": 0.9137596899224806,
150
+ "eval_recall": 0.9128751210067764,
151
+ "eval_runtime": 14.0606,
152
+ "eval_samples_per_second": 483.479,
153
+ "eval_steps_per_second": 60.453,
154
+ "step": 3956
155
+ },
156
+ {
157
+ "epoch": 8.088978766430738,
158
+ "grad_norm": 0.0004361484607215971,
159
+ "learning_rate": 9.51417004048583e-06,
160
+ "loss": 0.0001,
161
+ "step": 4000
162
+ },
163
+ {
164
+ "epoch": 8.998988877654195,
165
+ "eval_accuracy": 0.9985523240716697,
166
+ "eval_f1": 0.9138431752178122,
167
+ "eval_loss": 0.00841750018298626,
168
+ "eval_precision": 0.9138431752178122,
169
+ "eval_recall": 0.9138431752178122,
170
+ "eval_runtime": 14.3137,
171
+ "eval_samples_per_second": 474.93,
172
+ "eval_steps_per_second": 59.384,
173
+ "step": 4450
174
+ },
175
+ {
176
+ "epoch": 9.100101112234581,
177
+ "grad_norm": 0.00019355813856236637,
178
+ "learning_rate": 4.453441295546559e-06,
179
+ "loss": 0.0001,
180
+ "step": 4500
181
+ },
182
+ {
183
+ "epoch": 9.989888776541962,
184
+ "eval_accuracy": 0.9985198649701377,
185
+ "eval_f1": 0.9140096618357488,
186
+ "eval_loss": 0.008521749638020992,
187
+ "eval_precision": 0.9122468659594986,
188
+ "eval_recall": 0.9157792836398838,
189
+ "eval_runtime": 14.4488,
190
+ "eval_samples_per_second": 470.491,
191
+ "eval_steps_per_second": 58.829,
192
+ "step": 4940
193
+ },
194
+ {
195
+ "epoch": 9.989888776541962,
196
+ "step": 4940,
197
+ "total_flos": 1.7928149517546354e+16,
198
+ "train_loss": 0.002201772875978276,
199
+ "train_runtime": 1511.1925,
200
+ "train_samples_per_second": 209.232,
201
+ "train_steps_per_second": 3.269
202
+ }
203
+ ],
204
+ "logging_steps": 500,
205
+ "max_steps": 4940,
206
+ "num_input_tokens_seen": 0,
207
+ "num_train_epochs": 10,
208
+ "save_steps": 500,
209
+ "stateful_callbacks": {
210
+ "TrainerControl": {
211
+ "args": {
212
+ "should_epoch_stop": false,
213
+ "should_evaluate": false,
214
+ "should_log": false,
215
+ "should_save": true,
216
+ "should_training_stop": true
217
+ },
218
+ "attributes": {}
219
+ }
220
+ },
221
+ "total_flos": 1.7928149517546354e+16,
222
+ "train_batch_size": 32,
223
+ "trial_name": null,
224
+ "trial_params": null
225
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57
3
+ size 5240
vocab.txt ADDED
The diff for this file is too large to render. See raw diff