Rodrigo1771 committed on
Commit
cb21a7d
•
1 Parent(s): 653ab91

Training in progress, epoch 1

Browse files
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "michiyasunaga/BioLinkBERT-base",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "finetuning_task": "ner",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "O",
15
+ "1": "B-FARMACO",
16
+ "2": "I-FARMACO"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "B-FARMACO": 1,
22
+ "I-FARMACO": 2,
23
+ "O": 0
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "bert",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.44.2",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 28895
37
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3ceb2eb18620368af7015f3678369f766b8eb56aa29d7073dc117bce300c203
3
+ size 430601004
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tb/events.out.tfevents.1725569221.c3806e32a2f8.1237.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dce1bb39d0512415fbdfee1e9bab2de73533964f2c80d90eacdd674f9edf972
3
+ size 5593
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
train.log ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/4370 [00:00<?, ?it/s]
1
  0%| | 1/4370 [00:01<2:06:45, 1.74s/it]
2
  0%| | 2/4370 [00:01<1:01:47, 1.18it/s]
3
  0%| | 3/4370 [00:02<45:00, 1.62it/s]
4
  0%| | 4/4370 [00:02<31:33, 2.31it/s]
5
  0%| | 5/4370 [00:02<25:03, 2.90it/s]
6
  0%| | 6/4370 [00:02<20:29, 3.55it/s]
7
  0%| | 7/4370 [00:03<18:58, 3.83it/s]
8
  0%| | 8/4370 [00:03<17:35, 4.13it/s]
9
  0%| | 9/4370 [00:03<16:39, 4.36it/s]
10
  0%| | 10/4370 [00:03<16:22, 4.44it/s]
11
  0%| | 11/4370 [00:03<18:38, 3.90it/s]
12
  0%| | 12/4370 [00:04<16:48, 4.32it/s]
13
  0%| | 13/4370 [00:04<16:17, 4.46it/s]
14
  0%| | 14/4370 [00:04<15:45, 4.61it/s]
15
  0%| | 15/4370 [00:04<14:35, 4.98it/s]
16
  0%| | 16/4370 [00:04<14:45, 4.92it/s]
17
  0%| | 17/4370 [00:05<14:03, 5.16it/s]
18
  0%| | 18/4370 [00:05<13:30, 5.37it/s]
19
  0%| | 19/4370 [00:05<13:12, 5.49it/s]
20
  0%| | 20/4370 [00:05<15:28, 4.69it/s]
21
  0%| | 21/4370 [00:05<14:54, 4.86it/s]
22
  1%| | 22/4370 [00:06<14:39, 4.94it/s]
23
  1%| | 23/4370 [00:06<16:12, 4.47it/s]
24
  1%| | 24/4370 [00:06<15:39, 4.63it/s]
25
  1%| | 25/4370 [00:06<15:31, 4.66it/s]
26
  1%| | 26/4370 [00:07<16:39, 4.35it/s]
27
  1%| | 27/4370 [00:07<16:12, 4.47it/s]
28
  1%| | 28/4370 [00:07<15:38, 4.63it/s]
29
  1%| | 29/4370 [00:07<17:41, 4.09it/s]
30
  1%| | 30/4370 [00:08<17:28, 4.14it/s]
31
  1%| | 31/4370 [00:08<17:06, 4.23it/s]
32
  1%| | 32/4370 [00:08<16:00, 4.52it/s]
33
  1%| | 33/4370 [00:08<15:28, 4.67it/s]
34
  1%| | 34/4370 [00:08<16:06, 4.49it/s]
35
  1%| | 35/4370 [00:09<14:53, 4.85it/s]
36
  1%| | 36/4370 [00:09<14:34, 4.96it/s]
37
  1%| | 37/4370 [00:09<13:39, 5.29it/s]
38
  1%| | 38/4370 [00:09<13:35, 5.31it/s]
39
  1%| | 39/4370 [00:09<13:02, 5.53it/s]
40
  1%| | 40/4370 [00:09<12:00, 6.01it/s]
41
  1%| | 41/4370 [00:10<11:55, 6.05it/s]
42
  1%| | 42/4370 [00:10<13:36, 5.30it/s]
43
  1%| | 43/4370 [00:10<14:23, 5.01it/s]
44
  1%| | 44/4370 [00:10<14:43, 4.90it/s]
45
  1%| | 45/4370 [00:10<14:29, 4.98it/s]
46
  1%| | 46/4370 [00:11<14:08, 5.09it/s]
47
  1%| | 47/4370 [00:11<14:05, 5.11it/s]
48
  1%| | 48/4370 [00:11<16:25, 4.39it/s]
49
  1%| | 49/4370 [00:11<17:05, 4.21it/s]
50
  1%| | 50/4370 [00:12<16:45, 4.30it/s]
51
  1%| | 51/4370 [00:12<15:45, 4.57it/s]
52
  1%| | 52/4370 [00:12<14:55, 4.82it/s]
53
  1%| | 53/4370 [00:12<13:41, 5.26it/s]
54
  1%| | 54/4370 [00:12<14:42, 4.89it/s]
55
  1%|▏ | 55/4370 [00:13<15:50, 4.54it/s]
56
  1%|▏ | 56/4370 [00:13<15:18, 4.70it/s]
57
  1%|▏ | 57/4370 [00:13<15:10, 4.74it/s]
58
  1%|▏ | 58/4370 [00:13<13:57, 5.15it/s]
59
  1%|▏ | 59/4370 [00:13<14:19, 5.02it/s]
60
  1%|▏ | 60/4370 [00:14<14:06, 5.09it/s]
61
  1%|▏ | 61/4370 [00:14<13:29, 5.32it/s]
62
  1%|▏ | 62/4370 [00:14<13:38, 5.26it/s]
63
  1%|▏ | 63/4370 [00:14<13:40, 5.25it/s]
64
  1%|▏ | 64/4370 [00:14<13:15, 5.41it/s]
65
  1%|▏ | 65/4370 [00:14<13:13, 5.43it/s]
66
  2%|▏ | 66/4370 [00:15<13:26, 5.33it/s]
67
  2%|▏ | 67/4370 [00:15<13:04, 5.49it/s]
68
  2%|▏ | 68/4370 [00:15<13:10, 5.44it/s]
69
  2%|▏ | 69/4370 [00:15<12:41, 5.65it/s]
70
  2%|▏ | 70/4370 [00:15<12:56, 5.54it/s]
71
  2%|▏ | 71/4370 [00:16<13:17, 5.39it/s]
72
  2%|▏ | 72/4370 [00:16<13:22, 5.35it/s]
73
  2%|▏ | 73/4370 [00:16<13:01, 5.50it/s]
74
  2%|▏ | 74/4370 [00:16<12:39, 5.66it/s]
75
  2%|▏ | 75/4370 [00:16<12:30, 5.72it/s]
76
  2%|▏ | 76/4370 [00:16<13:10, 5.44it/s]
77
  2%|▏ | 77/4370 [00:17<13:13, 5.41it/s]
78
  2%|▏ | 78/4370 [00:17<13:50, 5.17it/s]
79
  2%|▏ | 79/4370 [00:17<15:01, 4.76it/s]
80
  2%|▏ | 80/4370 [00:17<15:16, 4.68it/s]
81
  2%|▏ | 81/4370 [00:17<14:21, 4.98it/s]
82
  2%|▏ | 82/4370 [00:18<15:20, 4.66it/s]
83
  2%|▏ | 83/4370 [00:18<14:35, 4.89it/s]
84
  2%|▏ | 84/4370 [00:18<14:17, 5.00it/s]
85
  2%|▏ | 85/4370 [00:18<13:48, 5.17it/s]
86
  2%|▏ | 86/4370 [00:18<13:32, 5.27it/s]
87
  2%|▏ | 87/4370 [00:19<14:42, 4.85it/s]
88
  2%|▏ | 88/4370 [00:19<14:55, 4.78it/s]
89
  2%|▏ | 89/4370 [00:19<14:05, 5.06it/s]
90
  2%|▏ | 90/4370 [00:19<12:54, 5.53it/s]
91
  2%|▏ | 91/4370 [00:19<13:10, 5.41it/s]
92
  2%|▏ | 92/4370 [00:20<13:17, 5.36it/s]
93
  2%|▏ | 93/4370 [00:20<14:18, 4.98it/s]
94
  2%|▏ | 94/4370 [00:20<14:15, 5.00it/s]
95
  2%|▏ | 95/4370 [00:20<13:33, 5.25it/s]
96
  2%|▏ | 96/4370 [00:20<13:16, 5.37it/s]
97
  2%|▏ | 97/4370 [00:21<15:12, 4.68it/s]
98
  2%|▏ | 98/4370 [00:21<14:42, 4.84it/s]
99
  2%|▏ | 99/4370 [00:21<15:04, 4.72it/s]
100
  2%|▏ | 100/4370 [00:21<18:08, 3.92it/s]
101
  2%|▏ | 101/4370 [00:22<17:42, 4.02it/s]
102
  2%|▏ | 102/4370 [00:22<16:38, 4.28it/s]
103
  2%|▏ | 103/4370 [00:22<16:15, 4.37it/s]
104
  2%|▏ | 104/4370 [00:22<16:12, 4.39it/s]
105
  2%|▏ | 105/4370 [00:23<15:14, 4.66it/s]
106
  2%|▏ | 106/4370 [00:23<14:16, 4.98it/s]
107
  2%|▏ | 107/4370 [00:23<13:59, 5.08it/s]
108
  2%|▏ | 108/4370 [00:23<14:43, 4.83it/s]
109
  2%|▏ | 109/4370 [00:23<14:26, 4.92it/s]
110
  3%|β–Ž | 110/4370 [00:23<14:12, 5.00it/s]
111
  3%|β–Ž | 111/4370 [00:24<14:39, 4.84it/s]
112
  3%|β–Ž | 112/4370 [00:24<14:02, 5.05it/s]
113
  3%|β–Ž | 113/4370 [00:24<15:21, 4.62it/s]
114
  3%|β–Ž | 114/4370 [00:24<14:57, 4.74it/s]
115
  3%|β–Ž | 115/4370 [00:25<14:05, 5.03it/s]
116
  3%|β–Ž | 116/4370 [00:25<15:06, 4.69it/s]
117
  3%|β–Ž | 117/4370 [00:25<15:10, 4.67it/s]
118
  3%|β–Ž | 118/4370 [00:25<15:20, 4.62it/s]
119
  3%|β–Ž | 119/4370 [00:25<14:25, 4.91it/s]
120
  3%|β–Ž | 120/4370 [00:26<15:01, 4.72it/s]
121
  3%|β–Ž | 121/4370 [00:26<14:28, 4.89it/s]
122
  3%|β–Ž | 122/4370 [00:26<15:04, 4.70it/s]
123
  3%|β–Ž | 123/4370 [00:26<13:36, 5.20it/s]
124
  3%|β–Ž | 124/4370 [00:26<13:44, 5.15it/s]
125
  3%|β–Ž | 125/4370 [00:27<14:28, 4.89it/s]
126
  3%|β–Ž | 126/4370 [00:27<14:49, 4.77it/s]
127
  3%|β–Ž | 127/4370 [00:27<13:41, 5.17it/s]
128
  3%|β–Ž | 128/4370 [00:27<12:56, 5.47it/s]
129
  3%|β–Ž | 129/4370 [00:27<12:52, 5.49it/s]
130
  3%|β–Ž | 130/4370 [00:27<13:10, 5.37it/s]
131
  3%|β–Ž | 131/4370 [00:28<12:51, 5.49it/s]
132
  3%|β–Ž | 132/4370 [00:28<14:02, 5.03it/s]
133
  3%|β–Ž | 133/4370 [00:28<13:44, 5.14it/s]
134
  3%|β–Ž | 134/4370 [00:28<12:50, 5.50it/s]
135
  3%|β–Ž | 135/4370 [00:28<13:05, 5.39it/s]
136
  3%|β–Ž | 136/4370 [00:29<13:24, 5.26it/s]
137
  3%|β–Ž | 137/4370 [00:29<13:15, 5.32it/s]
138
  3%|β–Ž | 138/4370 [00:29<14:03, 5.02it/s]
139
  3%|β–Ž | 139/4370 [00:29<13:13, 5.33it/s]
140
  3%|β–Ž | 140/4370 [00:29<13:00, 5.42it/s]
141
  3%|β–Ž | 141/4370 [00:30<13:06, 5.38it/s]
142
  3%|β–Ž | 142/4370 [00:30<12:44, 5.53it/s]
143
  3%|β–Ž | 143/4370 [00:30<14:48, 4.76it/s]
144
  3%|β–Ž | 144/4370 [00:30<15:31, 4.54it/s]
145
  3%|β–Ž | 145/4370 [00:30<14:01, 5.02it/s]
146
  3%|β–Ž | 146/4370 [00:31<13:28, 5.22it/s]
147
  3%|β–Ž | 147/4370 [00:31<13:49, 5.09it/s]
148
  3%|β–Ž | 148/4370 [00:31<13:42, 5.13it/s]
149
  3%|β–Ž | 149/4370 [00:31<13:18, 5.28it/s]
150
  3%|β–Ž | 150/4370 [00:31<12:53, 5.46it/s]
151
  3%|β–Ž | 151/4370 [00:32<12:32, 5.61it/s]
152
  3%|β–Ž | 152/4370 [00:32<12:17, 5.72it/s]
153
  4%|β–Ž | 153/4370 [00:32<13:58, 5.03it/s]
154
  4%|β–Ž | 154/4370 [00:32<14:12, 4.94it/s]
155
  4%|β–Ž | 155/4370 [00:32<14:00, 5.02it/s]
156
  4%|β–Ž | 156/4370 [00:33<13:33, 5.18it/s]
157
  4%|β–Ž | 157/4370 [00:33<12:58, 5.41it/s]
158
  4%|β–Ž | 158/4370 [00:33<13:47, 5.09it/s]
159
  4%|β–Ž | 159/4370 [00:33<13:47, 5.09it/s]
160
  4%|β–Ž | 160/4370 [00:33<12:46, 5.49it/s]
161
  4%|β–Ž | 161/4370 [00:33<12:23, 5.66it/s]
162
  4%|β–Ž | 162/4370 [00:34<12:00, 5.84it/s]
163
  4%|β–Ž | 163/4370 [00:34<12:57, 5.41it/s]
164
  4%|▍ | 164/4370 [00:34<13:50, 5.06it/s]
165
  4%|▍ | 165/4370 [00:34<13:06, 5.34it/s]
166
  4%|▍ | 166/4370 [00:34<13:41, 5.12it/s]
167
  4%|▍ | 167/4370 [00:35<15:58, 4.38it/s]
168
  4%|▍ | 168/4370 [00:35<18:16, 3.83it/s]
169
  4%|▍ | 169/4370 [00:35<16:34, 4.23it/s]
170
  4%|▍ | 170/4370 [00:35<17:41, 3.96it/s]
171
  4%|▍ | 171/4370 [00:36<17:02, 4.10it/s]
172
  4%|▍ | 172/4370 [00:36<15:22, 4.55it/s]
173
  4%|▍ | 173/4370 [00:36<16:15, 4.30it/s]
174
  4%|▍ | 174/4370 [00:36<15:08, 4.62it/s]
175
  4%|▍ | 175/4370 [00:36<13:27, 5.19it/s]
176
  4%|▍ | 176/4370 [00:37<12:07, 5.76it/s]
177
  4%|▍ | 177/4370 [00:37<11:33, 6.04it/s]
178
  4%|▍ | 178/4370 [00:37<11:21, 6.15it/s]
179
  4%|▍ | 179/4370 [00:37<11:41, 5.98it/s]
180
  4%|▍ | 180/4370 [00:37<11:42, 5.97it/s]
181
  4%|▍ | 181/4370 [00:37<12:23, 5.63it/s]
182
  4%|▍ | 182/4370 [00:38<12:23, 5.63it/s]
183
  4%|▍ | 183/4370 [00:38<12:55, 5.40it/s]
184
  4%|▍ | 184/4370 [00:38<12:51, 5.43it/s]
185
  4%|▍ | 185/4370 [00:38<13:50, 5.04it/s]
186
  4%|▍ | 186/4370 [00:38<12:40, 5.50it/s]
187
  4%|▍ | 187/4370 [00:39<12:10, 5.73it/s]
188
  4%|▍ | 188/4370 [00:39<12:27, 5.60it/s]
189
  4%|▍ | 189/4370 [00:39<12:00, 5.80it/s]
190
  4%|▍ | 190/4370 [00:39<14:08, 4.92it/s]
191
  4%|▍ | 191/4370 [00:39<14:58, 4.65it/s]
192
  4%|▍ | 192/4370 [00:40<16:15, 4.28it/s]
193
  4%|▍ | 193/4370 [00:40<14:54, 4.67it/s]
194
  4%|▍ | 194/4370 [00:40<13:28, 5.17it/s]
195
  4%|▍ | 195/4370 [00:40<13:34, 5.13it/s]
196
  4%|▍ | 196/4370 [00:40<13:01, 5.34it/s]
197
  5%|▍ | 197/4370 [00:41<15:56, 4.36it/s]
198
  5%|▍ | 198/4370 [00:41<14:32, 4.78it/s]
199
  5%|▍ | 199/4370 [00:41<13:11, 5.27it/s]
200
  5%|▍ | 200/4370 [00:41<14:09, 4.91it/s]
201
  5%|▍ | 201/4370 [00:42<15:47, 4.40it/s]
202
  5%|▍ | 202/4370 [00:42<15:29, 4.49it/s]
203
  5%|▍ | 203/4370 [00:42<15:33, 4.46it/s]
204
  5%|▍ | 204/4370 [00:42<15:09, 4.58it/s]
205
  5%|▍ | 205/4370 [00:42<15:35, 4.45it/s]
206
  5%|▍ | 206/4370 [00:43<14:41, 4.73it/s]
207
  5%|▍ | 207/4370 [00:43<14:44, 4.71it/s]
208
  5%|▍ | 208/4370 [00:43<13:42, 5.06it/s]
209
  5%|▍ | 209/4370 [00:43<13:25, 5.17it/s]
210
  5%|▍ | 210/4370 [00:43<13:05, 5.29it/s]
211
  5%|▍ | 211/4370 [00:44<13:24, 5.17it/s]
212
  5%|▍ | 212/4370 [00:44<12:46, 5.42it/s]
213
  5%|▍ | 213/4370 [00:44<12:02, 5.75it/s]
214
  5%|▍ | 214/4370 [00:44<12:02, 5.75it/s]
215
  5%|▍ | 215/4370 [00:44<12:32, 5.52it/s]
216
  5%|▍ | 216/4370 [00:44<13:01, 5.31it/s]
217
  5%|▍ | 217/4370 [00:45<13:15, 5.22it/s]
218
  5%|▍ | 218/4370 [00:45<13:42, 5.05it/s]
219
  5%|β–Œ | 219/4370 [00:45<17:13, 4.02it/s]
220
  5%|β–Œ | 220/4370 [00:45<16:01, 4.32it/s]
221
  5%|β–Œ | 221/4370 [00:46<14:55, 4.63it/s]
222
  5%|β–Œ | 222/4370 [00:46<14:00, 4.94it/s]
223
  5%|β–Œ | 223/4370 [00:46<13:55, 4.97it/s]
224
  5%|β–Œ | 224/4370 [00:46<13:15, 5.21it/s]
225
  5%|β–Œ | 225/4370 [00:46<13:30, 5.11it/s]
226
  5%|β–Œ | 226/4370 [00:46<13:34, 5.09it/s]
227
  5%|β–Œ | 227/4370 [00:47<15:00, 4.60it/s]
228
  5%|β–Œ | 228/4370 [00:47<14:02, 4.92it/s]
229
  5%|β–Œ | 229/4370 [00:47<12:59, 5.31it/s]
230
  5%|β–Œ | 230/4370 [00:47<12:51, 5.37it/s]
231
  5%|β–Œ | 231/4370 [00:47<12:34, 5.48it/s]
232
  5%|β–Œ | 232/4370 [00:48<12:41, 5.43it/s]
233
  5%|β–Œ | 233/4370 [00:48<12:36, 5.47it/s]
234
  5%|β–Œ | 234/4370 [00:48<12:18, 5.60it/s]
235
  5%|β–Œ | 235/4370 [00:48<11:43, 5.88it/s]
236
  5%|β–Œ | 236/4370 [00:48<12:29, 5.52it/s]
237
  5%|β–Œ | 237/4370 [00:49<12:42, 5.42it/s]
238
  5%|β–Œ | 238/4370 [00:49<12:37, 5.45it/s]
239
  5%|β–Œ | 239/4370 [00:49<12:18, 5.59it/s]
240
  5%|β–Œ | 240/4370 [00:49<14:41, 4.69it/s]
241
  6%|β–Œ | 241/4370 [00:49<14:45, 4.66it/s]
242
  6%|β–Œ | 242/4370 [00:50<15:07, 4.55it/s]
243
  6%|β–Œ | 243/4370 [00:50<13:46, 4.99it/s]
244
  6%|β–Œ | 244/4370 [00:50<13:42, 5.02it/s]
245
  6%|β–Œ | 245/4370 [00:50<13:13, 5.20it/s]
246
  6%|β–Œ | 246/4370 [00:50<12:32, 5.48it/s]
247
  6%|β–Œ | 247/4370 [00:50<12:36, 5.45it/s]
248
  6%|β–Œ | 248/4370 [00:51<12:18, 5.58it/s]
249
  6%|β–Œ | 249/4370 [00:51<12:33, 5.47it/s]
250
  6%|β–Œ | 250/4370 [00:51<13:26, 5.11it/s]
251
  6%|β–Œ | 251/4370 [00:51<12:58, 5.29it/s]
252
  6%|β–Œ | 252/4370 [00:51<13:07, 5.23it/s]
253
  6%|β–Œ | 253/4370 [00:52<13:03, 5.26it/s]
254
  6%|β–Œ | 254/4370 [00:52<12:24, 5.53it/s]
255
  6%|β–Œ | 255/4370 [00:52<12:03, 5.68it/s]
256
  6%|β–Œ | 256/4370 [00:52<11:50, 5.79it/s]
257
  6%|β–Œ | 257/4370 [00:52<11:39, 5.88it/s]
258
  6%|β–Œ | 258/4370 [00:52<11:23, 6.02it/s]
259
  6%|β–Œ | 259/4370 [00:53<10:40, 6.42it/s]
260
  6%|β–Œ | 260/4370 [00:53<10:58, 6.24it/s]
261
  6%|β–Œ | 261/4370 [00:53<11:32, 5.93it/s]
262
  6%|β–Œ | 262/4370 [00:53<11:50, 5.78it/s]
263
  6%|β–Œ | 263/4370 [00:53<12:07, 5.64it/s]
264
  6%|β–Œ | 264/4370 [00:53<11:43, 5.83it/s]
265
  6%|β–Œ | 265/4370 [00:54<12:58, 5.27it/s]
266
  6%|β–Œ | 266/4370 [00:54<12:28, 5.48it/s]
267
  6%|β–Œ | 267/4370 [00:54<12:51, 5.32it/s]
268
  6%|β–Œ | 268/4370 [00:54<12:53, 5.30it/s]
269
  6%|β–Œ | 269/4370 [00:54<13:27, 5.08it/s]
270
  6%|β–Œ | 270/4370 [00:55<12:52, 5.31it/s]
271
  6%|β–Œ | 271/4370 [00:55<12:42, 5.38it/s]
272
  6%|β–Œ | 272/4370 [00:55<11:57, 5.71it/s]
273
  6%|β–Œ | 273/4370 [00:55<11:14, 6.07it/s]
274
  6%|β–‹ | 274/4370 [00:55<11:29, 5.94it/s]
275
  6%|β–‹ | 275/4370 [00:55<11:16, 6.05it/s]
276
  6%|β–‹ | 276/4370 [00:56<11:20, 6.02it/s]
277
  6%|β–‹ | 277/4370 [00:56<11:36, 5.88it/s]
278
  6%|β–‹ | 278/4370 [00:56<12:08, 5.61it/s]
279
  6%|β–‹ | 279/4370 [00:56<11:42, 5.82it/s]
280
  6%|β–‹ | 280/4370 [00:56<11:37, 5.86it/s]
281
  6%|β–‹ | 281/4370 [00:57<15:06, 4.51it/s]
282
  6%|β–‹ | 282/4370 [00:57<13:59, 4.87it/s]
283
  6%|β–‹ | 283/4370 [00:57<12:54, 5.27it/s]
284
  6%|β–‹ | 284/4370 [00:57<13:46, 4.94it/s]
285
  7%|β–‹ | 285/4370 [00:57<12:28, 5.46it/s]
286
  7%|β–‹ | 286/4370 [00:58<12:00, 5.67it/s]
287
  7%|β–‹ | 287/4370 [00:58<11:41, 5.82it/s]
288
  7%|β–‹ | 288/4370 [00:58<14:06, 4.82it/s]
289
  7%|β–‹ | 289/4370 [00:58<14:23, 4.73it/s]
290
  7%|β–‹ | 290/4370 [00:58<13:58, 4.87it/s]
291
  7%|β–‹ | 291/4370 [00:59<13:33, 5.01it/s]
292
  7%|β–‹ | 292/4370 [00:59<14:16, 4.76it/s]
293
  7%|β–‹ | 293/4370 [00:59<16:31, 4.11it/s]
294
  7%|β–‹ | 294/4370 [00:59<15:47, 4.30it/s]
295
  7%|β–‹ | 295/4370 [01:00<14:56, 4.55it/s]
296
  7%|β–‹ | 296/4370 [01:00<14:46, 4.60it/s]
297
  7%|β–‹ | 297/4370 [01:00<13:47, 4.92it/s]
298
  7%|β–‹ | 298/4370 [01:00<15:27, 4.39it/s]
299
  7%|β–‹ | 299/4370 [01:00<14:30, 4.68it/s]
300
  7%|β–‹ | 300/4370 [01:01<14:23, 4.71it/s]
301
  7%|β–‹ | 301/4370 [01:01<13:30, 5.02it/s]
302
  7%|β–‹ | 302/4370 [01:01<13:50, 4.90it/s]
303
  7%|β–‹ | 303/4370 [01:01<14:39, 4.62it/s]
304
  7%|β–‹ | 304/4370 [01:01<13:16, 5.11it/s]
305
  7%|β–‹ | 305/4370 [01:02<12:30, 5.42it/s]
306
  7%|β–‹ | 306/4370 [01:02<11:57, 5.66it/s]
307
  7%|β–‹ | 307/4370 [01:02<11:08, 6.08it/s]
308
  7%|β–‹ | 308/4370 [01:02<10:51, 6.23it/s]
309
  7%|β–‹ | 309/4370 [01:02<11:55, 5.68it/s]
310
  7%|β–‹ | 310/4370 [01:02<12:17, 5.51it/s]
311
  7%|β–‹ | 311/4370 [01:03<12:34, 5.38it/s]
312
  7%|β–‹ | 312/4370 [01:03<12:23, 5.46it/s]
313
  7%|β–‹ | 313/4370 [01:03<13:14, 5.10it/s]
314
  7%|β–‹ | 314/4370 [01:03<12:38, 5.35it/s]
315
  7%|β–‹ | 315/4370 [01:03<12:40, 5.33it/s]
316
  7%|β–‹ | 316/4370 [01:03<12:39, 5.34it/s]
317
  7%|β–‹ | 317/4370 [01:04<12:12, 5.54it/s]
318
  7%|β–‹ | 318/4370 [01:04<13:37, 4.96it/s]
319
  7%|β–‹ | 319/4370 [01:04<13:45, 4.91it/s]
320
  7%|β–‹ | 320/4370 [01:04<14:14, 4.74it/s]
321
  7%|β–‹ | 321/4370 [01:05<14:06, 4.78it/s]
322
  7%|β–‹ | 322/4370 [01:05<13:02, 5.17it/s]
323
  7%|β–‹ | 323/4370 [01:05<11:47, 5.72it/s]
324
  7%|β–‹ | 324/4370 [01:05<11:47, 5.72it/s]
325
  7%|β–‹ | 325/4370 [01:05<11:29, 5.87it/s]
326
  7%|β–‹ | 326/4370 [01:05<11:53, 5.67it/s]
327
  7%|β–‹ | 327/4370 [01:06<11:57, 5.63it/s]
328
  8%|β–Š | 328/4370 [01:06<11:54, 5.65it/s]
329
  8%|β–Š | 329/4370 [01:06<12:36, 5.34it/s]
330
  8%|β–Š | 330/4370 [01:06<14:24, 4.67it/s]
331
  8%|β–Š | 331/4370 [01:06<13:43, 4.90it/s]
332
  8%|β–Š | 332/4370 [01:07<14:43, 4.57it/s]
333
  8%|β–Š | 333/4370 [01:07<13:56, 4.83it/s]
334
  8%|β–Š | 334/4370 [01:07<12:54, 5.21it/s]
335
  8%|β–Š | 335/4370 [01:07<12:38, 5.32it/s]
336
  8%|β–Š | 336/4370 [01:07<13:10, 5.10it/s]
337
  8%|β–Š | 337/4370 [01:08<12:53, 5.21it/s]
338
  8%|β–Š | 338/4370 [01:08<12:20, 5.45it/s]
339
  8%|β–Š | 339/4370 [01:08<12:01, 5.59it/s]
340
  8%|β–Š | 340/4370 [01:08<12:22, 5.43it/s]
341
  8%|β–Š | 341/4370 [01:08<12:14, 5.48it/s]
342
  8%|β–Š | 342/4370 [01:08<12:04, 5.56it/s]
343
  8%|β–Š | 343/4370 [01:09<14:07, 4.75it/s]
344
  8%|β–Š | 344/4370 [01:09<13:17, 5.05it/s]
345
  8%|β–Š | 345/4370 [01:09<14:02, 4.78it/s]
346
  8%|β–Š | 346/4370 [01:09<15:17, 4.39it/s]
347
  8%|β–Š | 347/4370 [01:10<14:10, 4.73it/s]
348
  8%|β–Š | 348/4370 [01:10<14:04, 4.76it/s]
349
  8%|β–Š | 349/4370 [01:10<13:58, 4.79it/s]
350
  8%|β–Š | 350/4370 [01:10<14:39, 4.57it/s]
351
  8%|β–Š | 351/4370 [01:10<14:24, 4.65it/s]
352
  8%|β–Š | 352/4370 [01:11<14:48, 4.52it/s]
353
  8%|β–Š | 353/4370 [01:11<15:40, 4.27it/s]
354
  8%|β–Š | 354/4370 [01:11<14:22, 4.65it/s]
355
  8%|β–Š | 355/4370 [01:12<21:07, 3.17it/s]
356
  8%|β–Š | 356/4370 [01:12<19:30, 3.43it/s]
357
  8%|β–Š | 357/4370 [01:12<18:03, 3.71it/s]
358
  8%|β–Š | 358/4370 [01:12<16:24, 4.07it/s]
359
  8%|β–Š | 359/4370 [01:12<15:15, 4.38it/s]
360
  8%|β–Š | 360/4370 [01:13<14:40, 4.55it/s]
361
  8%|β–Š | 361/4370 [01:13<14:47, 4.52it/s]
362
  8%|β–Š | 362/4370 [01:13<14:35, 4.58it/s]
363
  8%|β–Š | 363/4370 [01:13<13:24, 4.98it/s]
364
  8%|β–Š | 364/4370 [01:14<15:51, 4.21it/s]
365
  8%|β–Š | 365/4370 [01:14<14:39, 4.56it/s]
366
  8%|β–Š | 366/4370 [01:14<15:40, 4.26it/s]
367
  8%|β–Š | 367/4370 [01:14<14:59, 4.45it/s]
368
  8%|β–Š | 368/4370 [01:14<13:25, 4.97it/s]
369
  8%|β–Š | 369/4370 [01:15<12:57, 5.14it/s]
370
  8%|β–Š | 370/4370 [01:15<12:21, 5.39it/s]
371
  8%|β–Š | 371/4370 [01:15<14:17, 4.67it/s]
372
  9%|β–Š | 372/4370 [01:15<13:56, 4.78it/s]
373
  9%|β–Š | 373/4370 [01:15<13:01, 5.11it/s]
374
  9%|β–Š | 374/4370 [01:16<16:01, 4.16it/s]
375
  9%|β–Š | 375/4370 [01:16<14:59, 4.44it/s]
376
  9%|β–Š | 376/4370 [01:16<13:31, 4.92it/s]
377
  9%|β–Š | 377/4370 [01:16<12:49, 5.19it/s]
378
  9%|β–Š | 378/4370 [01:16<12:49, 5.19it/s]
379
  9%|β–Š | 379/4370 [01:17<13:49, 4.81it/s]
380
  9%|β–Š | 380/4370 [01:17<13:22, 4.97it/s]
381
  9%|β–Š | 381/4370 [01:17<13:40, 4.86it/s]
382
  9%|β–Š | 382/4370 [01:17<14:12, 4.68it/s]
383
  9%|β–‰ | 383/4370 [01:17<12:48, 5.19it/s]
384
  9%|β–‰ | 384/4370 [01:18<13:08, 5.05it/s]
385
  9%|β–‰ | 385/4370 [01:18<13:33, 4.90it/s]
386
  9%|β–‰ | 386/4370 [01:18<13:35, 4.88it/s]
387
  9%|β–‰ | 387/4370 [01:18<14:55, 4.45it/s]
388
  9%|β–‰ | 388/4370 [01:19<13:31, 4.91it/s]
389
  9%|β–‰ | 389/4370 [01:19<12:17, 5.40it/s]
390
  9%|β–‰ | 390/4370 [01:19<13:00, 5.10it/s]
391
  9%|β–‰ | 391/4370 [01:19<12:16, 5.40it/s]
392
  9%|β–‰ | 392/4370 [01:19<11:54, 5.57it/s]
393
  9%|β–‰ | 393/4370 [01:20<15:09, 4.37it/s]
394
  9%|β–‰ | 394/4370 [01:20<14:43, 4.50it/s]
395
  9%|β–‰ | 395/4370 [01:20<13:39, 4.85it/s]
396
  9%|β–‰ | 396/4370 [01:20<13:40, 4.84it/s]
397
  9%|β–‰ | 397/4370 [01:20<13:20, 4.96it/s]
398
  9%|β–‰ | 398/4370 [01:21<13:27, 4.92it/s]
399
  9%|β–‰ | 399/4370 [01:21<13:08, 5.04it/s]
400
  9%|β–‰ | 400/4370 [01:21<13:02, 5.07it/s]
401
  9%|β–‰ | 401/4370 [01:21<13:21, 4.95it/s]
402
  9%|β–‰ | 402/4370 [01:21<13:17, 4.98it/s]
403
  9%|β–‰ | 403/4370 [01:21<12:43, 5.20it/s]
404
  9%|β–‰ | 404/4370 [01:22<13:02, 5.07it/s]
405
  9%|β–‰ | 405/4370 [01:22<12:05, 5.46it/s]
406
  9%|β–‰ | 406/4370 [01:22<11:16, 5.86it/s]
407
  9%|β–‰ | 407/4370 [01:22<10:41, 6.17it/s]
408
  9%|β–‰ | 408/4370 [01:22<11:49, 5.59it/s]
409
  9%|β–‰ | 409/4370 [01:23<12:47, 5.16it/s]
410
  9%|β–‰ | 410/4370 [01:23<14:15, 4.63it/s]
411
  9%|β–‰ | 411/4370 [01:23<13:57, 4.73it/s]
412
  9%|β–‰ | 412/4370 [01:23<13:41, 4.82it/s]
413
  9%|β–‰ | 413/4370 [01:23<13:26, 4.90it/s]
414
  9%|β–‰ | 414/4370 [01:24<13:05, 5.03it/s]
415
  9%|β–‰ | 415/4370 [01:24<13:16, 4.97it/s]
416
  10%|β–‰ | 416/4370 [01:24<14:25, 4.57it/s]
417
  10%|β–‰ | 417/4370 [01:24<15:14, 4.32it/s]
418
  10%|β–‰ | 418/4370 [01:25<14:02, 4.69it/s]
419
  10%|β–‰ | 419/4370 [01:25<12:50, 5.13it/s]
420
  10%|β–‰ | 420/4370 [01:25<12:20, 5.33it/s]
421
  10%|β–‰ | 421/4370 [01:25<12:30, 5.26it/s]
422
  10%|β–‰ | 422/4370 [01:25<11:48, 5.57it/s]
423
  10%|β–‰ | 423/4370 [01:25<13:00, 5.05it/s]
424
  10%|β–‰ | 424/4370 [01:26<12:50, 5.12it/s]
425
  10%|β–‰ | 425/4370 [01:26<13:31, 4.86it/s]
426
  10%|β–‰ | 426/4370 [01:26<13:01, 5.04it/s]
427
  10%|β–‰ | 427/4370 [01:26<14:21, 4.58it/s]
428
  10%|β–‰ | 428/4370 [01:27<18:00, 3.65it/s]
429
  10%|β–‰ | 429/4370 [01:27<15:39, 4.19it/s]
430
  10%|β–‰ | 430/4370 [01:27<15:31, 4.23it/s]
431
  10%|β–‰ | 431/4370 [01:27<13:59, 4.69it/s]
432
  10%|β–‰ | 432/4370 [01:28<15:00, 4.38it/s]
433
  10%|β–‰ | 433/4370 [01:28<14:16, 4.60it/s]
434
  10%|β–‰ | 434/4370 [01:28<13:14, 4.96it/s]
435
  10%|β–‰ | 435/4370 [01:28<12:42, 5.16it/s]
436
  10%|β–‰ | 436/4370 [01:28<13:00, 5.04it/s]
437
  10%|β–ˆ | 437/4370 [01:28<12:31, 5.23it/s][INFO|trainer.py:811] 2024-09-05 20:48:30,653 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
 
 
 
 
 
 
438
  0%| | 0/869 [00:00<?, ?it/s]
 
439
  1%|▏ | 11/869 [00:00<00:08, 101.23it/s]
 
440
  3%|β–Ž | 22/869 [00:00<00:09, 91.51it/s] 
 
441
  4%|β–Ž | 32/869 [00:00<00:09, 90.28it/s]
 
442
  5%|▍ | 42/869 [00:00<00:09, 91.73it/s]
 
443
  6%|β–Œ | 52/869 [00:00<00:08, 92.54it/s]
 
444
  7%|β–‹ | 62/869 [00:00<00:08, 92.44it/s]
 
445
  8%|β–Š | 72/869 [00:00<00:08, 90.47it/s]
 
446
  9%|β–‰ | 82/869 [00:00<00:08, 92.22it/s]
 
447
  11%|β–ˆ | 92/869 [00:00<00:08, 92.98it/s]
 
448
  12%|β–ˆβ– | 102/869 [00:01<00:08, 94.35it/s]
 
449
  13%|β–ˆβ–Ž | 112/869 [00:01<00:08, 92.62it/s]
 
450
  14%|β–ˆβ– | 122/869 [00:01<00:08, 91.65it/s]
 
451
  15%|β–ˆβ–Œ | 132/869 [00:01<00:08, 91.74it/s]
 
452
  16%|β–ˆβ–‹ | 142/869 [00:01<00:08, 90.63it/s]
 
453
  17%|β–ˆβ–‹ | 152/869 [00:01<00:07, 90.80it/s]
 
454
  19%|β–ˆβ–Š | 162/869 [00:01<00:07, 90.11it/s]
 
455
  20%|β–ˆβ–‰ | 172/869 [00:01<00:07, 89.26it/s]
 
456
  21%|β–ˆβ–ˆ | 181/869 [00:01<00:07, 88.34it/s]
 
457
  22%|β–ˆβ–ˆβ– | 190/869 [00:02<00:07, 87.81it/s]
 
458
  23%|β–ˆβ–ˆβ–Ž | 200/869 [00:02<00:07, 90.06it/s]
 
459
  24%|β–ˆβ–ˆβ– | 210/869 [00:02<00:07, 91.03it/s]
 
460
  25%|β–ˆβ–ˆβ–Œ | 220/869 [00:02<00:07, 92.47it/s]
 
461
  26%|β–ˆβ–ˆβ–‹ | 230/869 [00:02<00:06, 91.95it/s]
 
462
  28%|β–ˆβ–ˆβ–Š | 240/869 [00:02<00:07, 89.75it/s]
 
463
  29%|β–ˆβ–ˆβ–Š | 249/869 [00:02<00:07, 87.37it/s]
 
464
  30%|β–ˆβ–ˆβ–‰ | 259/869 [00:02<00:06, 90.63it/s]
 
465
  31%|β–ˆβ–ˆβ–ˆ | 269/869 [00:02<00:06, 90.54it/s]
 
466
  32%|β–ˆβ–ˆβ–ˆβ– | 279/869 [00:03<00:06, 90.05it/s]
 
467
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 289/869 [00:03<00:06, 89.12it/s]
 
468
  34%|β–ˆβ–ˆβ–ˆβ– | 298/869 [00:03<00:06, 88.93it/s]
 
469
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 307/869 [00:03<00:06, 88.88it/s]
 
470
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 317/869 [00:03<00:06, 89.54it/s]
 
471
  38%|β–ˆβ–ˆβ–ˆβ–Š | 326/869 [00:03<00:06, 89.59it/s]
 
472
  39%|β–ˆβ–ˆβ–ˆβ–Š | 336/869 [00:03<00:05, 91.34it/s]
 
473
  40%|β–ˆβ–ˆβ–ˆβ–‰ | 346/869 [00:03<00:05, 89.56it/s]
 
474
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 356/869 [00:03<00:05, 90.14it/s]
 
475
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 366/869 [00:04<00:05, 92.17it/s]
 
476
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 376/869 [00:04<00:05, 88.58it/s]
 
477
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 385/869 [00:04<00:05, 88.83it/s]
 
478
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 394/869 [00:04<00:05, 87.30it/s]
 
479
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 403/869 [00:04<00:05, 87.21it/s]
 
480
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 413/869 [00:04<00:05, 90.10it/s]
 
481
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 423/869 [00:04<00:04, 90.84it/s]
 
482
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 433/869 [00:04<00:04, 91.10it/s]
 
483
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 443/869 [00:04<00:04, 87.90it/s]
 
484
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 453/869 [00:05<00:04, 90.57it/s]
 
485
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 463/869 [00:05<00:04, 89.49it/s]
 
486
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 473/869 [00:05<00:04, 91.35it/s]
 
487
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 484/869 [00:05<00:04, 93.70it/s]
 
488
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 494/869 [00:05<00:04, 90.76it/s]
 
489
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 504/869 [00:05<00:04, 88.92it/s]
 
490
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 514/869 [00:05<00:03, 90.62it/s]
 
491
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 524/869 [00:05<00:03, 87.02it/s]
 
492
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 533/869 [00:05<00:03, 87.76it/s]
 
493
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 542/869 [00:06<00:03, 85.30it/s]
 
494
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 551/869 [00:06<00:03, 86.30it/s]
 
495
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 561/869 [00:06<00:03, 88.08it/s]
 
496
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 570/869 [00:06<00:03, 88.12it/s]
 
497
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 580/869 [00:06<00:03, 88.75it/s]
 
498
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 590/869 [00:06<00:03, 89.63it/s]
 
499
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 600/869 [00:06<00:02, 91.18it/s]
 
500
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 610/869 [00:06<00:02, 90.68it/s]
 
501
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 620/869 [00:06<00:02, 90.38it/s]
 
502
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 630/869 [00:06<00:02, 91.00it/s]
 
503
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 640/869 [00:07<00:02, 90.13it/s]
 
504
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 650/869 [00:07<00:02, 90.69it/s]
 
505
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 660/869 [00:07<00:02, 90.37it/s]
 
506
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 670/869 [00:07<00:02, 91.58it/s]
 
507
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 680/869 [00:07<00:02, 93.04it/s]
 
508
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 690/869 [00:07<00:02, 86.86it/s]
 
509
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 700/869 [00:07<00:01, 88.28it/s]
 
510
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 710/869 [00:07<00:01, 90.00it/s]
 
511
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 720/869 [00:07<00:01, 89.02it/s]
 
512
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 730/869 [00:08<00:01, 90.80it/s]
 
513
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 740/869 [00:08<00:01, 91.14it/s]
 
514
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 750/869 [00:08<00:01, 90.37it/s]
 
515
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 760/869 [00:08<00:01, 91.86it/s]
 
516
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 770/869 [00:08<00:01, 89.85it/s]
 
517
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 780/869 [00:08<00:01, 84.79it/s]
 
518
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 790/869 [00:08<00:00, 87.94it/s]
 
519
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 800/869 [00:08<00:00, 90.37it/s]
 
520
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 810/869 [00:08<00:00, 91.73it/s]
 
521
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 820/869 [00:09<00:00, 90.53it/s]
 
522
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 830/869 [00:09<00:00, 93.05it/s]
 
523
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 840/869 [00:09<00:00, 93.45it/s]
 
524
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 851/869 [00:09<00:00, 95.58it/s]
 
525
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 861/869 [00:09<00:00, 92.09it/s]
526
 
 
527
 
528
  10%|β–ˆ | 437/4370 [01:42<12:31, 5.23it/s]
 
 
529
  [INFO|trainer.py:3503] 2024-09-05 20:48:44,033 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-437
 
 
 
 
 
 
 
530
  10%|β–ˆ | 438/4370 [01:45<5:31:31, 5.06s/it]
531
  10%|β–ˆ | 439/4370 [01:45<3:56:14, 3.61s/it]
532
  10%|β–ˆ | 440/4370 [01:45<2:49:11, 2.58s/it]
533
  10%|β–ˆ | 441/4370 [01:46<2:03:39, 1.89s/it]
534
  10%|β–ˆ | 442/4370 [01:46<1:29:45, 1.37s/it]
535
  10%|β–ˆ | 443/4370 [01:46<1:06:06, 1.01s/it]
536
  10%|β–ˆ | 444/4370 [01:46<50:50, 1.29it/s]
537
  10%|β–ˆ | 445/4370 [01:46<40:42, 1.61it/s]
538
  10%|β–ˆ | 446/4370 [01:47<32:01, 2.04it/s]
539
  10%|β–ˆ | 447/4370 [01:47<25:57, 2.52it/s]
540
  10%|β–ˆ | 448/4370 [01:47<22:16, 2.93it/s]
541
  10%|β–ˆ | 449/4370 [01:47<18:51, 3.47it/s]
542
  10%|β–ˆ | 450/4370 [01:47<17:21, 3.77it/s]
543
  10%|β–ˆ | 451/4370 [01:48<16:41, 3.91it/s]
544
  10%|β–ˆ | 452/4370 [01:48<17:52, 3.65it/s]
545
  10%|β–ˆ | 453/4370 [01:48<16:01, 4.07it/s]
546
  10%|β–ˆ | 454/4370 [01:48<14:31, 4.49it/s]
547
  10%|β–ˆ | 455/4370 [01:48<14:12, 4.59it/s]
548
  10%|β–ˆ | 456/4370 [01:49<13:57, 4.68it/s]
549
  10%|β–ˆ | 457/4370 [01:49<13:45, 4.74it/s]
550
  10%|β–ˆ | 458/4370 [01:49<13:32, 4.82it/s]
551
  11%|β–ˆ | 459/4370 [01:49<14:12, 4.59it/s]
552
  11%|β–ˆ | 460/4370 [01:49<12:33, 5.19it/s]
553
  11%|β–ˆ | 461/4370 [01:50<12:08, 5.37it/s]
554
  11%|β–ˆ | 462/4370 [01:50<12:06, 5.38it/s]
555
  11%|β–ˆ | 463/4370 [01:50<15:08, 4.30it/s]
556
  11%|β–ˆ | 464/4370 [01:50<14:44, 4.42it/s]
557
  11%|β–ˆ | 465/4370 [01:50<13:11, 4.93it/s]
558
  11%|β–ˆ | 466/4370 [01:51<13:08, 4.95it/s]
559
  11%|β–ˆ | 467/4370 [01:51<12:43, 5.11it/s]
560
  11%|β–ˆ | 468/4370 [01:51<12:43, 5.11it/s]
561
  11%|β–ˆ | 469/4370 [01:51<14:53, 4.37it/s]
562
  11%|β–ˆ | 470/4370 [01:52<15:05, 4.31it/s]
563
  11%|β–ˆ | 471/4370 [01:52<15:29, 4.19it/s]
564
  11%|β–ˆ | 472/4370 [01:52<14:31, 4.47it/s]
565
  11%|β–ˆ | 473/4370 [01:52<13:49, 4.70it/s]
566
  11%|β–ˆ | 474/4370 [01:52<12:24, 5.24it/s]
567
  11%|β–ˆ | 475/4370 [01:53<13:27, 4.82it/s]
568
  11%|β–ˆ | 476/4370 [01:53<13:34, 4.78it/s]
569
  11%|β–ˆ | 477/4370 [01:53<12:56, 5.01it/s]
570
  11%|β–ˆ | 478/4370 [01:53<13:10, 4.92it/s]
571
  11%|β–ˆ | 479/4370 [01:53<12:51, 5.04it/s]
572
  11%|β–ˆ | 480/4370 [01:54<12:48, 5.06it/s]
573
  11%|β–ˆ | 481/4370 [01:54<13:03, 4.96it/s]
574
  11%|β–ˆ | 482/4370 [01:54<12:28, 5.19it/s]
575
  11%|β–ˆ | 483/4370 [01:54<12:10, 5.32it/s]
576
  11%|β–ˆ | 484/4370 [01:54<12:40, 5.11it/s]
577
  11%|β–ˆ | 485/4370 [01:55<13:10, 4.92it/s]
578
  11%|β–ˆ | 486/4370 [01:55<13:00, 4.98it/s]
579
  11%|β–ˆ | 487/4370 [01:55<13:17, 4.87it/s]
580
  11%|β–ˆ | 488/4370 [01:55<12:37, 5.13it/s]
581
  11%|β–ˆ | 489/4370 [01:55<12:53, 5.02it/s]
582
  11%|β–ˆ | 490/4370 [01:56<12:48, 5.05it/s]
583
  11%|β–ˆ | 491/4370 [01:56<12:04, 5.35it/s]
584
  11%|β–ˆβ– | 492/4370 [01:56<11:50, 5.46it/s]
585
  11%|β–ˆβ– | 493/4370 [01:56<11:57, 5.40it/s]
586
  11%|β–ˆβ– | 494/4370 [01:56<11:10, 5.78it/s]
587
  11%|β–ˆβ– | 495/4370 [01:56<10:53, 5.93it/s]
588
  11%|β–ˆβ– | 496/4370 [01:57<11:08, 5.79it/s]
589
  11%|β–ˆβ– | 497/4370 [01:57<10:32, 6.12it/s]
590
  11%|β–ˆβ– | 498/4370 [01:57<12:01, 5.36it/s]
591
  11%|β–ˆβ– | 499/4370 [01:57<12:16, 5.26it/s]
592
  11%|β–ˆβ– | 500/4370 [01:57<12:51, 5.01it/s]
593
 
594
  11%|β–ˆβ– | 500/4370 [01:57<12:51, 5.01it/s]
595
  11%|β–ˆβ– | 501/4370 [01:58<13:06, 4.92it/s]
596
  11%|β–ˆβ– | 502/4370 [01:58<13:53, 4.64it/s]
597
  12%|β–ˆβ– | 503/4370 [01:58<13:15, 4.86it/s]
598
  12%|β–ˆβ– | 504/4370 [01:58<13:34, 4.74it/s]
599
  12%|β–ˆβ– | 505/4370 [01:58<13:00, 4.95it/s]
600
  12%|β–ˆβ– | 506/4370 [01:59<12:26, 5.18it/s]
601
  12%|β–ˆβ– | 507/4370 [01:59<11:54, 5.41it/s]
602
  12%|β–ˆβ– | 508/4370 [01:59<12:10, 5.28it/s]
603
  12%|β–ˆβ– | 509/4370 [01:59<11:43, 5.49it/s]
604
  12%|β–ˆβ– | 510/4370 [01:59<12:48, 5.03it/s]
605
  12%|β–ˆβ– | 511/4370 [02:00<12:13, 5.26it/s]
606
  12%|β–ˆβ– | 512/4370 [02:00<11:58, 5.37it/s]
607
  12%|β–ˆβ– | 513/4370 [02:00<11:53, 5.40it/s]
608
  12%|β–ˆβ– | 514/4370 [02:00<12:07, 5.30it/s]
609
  12%|β–ˆβ– | 515/4370 [02:00<11:44, 5.47it/s]
610
  12%|β–ˆβ– | 516/4370 [02:00<10:50, 5.93it/s]
611
  12%|β–ˆβ– | 517/4370 [02:01<10:36, 6.05it/s]
612
  12%|β–ˆβ– | 518/4370 [02:01<10:51, 5.91it/s]
613
  12%|β–ˆβ– | 519/4370 [02:01<10:24, 6.16it/s]
614
  12%|β–ˆβ– | 520/4370 [02:01<10:17, 6.23it/s]
615
  12%|β–ˆβ– | 521/4370 [02:01<10:32, 6.09it/s]
616
  12%|β–ˆβ– | 522/4370 [02:01<12:40, 5.06it/s]
617
  12%|β–ˆβ– | 523/4370 [02:02<13:08, 4.88it/s]
618
  12%|β–ˆβ– | 524/4370 [02:02<12:18, 5.21it/s]
619
  12%|β–ˆβ– | 525/4370 [02:02<12:10, 5.26it/s]
620
  12%|β–ˆβ– | 526/4370 [02:02<12:21, 5.19it/s]
621
  12%|β–ˆβ– | 527/4370 [02:02<11:18, 5.66it/s]
622
  12%|β–ˆβ– | 528/4370 [02:03<11:08, 5.75it/s]
623
  12%|β–ˆβ– | 529/4370 [02:03<13:55, 4.59it/s]
624
  12%|β–ˆβ– | 530/4370 [02:03<13:25, 4.77it/s]
625
  12%|β–ˆβ– | 531/4370 [02:03<12:38, 5.06it/s]
626
  12%|β–ˆβ– | 532/4370 [02:03<12:11, 5.24it/s]
627
  12%|β–ˆβ– | 533/4370 [02:04<11:58, 5.34it/s]
628
  12%|β–ˆβ– | 534/4370 [02:04<11:35, 5.52it/s]
629
  12%|β–ˆβ– | 535/4370 [02:04<11:43, 5.45it/s]
630
  12%|β–ˆβ– | 536/4370 [02:04<12:30, 5.11it/s]
631
  12%|β–ˆβ– | 537/4370 [02:04<13:41, 4.66it/s]
632
  12%|β–ˆβ– | 538/4370 [02:05<12:49, 4.98it/s]
633
  12%|β–ˆβ– | 539/4370 [02:05<12:40, 5.04it/s]
634
  12%|β–ˆβ– | 540/4370 [02:05<12:03, 5.30it/s]
635
  12%|β–ˆβ– | 541/4370 [02:05<11:44, 5.44it/s]
636
  12%|β–ˆβ– | 542/4370 [02:05<11:32, 5.52it/s]
637
  12%|β–ˆβ– | 543/4370 [02:05<11:43, 5.44it/s]
638
  12%|β–ˆβ– | 544/4370 [02:06<12:52, 4.95it/s]
639
  12%|β–ˆβ– | 545/4370 [02:06<14:00, 4.55it/s]
640
  12%|β–ˆβ– | 546/4370 [02:06<13:39, 4.67it/s]
641
  13%|β–ˆβ–Ž | 547/4370 [02:06<13:14, 4.81it/s]
642
  13%|β–ˆβ–Ž | 548/4370 [02:07<14:12, 4.49it/s]
643
  13%|β–ˆβ–Ž | 549/4370 [02:07<12:46, 4.99it/s]
644
  13%|β–ˆβ–Ž | 550/4370 [02:07<12:04, 5.28it/s]
645
  13%|β–ˆβ–Ž | 551/4370 [02:07<13:56, 4.57it/s]
646
  13%|β–ˆβ–Ž | 552/4370 [02:07<14:35, 4.36it/s]
647
  13%|β–ˆβ–Ž | 553/4370 [02:08<13:14, 4.80it/s]
648
  13%|β–ˆβ–Ž | 554/4370 [02:08<14:49, 4.29it/s]
649
  13%|β–ˆβ–Ž | 555/4370 [02:08<13:40, 4.65it/s]
650
  13%|β–ˆβ–Ž | 556/4370 [02:08<13:31, 4.70it/s]
651
  13%|β–ˆβ–Ž | 557/4370 [02:09<13:50, 4.59it/s]
652
  13%|β–ˆβ–Ž | 558/4370 [02:09<14:19, 4.44it/s]
653
  13%|β–ˆβ–Ž | 559/4370 [02:09<14:10, 4.48it/s]
654
  13%|β–ˆβ–Ž | 560/4370 [02:09<13:48, 4.60it/s]
655
  13%|β–ˆβ–Ž | 561/4370 [02:09<12:40, 5.01it/s]
 
1
+ 2024-09-05 20:46:30.525749: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2
+ 2024-09-05 20:46:30.543947: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
3
+ 2024-09-05 20:46:30.565279: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
4
+ 2024-09-05 20:46:30.571709: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
5
+ 2024-09-05 20:46:30.587388: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
6
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
7
+ 2024-09-05 20:46:31.893073: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
8
+ /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of πŸ€— Transformers. Use `eval_strategy` instead
9
+ warnings.warn(
10
+ 09/05/2024 20:46:34 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
11
+ 09/05/2024 20:46:34 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
12
+ _n_gpu=1,
13
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
14
+ adafactor=False,
15
+ adam_beta1=0.9,
16
+ adam_beta2=0.999,
17
+ adam_epsilon=1e-08,
18
+ auto_find_batch_size=False,
19
+ batch_eval_metrics=False,
20
+ bf16=False,
21
+ bf16_full_eval=False,
22
+ data_seed=None,
23
+ dataloader_drop_last=False,
24
+ dataloader_num_workers=0,
25
+ dataloader_persistent_workers=False,
26
+ dataloader_pin_memory=True,
27
+ dataloader_prefetch_factor=None,
28
+ ddp_backend=None,
29
+ ddp_broadcast_buffers=None,
30
+ ddp_bucket_cap_mb=None,
31
+ ddp_find_unused_parameters=None,
32
+ ddp_timeout=1800,
33
+ debug=[],
34
+ deepspeed=None,
35
+ disable_tqdm=False,
36
+ dispatch_batches=None,
37
+ do_eval=True,
38
+ do_predict=True,
39
+ do_train=True,
40
+ eval_accumulation_steps=None,
41
+ eval_delay=0,
42
+ eval_do_concat_batches=True,
43
+ eval_on_start=False,
44
+ eval_steps=None,
45
+ eval_strategy=epoch,
46
+ eval_use_gather_object=False,
47
+ evaluation_strategy=epoch,
48
+ fp16=False,
49
+ fp16_backend=auto,
50
+ fp16_full_eval=False,
51
+ fp16_opt_level=O1,
52
+ fsdp=[],
53
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
54
+ fsdp_min_num_params=0,
55
+ fsdp_transformer_layer_cls_to_wrap=None,
56
+ full_determinism=False,
57
+ gradient_accumulation_steps=2,
58
+ gradient_checkpointing=False,
59
+ gradient_checkpointing_kwargs=None,
60
+ greater_is_better=True,
61
+ group_by_length=False,
62
+ half_precision_backend=auto,
63
+ hub_always_push=False,
64
+ hub_model_id=None,
65
+ hub_private_repo=False,
66
+ hub_strategy=every_save,
67
+ hub_token=<HUB_TOKEN>,
68
+ ignore_data_skip=False,
69
+ include_inputs_for_metrics=False,
70
+ include_num_input_tokens_seen=False,
71
+ include_tokens_per_second=False,
72
+ jit_mode_eval=False,
73
+ label_names=None,
74
+ label_smoothing_factor=0.0,
75
+ learning_rate=5e-05,
76
+ length_column_name=length,
77
+ load_best_model_at_end=True,
78
+ local_rank=0,
79
+ log_level=passive,
80
+ log_level_replica=warning,
81
+ log_on_each_node=True,
82
+ logging_dir=/content/dissertation/scripts/ner/output/tb,
83
+ logging_first_step=False,
84
+ logging_nan_inf_filter=True,
85
+ logging_steps=500,
86
+ logging_strategy=steps,
87
+ lr_scheduler_kwargs={},
88
+ lr_scheduler_type=linear,
89
+ max_grad_norm=1.0,
90
+ max_steps=-1,
91
+ metric_for_best_model=f1,
92
+ mp_parameters=,
93
+ neftune_noise_alpha=None,
94
+ no_cuda=False,
95
+ num_train_epochs=10.0,
96
+ optim=adamw_torch,
97
+ optim_args=None,
98
+ optim_target_modules=None,
99
+ output_dir=/content/dissertation/scripts/ner/output,
100
+ overwrite_output_dir=True,
101
+ past_index=-1,
102
+ per_device_eval_batch_size=8,
103
+ per_device_train_batch_size=32,
104
+ prediction_loss_only=False,
105
+ push_to_hub=True,
106
+ push_to_hub_model_id=None,
107
+ push_to_hub_organization=None,
108
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
109
+ ray_scope=last,
110
+ remove_unused_columns=True,
111
+ report_to=['tensorboard'],
112
+ restore_callback_states_from_checkpoint=False,
113
+ resume_from_checkpoint=None,
114
+ run_name=/content/dissertation/scripts/ner/output,
115
+ save_on_each_node=False,
116
+ save_only_model=False,
117
+ save_safetensors=True,
118
+ save_steps=500,
119
+ save_strategy=epoch,
120
+ save_total_limit=None,
121
+ seed=42,
122
+ skip_memory_metrics=True,
123
+ split_batches=None,
124
+ tf32=None,
125
+ torch_compile=False,
126
+ torch_compile_backend=None,
127
+ torch_compile_mode=None,
128
+ torch_empty_cache_steps=None,
129
+ torchdynamo=None,
130
+ tpu_metrics_debug=False,
131
+ tpu_num_cores=None,
132
+ use_cpu=False,
133
+ use_ipex=False,
134
+ use_legacy_prediction_loop=False,
135
+ use_mps_device=False,
136
+ warmup_ratio=0.0,
137
+ warmup_steps=0,
138
+ weight_decay=0.0,
139
+ )
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+ [INFO|configuration_utils.py:733] 2024-09-05 20:46:49,939 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--michiyasunaga--BioLinkBERT-base/snapshots/b71f5d70f063d1c8f1124070ce86f1ee463ca1fe/config.json
148
+ [INFO|configuration_utils.py:800] 2024-09-05 20:46:49,949 >> Model config BertConfig {
149
+ "_name_or_path": "michiyasunaga/BioLinkBERT-base",
150
+ "architectures": [
151
+ "BertModel"
152
+ ],
153
+ "attention_probs_dropout_prob": 0.1,
154
+ "classifier_dropout": null,
155
+ "finetuning_task": "ner",
156
+ "gradient_checkpointing": false,
157
+ "hidden_act": "gelu",
158
+ "hidden_dropout_prob": 0.1,
159
+ "hidden_size": 768,
160
+ "id2label": {
161
+ "0": "O",
162
+ "1": "B-FARMACO",
163
+ "2": "I-FARMACO"
164
+ },
165
+ "initializer_range": 0.02,
166
+ "intermediate_size": 3072,
167
+ "label2id": {
168
+ "B-FARMACO": 1,
169
+ "I-FARMACO": 2,
170
+ "O": 0
171
+ },
172
+ "layer_norm_eps": 1e-12,
173
+ "max_position_embeddings": 512,
174
+ "model_type": "bert",
175
+ "num_attention_heads": 12,
176
+ "num_hidden_layers": 12,
177
+ "pad_token_id": 0,
178
+ "position_embedding_type": "absolute",
179
+ "transformers_version": "4.44.2",
180
+ "type_vocab_size": 2,
181
+ "use_cache": true,
182
+ "vocab_size": 28895
183
+ }
184
+
185
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 20:46:52,215 >> loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--michiyasunaga--BioLinkBERT-base/snapshots/b71f5d70f063d1c8f1124070ce86f1ee463ca1fe/vocab.txt
186
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 20:46:52,215 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--michiyasunaga--BioLinkBERT-base/snapshots/b71f5d70f063d1c8f1124070ce86f1ee463ca1fe/tokenizer.json
187
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 20:46:52,215 >> loading file added_tokens.json from cache at None
188
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 20:46:52,215 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--michiyasunaga--BioLinkBERT-base/snapshots/b71f5d70f063d1c8f1124070ce86f1ee463ca1fe/special_tokens_map.json
189
+ [INFO|tokenization_utils_base.py:2269] 2024-09-05 20:46:52,215 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--michiyasunaga--BioLinkBERT-base/snapshots/b71f5d70f063d1c8f1124070ce86f1ee463ca1fe/tokenizer_config.json
190
+ /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
191
+ warnings.warn(
192
+ [INFO|modeling_utils.py:3678] 2024-09-05 20:46:54,406 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--michiyasunaga--BioLinkBERT-base/snapshots/b71f5d70f063d1c8f1124070ce86f1ee463ca1fe/pytorch_model.bin
193
+ [INFO|modeling_utils.py:4497] 2024-09-05 20:46:54,546 >> Some weights of the model checkpoint at michiyasunaga/BioLinkBERT-base were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
194
+ - This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
195
+ - This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
196
+ [WARNING|modeling_utils.py:4509] 2024-09-05 20:46:54,546 >> Some weights of BertForTokenClassification were not initialized from the model checkpoint at michiyasunaga/BioLinkBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
197
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
198
+
199
+
200
+
201
+ /content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library πŸ€— Evaluate: https://huggingface.co/docs/evaluate
202
+ metric = load_metric("seqeval", trust_remote_code=True)
203
+
204
+ [INFO|trainer.py:811] 2024-09-05 20:47:01,019 >> The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
205
+ [INFO|trainer.py:2134] 2024-09-05 20:47:01,709 >> ***** Running training *****
206
+ [INFO|trainer.py:2135] 2024-09-05 20:47:01,709 >> Num examples = 27,967
207
+ [INFO|trainer.py:2136] 2024-09-05 20:47:01,709 >> Num Epochs = 10
208
+ [INFO|trainer.py:2137] 2024-09-05 20:47:01,709 >> Instantaneous batch size per device = 32
209
+ [INFO|trainer.py:2140] 2024-09-05 20:47:01,709 >> Total train batch size (w. parallel, distributed & accumulation) = 64
210
+ [INFO|trainer.py:2141] 2024-09-05 20:47:01,709 >> Gradient Accumulation steps = 2
211
+ [INFO|trainer.py:2142] 2024-09-05 20:47:01,709 >> Total optimization steps = 4,370
212
+ [INFO|trainer.py:2143] 2024-09-05 20:47:01,710 >> Number of trainable parameters = 107,644,419
213
+
214
  0%| | 0/4370 [00:00<?, ?it/s]
215
  0%| | 1/4370 [00:01<2:06:45, 1.74s/it]
216
  0%| | 2/4370 [00:01<1:01:47, 1.18it/s]
217
  0%| | 3/4370 [00:02<45:00, 1.62it/s]
218
  0%| | 4/4370 [00:02<31:33, 2.31it/s]
219
  0%| | 5/4370 [00:02<25:03, 2.90it/s]
220
  0%| | 6/4370 [00:02<20:29, 3.55it/s]
221
  0%| | 7/4370 [00:03<18:58, 3.83it/s]
222
  0%| | 8/4370 [00:03<17:35, 4.13it/s]
223
  0%| | 9/4370 [00:03<16:39, 4.36it/s]
224
  0%| | 10/4370 [00:03<16:22, 4.44it/s]
225
  0%| | 11/4370 [00:03<18:38, 3.90it/s]
226
  0%| | 12/4370 [00:04<16:48, 4.32it/s]
227
  0%| | 13/4370 [00:04<16:17, 4.46it/s]
228
  0%| | 14/4370 [00:04<15:45, 4.61it/s]
229
  0%| | 15/4370 [00:04<14:35, 4.98it/s]
230
  0%| | 16/4370 [00:04<14:45, 4.92it/s]
231
  0%| | 17/4370 [00:05<14:03, 5.16it/s]
232
  0%| | 18/4370 [00:05<13:30, 5.37it/s]
233
  0%| | 19/4370 [00:05<13:12, 5.49it/s]
234
  0%| | 20/4370 [00:05<15:28, 4.69it/s]
235
  0%| | 21/4370 [00:05<14:54, 4.86it/s]
236
  1%| | 22/4370 [00:06<14:39, 4.94it/s]
237
  1%| | 23/4370 [00:06<16:12, 4.47it/s]
238
  1%| | 24/4370 [00:06<15:39, 4.63it/s]
239
  1%| | 25/4370 [00:06<15:31, 4.66it/s]
240
  1%| | 26/4370 [00:07<16:39, 4.35it/s]
241
  1%| | 27/4370 [00:07<16:12, 4.47it/s]
242
  1%| | 28/4370 [00:07<15:38, 4.63it/s]
243
  1%| | 29/4370 [00:07<17:41, 4.09it/s]
244
  1%| | 30/4370 [00:08<17:28, 4.14it/s]
245
  1%| | 31/4370 [00:08<17:06, 4.23it/s]
246
  1%| | 32/4370 [00:08<16:00, 4.52it/s]
247
  1%| | 33/4370 [00:08<15:28, 4.67it/s]
248
  1%| | 34/4370 [00:08<16:06, 4.49it/s]
249
  1%| | 35/4370 [00:09<14:53, 4.85it/s]
250
  1%| | 36/4370 [00:09<14:34, 4.96it/s]
251
  1%| | 37/4370 [00:09<13:39, 5.29it/s]
252
  1%| | 38/4370 [00:09<13:35, 5.31it/s]
253
  1%| | 39/4370 [00:09<13:02, 5.53it/s]
254
  1%| | 40/4370 [00:09<12:00, 6.01it/s]
255
  1%| | 41/4370 [00:10<11:55, 6.05it/s]
256
  1%| | 42/4370 [00:10<13:36, 5.30it/s]
257
  1%| | 43/4370 [00:10<14:23, 5.01it/s]
258
  1%| | 44/4370 [00:10<14:43, 4.90it/s]
259
  1%| | 45/4370 [00:10<14:29, 4.98it/s]
260
  1%| | 46/4370 [00:11<14:08, 5.09it/s]
261
  1%| | 47/4370 [00:11<14:05, 5.11it/s]
262
  1%| | 48/4370 [00:11<16:25, 4.39it/s]
263
  1%| | 49/4370 [00:11<17:05, 4.21it/s]
264
  1%| | 50/4370 [00:12<16:45, 4.30it/s]
265
  1%| | 51/4370 [00:12<15:45, 4.57it/s]
266
  1%| | 52/4370 [00:12<14:55, 4.82it/s]
267
  1%| | 53/4370 [00:12<13:41, 5.26it/s]
268
  1%| | 54/4370 [00:12<14:42, 4.89it/s]
269
  1%|▏ | 55/4370 [00:13<15:50, 4.54it/s]
270
  1%|▏ | 56/4370 [00:13<15:18, 4.70it/s]
271
  1%|▏ | 57/4370 [00:13<15:10, 4.74it/s]
272
  1%|▏ | 58/4370 [00:13<13:57, 5.15it/s]
273
  1%|▏ | 59/4370 [00:13<14:19, 5.02it/s]
274
  1%|▏ | 60/4370 [00:14<14:06, 5.09it/s]
275
  1%|▏ | 61/4370 [00:14<13:29, 5.32it/s]
276
  1%|▏ | 62/4370 [00:14<13:38, 5.26it/s]
277
  1%|▏ | 63/4370 [00:14<13:40, 5.25it/s]
278
  1%|▏ | 64/4370 [00:14<13:15, 5.41it/s]
279
  1%|▏ | 65/4370 [00:14<13:13, 5.43it/s]
280
  2%|▏ | 66/4370 [00:15<13:26, 5.33it/s]
281
  2%|▏ | 67/4370 [00:15<13:04, 5.49it/s]
282
  2%|▏ | 68/4370 [00:15<13:10, 5.44it/s]
283
  2%|▏ | 69/4370 [00:15<12:41, 5.65it/s]
284
  2%|▏ | 70/4370 [00:15<12:56, 5.54it/s]
285
  2%|▏ | 71/4370 [00:16<13:17, 5.39it/s]
286
  2%|▏ | 72/4370 [00:16<13:22, 5.35it/s]
287
  2%|▏ | 73/4370 [00:16<13:01, 5.50it/s]
288
  2%|▏ | 74/4370 [00:16<12:39, 5.66it/s]
289
  2%|▏ | 75/4370 [00:16<12:30, 5.72it/s]
290
  2%|▏ | 76/4370 [00:16<13:10, 5.44it/s]
291
  2%|▏ | 77/4370 [00:17<13:13, 5.41it/s]
292
  2%|▏ | 78/4370 [00:17<13:50, 5.17it/s]
293
  2%|▏ | 79/4370 [00:17<15:01, 4.76it/s]
294
  2%|▏ | 80/4370 [00:17<15:16, 4.68it/s]
295
  2%|▏ | 81/4370 [00:17<14:21, 4.98it/s]
296
  2%|▏ | 82/4370 [00:18<15:20, 4.66it/s]
297
  2%|▏ | 83/4370 [00:18<14:35, 4.89it/s]
298
  2%|▏ | 84/4370 [00:18<14:17, 5.00it/s]
299
  2%|▏ | 85/4370 [00:18<13:48, 5.17it/s]
300
  2%|▏ | 86/4370 [00:18<13:32, 5.27it/s]
301
  2%|▏ | 87/4370 [00:19<14:42, 4.85it/s]
302
  2%|▏ | 88/4370 [00:19<14:55, 4.78it/s]
303
  2%|▏ | 89/4370 [00:19<14:05, 5.06it/s]
304
  2%|▏ | 90/4370 [00:19<12:54, 5.53it/s]
305
  2%|▏ | 91/4370 [00:19<13:10, 5.41it/s]
306
  2%|▏ | 92/4370 [00:20<13:17, 5.36it/s]
307
  2%|▏ | 93/4370 [00:20<14:18, 4.98it/s]
308
  2%|▏ | 94/4370 [00:20<14:15, 5.00it/s]
309
  2%|▏ | 95/4370 [00:20<13:33, 5.25it/s]
310
  2%|▏ | 96/4370 [00:20<13:16, 5.37it/s]
311
  2%|▏ | 97/4370 [00:21<15:12, 4.68it/s]
312
  2%|▏ | 98/4370 [00:21<14:42, 4.84it/s]
313
  2%|▏ | 99/4370 [00:21<15:04, 4.72it/s]
314
  2%|▏ | 100/4370 [00:21<18:08, 3.92it/s]
315
  2%|▏ | 101/4370 [00:22<17:42, 4.02it/s]
316
  2%|▏ | 102/4370 [00:22<16:38, 4.28it/s]
317
  2%|▏ | 103/4370 [00:22<16:15, 4.37it/s]
318
  2%|▏ | 104/4370 [00:22<16:12, 4.39it/s]
319
  2%|▏ | 105/4370 [00:23<15:14, 4.66it/s]
320
  2%|▏ | 106/4370 [00:23<14:16, 4.98it/s]
321
  2%|▏ | 107/4370 [00:23<13:59, 5.08it/s]
322
  2%|▏ | 108/4370 [00:23<14:43, 4.83it/s]
323
  2%|▏ | 109/4370 [00:23<14:26, 4.92it/s]
324
  3%|β–Ž | 110/4370 [00:23<14:12, 5.00it/s]
325
  3%|β–Ž | 111/4370 [00:24<14:39, 4.84it/s]
326
  3%|β–Ž | 112/4370 [00:24<14:02, 5.05it/s]
327
  3%|β–Ž | 113/4370 [00:24<15:21, 4.62it/s]
328
  3%|β–Ž | 114/4370 [00:24<14:57, 4.74it/s]
329
  3%|β–Ž | 115/4370 [00:25<14:05, 5.03it/s]
330
  3%|β–Ž | 116/4370 [00:25<15:06, 4.69it/s]
331
  3%|β–Ž | 117/4370 [00:25<15:10, 4.67it/s]
332
  3%|β–Ž | 118/4370 [00:25<15:20, 4.62it/s]
333
  3%|β–Ž | 119/4370 [00:25<14:25, 4.91it/s]
334
  3%|β–Ž | 120/4370 [00:26<15:01, 4.72it/s]
335
  3%|β–Ž | 121/4370 [00:26<14:28, 4.89it/s]
336
  3%|β–Ž | 122/4370 [00:26<15:04, 4.70it/s]
337
  3%|β–Ž | 123/4370 [00:26<13:36, 5.20it/s]
338
  3%|β–Ž | 124/4370 [00:26<13:44, 5.15it/s]
339
  3%|β–Ž | 125/4370 [00:27<14:28, 4.89it/s]
340
  3%|β–Ž | 126/4370 [00:27<14:49, 4.77it/s]
341
  3%|β–Ž | 127/4370 [00:27<13:41, 5.17it/s]
342
  3%|β–Ž | 128/4370 [00:27<12:56, 5.47it/s]
343
  3%|β–Ž | 129/4370 [00:27<12:52, 5.49it/s]
344
  3%|β–Ž | 130/4370 [00:27<13:10, 5.37it/s]
345
  3%|β–Ž | 131/4370 [00:28<12:51, 5.49it/s]
346
  3%|β–Ž | 132/4370 [00:28<14:02, 5.03it/s]
347
  3%|β–Ž | 133/4370 [00:28<13:44, 5.14it/s]
348
  3%|β–Ž | 134/4370 [00:28<12:50, 5.50it/s]
349
  3%|β–Ž | 135/4370 [00:28<13:05, 5.39it/s]
350
  3%|β–Ž | 136/4370 [00:29<13:24, 5.26it/s]
351
  3%|β–Ž | 137/4370 [00:29<13:15, 5.32it/s]
352
  3%|β–Ž | 138/4370 [00:29<14:03, 5.02it/s]
353
  3%|β–Ž | 139/4370 [00:29<13:13, 5.33it/s]
354
  3%|β–Ž | 140/4370 [00:29<13:00, 5.42it/s]
355
  3%|β–Ž | 141/4370 [00:30<13:06, 5.38it/s]
356
  3%|β–Ž | 142/4370 [00:30<12:44, 5.53it/s]
357
  3%|β–Ž | 143/4370 [00:30<14:48, 4.76it/s]
358
  3%|β–Ž | 144/4370 [00:30<15:31, 4.54it/s]
359
  3%|β–Ž | 145/4370 [00:30<14:01, 5.02it/s]
360
  3%|β–Ž | 146/4370 [00:31<13:28, 5.22it/s]
361
  3%|β–Ž | 147/4370 [00:31<13:49, 5.09it/s]
362
  3%|β–Ž | 148/4370 [00:31<13:42, 5.13it/s]
363
  3%|β–Ž | 149/4370 [00:31<13:18, 5.28it/s]
364
  3%|β–Ž | 150/4370 [00:31<12:53, 5.46it/s]
365
  3%|β–Ž | 151/4370 [00:32<12:32, 5.61it/s]
366
  3%|β–Ž | 152/4370 [00:32<12:17, 5.72it/s]
367
  4%|β–Ž | 153/4370 [00:32<13:58, 5.03it/s]
368
  4%|β–Ž | 154/4370 [00:32<14:12, 4.94it/s]
369
  4%|β–Ž | 155/4370 [00:32<14:00, 5.02it/s]
370
  4%|β–Ž | 156/4370 [00:33<13:33, 5.18it/s]
371
  4%|β–Ž | 157/4370 [00:33<12:58, 5.41it/s]
372
  4%|β–Ž | 158/4370 [00:33<13:47, 5.09it/s]
373
  4%|β–Ž | 159/4370 [00:33<13:47, 5.09it/s]
374
  4%|β–Ž | 160/4370 [00:33<12:46, 5.49it/s]
375
  4%|β–Ž | 161/4370 [00:33<12:23, 5.66it/s]
376
  4%|β–Ž | 162/4370 [00:34<12:00, 5.84it/s]
377
  4%|β–Ž | 163/4370 [00:34<12:57, 5.41it/s]
378
  4%|▍ | 164/4370 [00:34<13:50, 5.06it/s]
379
  4%|▍ | 165/4370 [00:34<13:06, 5.34it/s]
380
  4%|▍ | 166/4370 [00:34<13:41, 5.12it/s]
381
  4%|▍ | 167/4370 [00:35<15:58, 4.38it/s]
382
  4%|▍ | 168/4370 [00:35<18:16, 3.83it/s]
383
  4%|▍ | 169/4370 [00:35<16:34, 4.23it/s]
384
  4%|▍ | 170/4370 [00:35<17:41, 3.96it/s]
385
  4%|▍ | 171/4370 [00:36<17:02, 4.10it/s]
386
  4%|▍ | 172/4370 [00:36<15:22, 4.55it/s]
387
  4%|▍ | 173/4370 [00:36<16:15, 4.30it/s]
388
  4%|▍ | 174/4370 [00:36<15:08, 4.62it/s]
389
  4%|▍ | 175/4370 [00:36<13:27, 5.19it/s]
390
  4%|▍ | 176/4370 [00:37<12:07, 5.76it/s]
391
  4%|▍ | 177/4370 [00:37<11:33, 6.04it/s]
392
  4%|▍ | 178/4370 [00:37<11:21, 6.15it/s]
393
  4%|▍ | 179/4370 [00:37<11:41, 5.98it/s]
394
  4%|▍ | 180/4370 [00:37<11:42, 5.97it/s]
395
  4%|▍ | 181/4370 [00:37<12:23, 5.63it/s]
396
  4%|▍ | 182/4370 [00:38<12:23, 5.63it/s]
397
  4%|▍ | 183/4370 [00:38<12:55, 5.40it/s]
398
  4%|▍ | 184/4370 [00:38<12:51, 5.43it/s]
399
  4%|▍ | 185/4370 [00:38<13:50, 5.04it/s]
400
  4%|▍ | 186/4370 [00:38<12:40, 5.50it/s]
401
  4%|▍ | 187/4370 [00:39<12:10, 5.73it/s]
402
  4%|▍ | 188/4370 [00:39<12:27, 5.60it/s]
403
  4%|▍ | 189/4370 [00:39<12:00, 5.80it/s]
404
  4%|▍ | 190/4370 [00:39<14:08, 4.92it/s]
405
  4%|▍ | 191/4370 [00:39<14:58, 4.65it/s]
406
  4%|▍ | 192/4370 [00:40<16:15, 4.28it/s]
407
  4%|▍ | 193/4370 [00:40<14:54, 4.67it/s]
408
  4%|▍ | 194/4370 [00:40<13:28, 5.17it/s]
409
  4%|▍ | 195/4370 [00:40<13:34, 5.13it/s]
410
  4%|▍ | 196/4370 [00:40<13:01, 5.34it/s]
411
  5%|▍ | 197/4370 [00:41<15:56, 4.36it/s]
412
  5%|▍ | 198/4370 [00:41<14:32, 4.78it/s]
413
  5%|▍ | 199/4370 [00:41<13:11, 5.27it/s]
414
  5%|▍ | 200/4370 [00:41<14:09, 4.91it/s]
415
  5%|▍ | 201/4370 [00:42<15:47, 4.40it/s]
416
  5%|▍ | 202/4370 [00:42<15:29, 4.49it/s]
417
  5%|▍ | 203/4370 [00:42<15:33, 4.46it/s]
418
  5%|▍ | 204/4370 [00:42<15:09, 4.58it/s]
419
  5%|▍ | 205/4370 [00:42<15:35, 4.45it/s]
420
  5%|▍ | 206/4370 [00:43<14:41, 4.73it/s]
421
  5%|▍ | 207/4370 [00:43<14:44, 4.71it/s]
422
  5%|▍ | 208/4370 [00:43<13:42, 5.06it/s]
423
  5%|▍ | 209/4370 [00:43<13:25, 5.17it/s]
424
  5%|▍ | 210/4370 [00:43<13:05, 5.29it/s]
425
  5%|▍ | 211/4370 [00:44<13:24, 5.17it/s]
426
  5%|▍ | 212/4370 [00:44<12:46, 5.42it/s]
427
  5%|▍ | 213/4370 [00:44<12:02, 5.75it/s]
428
  5%|▍ | 214/4370 [00:44<12:02, 5.75it/s]
429
  5%|▍ | 215/4370 [00:44<12:32, 5.52it/s]
430
  5%|▍ | 216/4370 [00:44<13:01, 5.31it/s]
431
  5%|▍ | 217/4370 [00:45<13:15, 5.22it/s]
432
  5%|▍ | 218/4370 [00:45<13:42, 5.05it/s]
433
  5%|β–Œ | 219/4370 [00:45<17:13, 4.02it/s]
434
  5%|β–Œ | 220/4370 [00:45<16:01, 4.32it/s]
435
  5%|β–Œ | 221/4370 [00:46<14:55, 4.63it/s]
436
  5%|β–Œ | 222/4370 [00:46<14:00, 4.94it/s]
437
  5%|β–Œ | 223/4370 [00:46<13:55, 4.97it/s]
438
  5%|β–Œ | 224/4370 [00:46<13:15, 5.21it/s]
439
  5%|β–Œ | 225/4370 [00:46<13:30, 5.11it/s]
440
  5%|β–Œ | 226/4370 [00:46<13:34, 5.09it/s]
441
  5%|β–Œ | 227/4370 [00:47<15:00, 4.60it/s]
442
  5%|β–Œ | 228/4370 [00:47<14:02, 4.92it/s]
443
  5%|β–Œ | 229/4370 [00:47<12:59, 5.31it/s]
444
  5%|β–Œ | 230/4370 [00:47<12:51, 5.37it/s]
445
  5%|β–Œ | 231/4370 [00:47<12:34, 5.48it/s]
446
  5%|β–Œ | 232/4370 [00:48<12:41, 5.43it/s]
447
  5%|β–Œ | 233/4370 [00:48<12:36, 5.47it/s]
448
  5%|β–Œ | 234/4370 [00:48<12:18, 5.60it/s]
449
  5%|β–Œ | 235/4370 [00:48<11:43, 5.88it/s]
450
  5%|β–Œ | 236/4370 [00:48<12:29, 5.52it/s]
451
  5%|β–Œ | 237/4370 [00:49<12:42, 5.42it/s]
452
  5%|β–Œ | 238/4370 [00:49<12:37, 5.45it/s]
453
  5%|β–Œ | 239/4370 [00:49<12:18, 5.59it/s]
454
  5%|β–Œ | 240/4370 [00:49<14:41, 4.69it/s]
455
  6%|β–Œ | 241/4370 [00:49<14:45, 4.66it/s]
456
  6%|β–Œ | 242/4370 [00:50<15:07, 4.55it/s]
457
  6%|β–Œ | 243/4370 [00:50<13:46, 4.99it/s]
458
  6%|β–Œ | 244/4370 [00:50<13:42, 5.02it/s]
459
  6%|β–Œ | 245/4370 [00:50<13:13, 5.20it/s]
460
  6%|β–Œ | 246/4370 [00:50<12:32, 5.48it/s]
461
  6%|β–Œ | 247/4370 [00:50<12:36, 5.45it/s]
462
  6%|β–Œ | 248/4370 [00:51<12:18, 5.58it/s]
463
  6%|β–Œ | 249/4370 [00:51<12:33, 5.47it/s]
464
  6%|β–Œ | 250/4370 [00:51<13:26, 5.11it/s]
465
  6%|β–Œ | 251/4370 [00:51<12:58, 5.29it/s]
466
  6%|β–Œ | 252/4370 [00:51<13:07, 5.23it/s]
467
  6%|β–Œ | 253/4370 [00:52<13:03, 5.26it/s]
468
  6%|β–Œ | 254/4370 [00:52<12:24, 5.53it/s]
469
  6%|β–Œ | 255/4370 [00:52<12:03, 5.68it/s]
470
  6%|β–Œ | 256/4370 [00:52<11:50, 5.79it/s]
471
  6%|β–Œ | 257/4370 [00:52<11:39, 5.88it/s]
472
  6%|β–Œ | 258/4370 [00:52<11:23, 6.02it/s]
473
  6%|β–Œ | 259/4370 [00:53<10:40, 6.42it/s]
474
  6%|β–Œ | 260/4370 [00:53<10:58, 6.24it/s]
475
  6%|β–Œ | 261/4370 [00:53<11:32, 5.93it/s]
476
  6%|β–Œ | 262/4370 [00:53<11:50, 5.78it/s]
477
  6%|β–Œ | 263/4370 [00:53<12:07, 5.64it/s]
478
  6%|β–Œ | 264/4370 [00:53<11:43, 5.83it/s]
479
  6%|β–Œ | 265/4370 [00:54<12:58, 5.27it/s]
480
  6%|β–Œ | 266/4370 [00:54<12:28, 5.48it/s]
481
  6%|β–Œ | 267/4370 [00:54<12:51, 5.32it/s]
482
  6%|β–Œ | 268/4370 [00:54<12:53, 5.30it/s]
483
  6%|β–Œ | 269/4370 [00:54<13:27, 5.08it/s]
484
  6%|β–Œ | 270/4370 [00:55<12:52, 5.31it/s]
485
  6%|β–Œ | 271/4370 [00:55<12:42, 5.38it/s]
486
  6%|β–Œ | 272/4370 [00:55<11:57, 5.71it/s]
487
  6%|β–Œ | 273/4370 [00:55<11:14, 6.07it/s]
488
  6%|β–‹ | 274/4370 [00:55<11:29, 5.94it/s]
489
  6%|β–‹ | 275/4370 [00:55<11:16, 6.05it/s]
490
  6%|β–‹ | 276/4370 [00:56<11:20, 6.02it/s]
491
  6%|β–‹ | 277/4370 [00:56<11:36, 5.88it/s]
492
  6%|β–‹ | 278/4370 [00:56<12:08, 5.61it/s]
493
  6%|β–‹ | 279/4370 [00:56<11:42, 5.82it/s]
494
  6%|β–‹ | 280/4370 [00:56<11:37, 5.86it/s]
495
  6%|β–‹ | 281/4370 [00:57<15:06, 4.51it/s]
496
  6%|β–‹ | 282/4370 [00:57<13:59, 4.87it/s]
497
  6%|β–‹ | 283/4370 [00:57<12:54, 5.27it/s]
498
  6%|β–‹ | 284/4370 [00:57<13:46, 4.94it/s]
499
  7%|β–‹ | 285/4370 [00:57<12:28, 5.46it/s]
500
  7%|β–‹ | 286/4370 [00:58<12:00, 5.67it/s]
501
  7%|β–‹ | 287/4370 [00:58<11:41, 5.82it/s]
502
  7%|β–‹ | 288/4370 [00:58<14:06, 4.82it/s]
503
  7%|β–‹ | 289/4370 [00:58<14:23, 4.73it/s]
504
  7%|β–‹ | 290/4370 [00:58<13:58, 4.87it/s]
505
  7%|β–‹ | 291/4370 [00:59<13:33, 5.01it/s]
506
  7%|β–‹ | 292/4370 [00:59<14:16, 4.76it/s]
507
  7%|β–‹ | 293/4370 [00:59<16:31, 4.11it/s]
508
  7%|β–‹ | 294/4370 [00:59<15:47, 4.30it/s]
509
  7%|β–‹ | 295/4370 [01:00<14:56, 4.55it/s]
510
  7%|β–‹ | 296/4370 [01:00<14:46, 4.60it/s]
511
  7%|β–‹ | 297/4370 [01:00<13:47, 4.92it/s]
512
  7%|β–‹ | 298/4370 [01:00<15:27, 4.39it/s]
513
  7%|β–‹ | 299/4370 [01:00<14:30, 4.68it/s]
514
  7%|β–‹ | 300/4370 [01:01<14:23, 4.71it/s]
515
  7%|β–‹ | 301/4370 [01:01<13:30, 5.02it/s]
516
  7%|β–‹ | 302/4370 [01:01<13:50, 4.90it/s]
517
  7%|β–‹ | 303/4370 [01:01<14:39, 4.62it/s]
518
  7%|β–‹ | 304/4370 [01:01<13:16, 5.11it/s]
519
  7%|β–‹ | 305/4370 [01:02<12:30, 5.42it/s]
520
  7%|β–‹ | 306/4370 [01:02<11:57, 5.66it/s]
521
  7%|β–‹ | 307/4370 [01:02<11:08, 6.08it/s]
522
  7%|β–‹ | 308/4370 [01:02<10:51, 6.23it/s]
523
  7%|β–‹ | 309/4370 [01:02<11:55, 5.68it/s]
524
  7%|β–‹ | 310/4370 [01:02<12:17, 5.51it/s]
525
  7%|β–‹ | 311/4370 [01:03<12:34, 5.38it/s]
526
  7%|β–‹ | 312/4370 [01:03<12:23, 5.46it/s]
527
  7%|β–‹ | 313/4370 [01:03<13:14, 5.10it/s]
528
  7%|β–‹ | 314/4370 [01:03<12:38, 5.35it/s]
529
  7%|β–‹ | 315/4370 [01:03<12:40, 5.33it/s]
530
  7%|β–‹ | 316/4370 [01:03<12:39, 5.34it/s]
531
  7%|β–‹ | 317/4370 [01:04<12:12, 5.54it/s]
532
  7%|β–‹ | 318/4370 [01:04<13:37, 4.96it/s]
533
  7%|β–‹ | 319/4370 [01:04<13:45, 4.91it/s]
534
  7%|β–‹ | 320/4370 [01:04<14:14, 4.74it/s]
535
  7%|β–‹ | 321/4370 [01:05<14:06, 4.78it/s]
536
  7%|β–‹ | 322/4370 [01:05<13:02, 5.17it/s]
537
  7%|β–‹ | 323/4370 [01:05<11:47, 5.72it/s]
538
  7%|β–‹ | 324/4370 [01:05<11:47, 5.72it/s]
539
  7%|β–‹ | 325/4370 [01:05<11:29, 5.87it/s]
540
  7%|β–‹ | 326/4370 [01:05<11:53, 5.67it/s]
541
  7%|β–‹ | 327/4370 [01:06<11:57, 5.63it/s]
542
  8%|β–Š | 328/4370 [01:06<11:54, 5.65it/s]
543
  8%|β–Š | 329/4370 [01:06<12:36, 5.34it/s]
544
  8%|β–Š | 330/4370 [01:06<14:24, 4.67it/s]
545
  8%|β–Š | 331/4370 [01:06<13:43, 4.90it/s]
546
  8%|β–Š | 332/4370 [01:07<14:43, 4.57it/s]
547
  8%|β–Š | 333/4370 [01:07<13:56, 4.83it/s]
548
  8%|β–Š | 334/4370 [01:07<12:54, 5.21it/s]
549
  8%|β–Š | 335/4370 [01:07<12:38, 5.32it/s]
550
  8%|β–Š | 336/4370 [01:07<13:10, 5.10it/s]
551
  8%|β–Š | 337/4370 [01:08<12:53, 5.21it/s]
552
  8%|β–Š | 338/4370 [01:08<12:20, 5.45it/s]
553
  8%|β–Š | 339/4370 [01:08<12:01, 5.59it/s]
554
  8%|β–Š | 340/4370 [01:08<12:22, 5.43it/s]
555
  8%|β–Š | 341/4370 [01:08<12:14, 5.48it/s]
556
  8%|β–Š | 342/4370 [01:08<12:04, 5.56it/s]
557
  8%|β–Š | 343/4370 [01:09<14:07, 4.75it/s]
558
  8%|β–Š | 344/4370 [01:09<13:17, 5.05it/s]
559
  8%|β–Š | 345/4370 [01:09<14:02, 4.78it/s]
560
  8%|β–Š | 346/4370 [01:09<15:17, 4.39it/s]
561
  8%|β–Š | 347/4370 [01:10<14:10, 4.73it/s]
562
  8%|β–Š | 348/4370 [01:10<14:04, 4.76it/s]
563
  8%|β–Š | 349/4370 [01:10<13:58, 4.79it/s]
564
  8%|β–Š | 350/4370 [01:10<14:39, 4.57it/s]
565
  8%|β–Š | 351/4370 [01:10<14:24, 4.65it/s]
566
  8%|β–Š | 352/4370 [01:11<14:48, 4.52it/s]
567
  8%|β–Š | 353/4370 [01:11<15:40, 4.27it/s]
568
  8%|β–Š | 354/4370 [01:11<14:22, 4.65it/s]
569
  8%|β–Š | 355/4370 [01:12<21:07, 3.17it/s]
570
  8%|β–Š | 356/4370 [01:12<19:30, 3.43it/s]
571
  8%|β–Š | 357/4370 [01:12<18:03, 3.71it/s]
572
  8%|β–Š | 358/4370 [01:12<16:24, 4.07it/s]
573
  8%|β–Š | 359/4370 [01:12<15:15, 4.38it/s]
574
  8%|β–Š | 360/4370 [01:13<14:40, 4.55it/s]
575
  8%|β–Š | 361/4370 [01:13<14:47, 4.52it/s]
576
  8%|β–Š | 362/4370 [01:13<14:35, 4.58it/s]
577
  8%|β–Š | 363/4370 [01:13<13:24, 4.98it/s]
578
  8%|β–Š | 364/4370 [01:14<15:51, 4.21it/s]
579
  8%|β–Š | 365/4370 [01:14<14:39, 4.56it/s]
580
  8%|β–Š | 366/4370 [01:14<15:40, 4.26it/s]
581
  8%|β–Š | 367/4370 [01:14<14:59, 4.45it/s]
582
  8%|β–Š | 368/4370 [01:14<13:25, 4.97it/s]
583
  8%|β–Š | 369/4370 [01:15<12:57, 5.14it/s]
584
  8%|β–Š | 370/4370 [01:15<12:21, 5.39it/s]
585
  8%|β–Š | 371/4370 [01:15<14:17, 4.67it/s]
586
  9%|β–Š | 372/4370 [01:15<13:56, 4.78it/s]
587
  9%|β–Š | 373/4370 [01:15<13:01, 5.11it/s]
588
  9%|β–Š | 374/4370 [01:16<16:01, 4.16it/s]
589
  9%|β–Š | 375/4370 [01:16<14:59, 4.44it/s]
590
  9%|β–Š | 376/4370 [01:16<13:31, 4.92it/s]
591
  9%|β–Š | 377/4370 [01:16<12:49, 5.19it/s]
592
  9%|β–Š | 378/4370 [01:16<12:49, 5.19it/s]
593
  9%|β–Š | 379/4370 [01:17<13:49, 4.81it/s]
594
  9%|β–Š | 380/4370 [01:17<13:22, 4.97it/s]
595
  9%|β–Š | 381/4370 [01:17<13:40, 4.86it/s]
596
  9%|β–Š | 382/4370 [01:17<14:12, 4.68it/s]
597
  9%|β–‰ | 383/4370 [01:17<12:48, 5.19it/s]
598
  9%|β–‰ | 384/4370 [01:18<13:08, 5.05it/s]
599
  9%|β–‰ | 385/4370 [01:18<13:33, 4.90it/s]
600
  9%|β–‰ | 386/4370 [01:18<13:35, 4.88it/s]
601
  9%|β–‰ | 387/4370 [01:18<14:55, 4.45it/s]
602
  9%|β–‰ | 388/4370 [01:19<13:31, 4.91it/s]
603
  9%|β–‰ | 389/4370 [01:19<12:17, 5.40it/s]
604
  9%|β–‰ | 390/4370 [01:19<13:00, 5.10it/s]
605
  9%|β–‰ | 391/4370 [01:19<12:16, 5.40it/s]
606
  9%|β–‰ | 392/4370 [01:19<11:54, 5.57it/s]
607
  9%|β–‰ | 393/4370 [01:20<15:09, 4.37it/s]
608
  9%|β–‰ | 394/4370 [01:20<14:43, 4.50it/s]
609
  9%|β–‰ | 395/4370 [01:20<13:39, 4.85it/s]
610
  9%|β–‰ | 396/4370 [01:20<13:40, 4.84it/s]
611
  9%|β–‰ | 397/4370 [01:20<13:20, 4.96it/s]
612
  9%|β–‰ | 398/4370 [01:21<13:27, 4.92it/s]
613
  9%|β–‰ | 399/4370 [01:21<13:08, 5.04it/s]
614
  9%|β–‰ | 400/4370 [01:21<13:02, 5.07it/s]
615
  9%|β–‰ | 401/4370 [01:21<13:21, 4.95it/s]
616
  9%|β–‰ | 402/4370 [01:21<13:17, 4.98it/s]
617
  9%|β–‰ | 403/4370 [01:21<12:43, 5.20it/s]
618
  9%|β–‰ | 404/4370 [01:22<13:02, 5.07it/s]
619
  9%|β–‰ | 405/4370 [01:22<12:05, 5.46it/s]
620
  9%|β–‰ | 406/4370 [01:22<11:16, 5.86it/s]
621
  9%|β–‰ | 407/4370 [01:22<10:41, 6.17it/s]
622
  9%|β–‰ | 408/4370 [01:22<11:49, 5.59it/s]
623
  9%|β–‰ | 409/4370 [01:23<12:47, 5.16it/s]
624
  9%|β–‰ | 410/4370 [01:23<14:15, 4.63it/s]
625
  9%|β–‰ | 411/4370 [01:23<13:57, 4.73it/s]
626
  9%|β–‰ | 412/4370 [01:23<13:41, 4.82it/s]
627
  9%|β–‰ | 413/4370 [01:23<13:26, 4.90it/s]
628
  9%|β–‰ | 414/4370 [01:24<13:05, 5.03it/s]
629
  9%|β–‰ | 415/4370 [01:24<13:16, 4.97it/s]
630
  10%|β–‰ | 416/4370 [01:24<14:25, 4.57it/s]
631
  10%|β–‰ | 417/4370 [01:24<15:14, 4.32it/s]
632
  10%|β–‰ | 418/4370 [01:25<14:02, 4.69it/s]
633
  10%|β–‰ | 419/4370 [01:25<12:50, 5.13it/s]
634
  10%|β–‰ | 420/4370 [01:25<12:20, 5.33it/s]
635
  10%|β–‰ | 421/4370 [01:25<12:30, 5.26it/s]
636
  10%|β–‰ | 422/4370 [01:25<11:48, 5.57it/s]
637
  10%|β–‰ | 423/4370 [01:25<13:00, 5.05it/s]
638
  10%|β–‰ | 424/4370 [01:26<12:50, 5.12it/s]
639
  10%|β–‰ | 425/4370 [01:26<13:31, 4.86it/s]
640
  10%|β–‰ | 426/4370 [01:26<13:01, 5.04it/s]
641
  10%|β–‰ | 427/4370 [01:26<14:21, 4.58it/s]
642
  10%|β–‰ | 428/4370 [01:27<18:00, 3.65it/s]
643
  10%|β–‰ | 429/4370 [01:27<15:39, 4.19it/s]
644
  10%|β–‰ | 430/4370 [01:27<15:31, 4.23it/s]
645
  10%|β–‰ | 431/4370 [01:27<13:59, 4.69it/s]
646
  10%|β–‰ | 432/4370 [01:28<15:00, 4.38it/s]
647
  10%|β–‰ | 433/4370 [01:28<14:16, 4.60it/s]
648
  10%|β–‰ | 434/4370 [01:28<13:14, 4.96it/s]
649
  10%|β–‰ | 435/4370 [01:28<12:42, 5.16it/s]
650
  10%|β–‰ | 436/4370 [01:28<13:00, 5.04it/s]
651
  10%|β–ˆ | 437/4370 [01:28<12:31, 5.23it/s][INFO|trainer.py:811] 2024-09-05 20:48:30,653 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
652
+ [INFO|trainer.py:3819] 2024-09-05 20:48:30,656 >>
653
+ ***** Running Evaluation *****
654
+ [INFO|trainer.py:3821] 2024-09-05 20:48:30,656 >> Num examples = 6946
655
+ [INFO|trainer.py:3824] 2024-09-05 20:48:30,656 >> Batch size = 8
656
+
657
+
658
  0%| | 0/869 [00:00<?, ?it/s]
659
+
660
  1%|▏ | 11/869 [00:00<00:08, 101.23it/s]
661
+
662
  3%|β–Ž | 22/869 [00:00<00:09, 91.51it/s] 
663
+
664
  4%|β–Ž | 32/869 [00:00<00:09, 90.28it/s]
665
+
666
  5%|▍ | 42/869 [00:00<00:09, 91.73it/s]
667
+
668
  6%|β–Œ | 52/869 [00:00<00:08, 92.54it/s]
669
+
670
  7%|β–‹ | 62/869 [00:00<00:08, 92.44it/s]
671
+
672
  8%|β–Š | 72/869 [00:00<00:08, 90.47it/s]
673
+
674
  9%|β–‰ | 82/869 [00:00<00:08, 92.22it/s]
675
+
676
  11%|β–ˆ | 92/869 [00:00<00:08, 92.98it/s]
677
+
678
  12%|β–ˆβ– | 102/869 [00:01<00:08, 94.35it/s]
679
+
680
  13%|β–ˆβ–Ž | 112/869 [00:01<00:08, 92.62it/s]
681
+
682
  14%|β–ˆβ– | 122/869 [00:01<00:08, 91.65it/s]
683
+
684
  15%|β–ˆβ–Œ | 132/869 [00:01<00:08, 91.74it/s]
685
+
686
  16%|β–ˆβ–‹ | 142/869 [00:01<00:08, 90.63it/s]
687
+
688
  17%|β–ˆβ–‹ | 152/869 [00:01<00:07, 90.80it/s]
689
+
690
  19%|β–ˆβ–Š | 162/869 [00:01<00:07, 90.11it/s]
691
+
692
  20%|β–ˆβ–‰ | 172/869 [00:01<00:07, 89.26it/s]
693
+
694
  21%|β–ˆβ–ˆ | 181/869 [00:01<00:07, 88.34it/s]
695
+
696
  22%|β–ˆβ–ˆβ– | 190/869 [00:02<00:07, 87.81it/s]
697
+
698
  23%|β–ˆβ–ˆβ–Ž | 200/869 [00:02<00:07, 90.06it/s]
699
+
700
  24%|β–ˆβ–ˆβ– | 210/869 [00:02<00:07, 91.03it/s]
701
+
702
  25%|β–ˆβ–ˆβ–Œ | 220/869 [00:02<00:07, 92.47it/s]
703
+
704
  26%|β–ˆβ–ˆβ–‹ | 230/869 [00:02<00:06, 91.95it/s]
705
+
706
  28%|β–ˆβ–ˆβ–Š | 240/869 [00:02<00:07, 89.75it/s]
707
+
708
  29%|β–ˆβ–ˆβ–Š | 249/869 [00:02<00:07, 87.37it/s]
709
+
710
  30%|β–ˆβ–ˆβ–‰ | 259/869 [00:02<00:06, 90.63it/s]
711
+
712
  31%|β–ˆβ–ˆβ–ˆ | 269/869 [00:02<00:06, 90.54it/s]
713
+
714
  32%|β–ˆβ–ˆβ–ˆβ– | 279/869 [00:03<00:06, 90.05it/s]
715
+
716
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 289/869 [00:03<00:06, 89.12it/s]
717
+
718
  34%|β–ˆβ–ˆβ–ˆβ– | 298/869 [00:03<00:06, 88.93it/s]
719
+
720
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 307/869 [00:03<00:06, 88.88it/s]
721
+
722
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 317/869 [00:03<00:06, 89.54it/s]
723
+
724
  38%|β–ˆβ–ˆβ–ˆβ–Š | 326/869 [00:03<00:06, 89.59it/s]
725
+
726
  39%|β–ˆβ–ˆβ–ˆβ–Š | 336/869 [00:03<00:05, 91.34it/s]
727
+
728
  40%|β–ˆβ–ˆβ–ˆβ–‰ | 346/869 [00:03<00:05, 89.56it/s]
729
+
730
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 356/869 [00:03<00:05, 90.14it/s]
731
+
732
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 366/869 [00:04<00:05, 92.17it/s]
733
+
734
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 376/869 [00:04<00:05, 88.58it/s]
735
+
736
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 385/869 [00:04<00:05, 88.83it/s]
737
+
738
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 394/869 [00:04<00:05, 87.30it/s]
739
+
740
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 403/869 [00:04<00:05, 87.21it/s]
741
+
742
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 413/869 [00:04<00:05, 90.10it/s]
743
+
744
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 423/869 [00:04<00:04, 90.84it/s]
745
+
746
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 433/869 [00:04<00:04, 91.10it/s]
747
+
748
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 443/869 [00:04<00:04, 87.90it/s]
749
+
750
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 453/869 [00:05<00:04, 90.57it/s]
751
+
752
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 463/869 [00:05<00:04, 89.49it/s]
753
+
754
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 473/869 [00:05<00:04, 91.35it/s]
755
+
756
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 484/869 [00:05<00:04, 93.70it/s]
757
+
758
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 494/869 [00:05<00:04, 90.76it/s]
759
+
760
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 504/869 [00:05<00:04, 88.92it/s]
761
+
762
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 514/869 [00:05<00:03, 90.62it/s]
763
+
764
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 524/869 [00:05<00:03, 87.02it/s]
765
+
766
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 533/869 [00:05<00:03, 87.76it/s]
767
+
768
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 542/869 [00:06<00:03, 85.30it/s]
769
+
770
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 551/869 [00:06<00:03, 86.30it/s]
771
+
772
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 561/869 [00:06<00:03, 88.08it/s]
773
+
774
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 570/869 [00:06<00:03, 88.12it/s]
775
+
776
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 580/869 [00:06<00:03, 88.75it/s]
777
+
778
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 590/869 [00:06<00:03, 89.63it/s]
779
+
780
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 600/869 [00:06<00:02, 91.18it/s]
781
+
782
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 610/869 [00:06<00:02, 90.68it/s]
783
+
784
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 620/869 [00:06<00:02, 90.38it/s]
785
+
786
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 630/869 [00:06<00:02, 91.00it/s]
787
+
788
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 640/869 [00:07<00:02, 90.13it/s]
789
+
790
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 650/869 [00:07<00:02, 90.69it/s]
791
+
792
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 660/869 [00:07<00:02, 90.37it/s]
793
+
794
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 670/869 [00:07<00:02, 91.58it/s]
795
+
796
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 680/869 [00:07<00:02, 93.04it/s]
797
+
798
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 690/869 [00:07<00:02, 86.86it/s]
799
+
800
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 700/869 [00:07<00:01, 88.28it/s]
801
+
802
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 710/869 [00:07<00:01, 90.00it/s]
803
+
804
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 720/869 [00:07<00:01, 89.02it/s]
805
+
806
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 730/869 [00:08<00:01, 90.80it/s]
807
+
808
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 740/869 [00:08<00:01, 91.14it/s]
809
+
810
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 750/869 [00:08<00:01, 90.37it/s]
811
+
812
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 760/869 [00:08<00:01, 91.86it/s]
813
+
814
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 770/869 [00:08<00:01, 89.85it/s]
815
+
816
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 780/869 [00:08<00:01, 84.79it/s]
817
+
818
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 790/869 [00:08<00:00, 87.94it/s]
819
+
820
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 800/869 [00:08<00:00, 90.37it/s]
821
+
822
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 810/869 [00:08<00:00, 91.73it/s]
823
+
824
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 820/869 [00:09<00:00, 90.53it/s]
825
+
826
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 830/869 [00:09<00:00, 93.05it/s]
827
+
828
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 840/869 [00:09<00:00, 93.45it/s]
829
+
830
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 851/869 [00:09<00:00, 95.58it/s]
831
+
832
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 861/869 [00:09<00:00, 92.09it/s]
833
 
834
+
835
 
836
  10%|β–ˆ | 437/4370 [01:42<12:31, 5.23it/s]
837
+
838
+
839
  [INFO|trainer.py:3503] 2024-09-05 20:48:44,033 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-437
840
+ [INFO|configuration_utils.py:472] 2024-09-05 20:48:44,034 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-437/config.json
841
+ [INFO|modeling_utils.py:2799] 2024-09-05 20:48:44,918 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-437/model.safetensors
842
+ [INFO|tokenization_utils_base.py:2684] 2024-09-05 20:48:44,919 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-437/tokenizer_config.json
843
+ [INFO|tokenization_utils_base.py:2693] 2024-09-05 20:48:44,919 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-437/special_tokens_map.json
844
+ [INFO|tokenization_utils_base.py:2684] 2024-09-05 20:48:46,827 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
845
+ [INFO|tokenization_utils_base.py:2693] 2024-09-05 20:48:46,827 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
846
+
847
  10%|β–ˆ | 438/4370 [01:45<5:31:31, 5.06s/it]
848
  10%|β–ˆ | 439/4370 [01:45<3:56:14, 3.61s/it]
849
  10%|β–ˆ | 440/4370 [01:45<2:49:11, 2.58s/it]
850
  10%|β–ˆ | 441/4370 [01:46<2:03:39, 1.89s/it]
851
  10%|β–ˆ | 442/4370 [01:46<1:29:45, 1.37s/it]
852
  10%|β–ˆ | 443/4370 [01:46<1:06:06, 1.01s/it]
853
  10%|β–ˆ | 444/4370 [01:46<50:50, 1.29it/s]
854
  10%|β–ˆ | 445/4370 [01:46<40:42, 1.61it/s]
855
  10%|β–ˆ | 446/4370 [01:47<32:01, 2.04it/s]
856
  10%|β–ˆ | 447/4370 [01:47<25:57, 2.52it/s]
857
  10%|β–ˆ | 448/4370 [01:47<22:16, 2.93it/s]
858
  10%|β–ˆ | 449/4370 [01:47<18:51, 3.47it/s]
859
  10%|β–ˆ | 450/4370 [01:47<17:21, 3.77it/s]
860
  10%|β–ˆ | 451/4370 [01:48<16:41, 3.91it/s]
861
  10%|β–ˆ | 452/4370 [01:48<17:52, 3.65it/s]
862
  10%|β–ˆ | 453/4370 [01:48<16:01, 4.07it/s]
863
  10%|β–ˆ | 454/4370 [01:48<14:31, 4.49it/s]
864
  10%|β–ˆ | 455/4370 [01:48<14:12, 4.59it/s]
865
  10%|β–ˆ | 456/4370 [01:49<13:57, 4.68it/s]
866
  10%|β–ˆ | 457/4370 [01:49<13:45, 4.74it/s]
867
  10%|β–ˆ | 458/4370 [01:49<13:32, 4.82it/s]
868
  11%|β–ˆ | 459/4370 [01:49<14:12, 4.59it/s]
869
  11%|β–ˆ | 460/4370 [01:49<12:33, 5.19it/s]
870
  11%|β–ˆ | 461/4370 [01:50<12:08, 5.37it/s]
871
  11%|β–ˆ | 462/4370 [01:50<12:06, 5.38it/s]
872
  11%|β–ˆ | 463/4370 [01:50<15:08, 4.30it/s]
873
  11%|β–ˆ | 464/4370 [01:50<14:44, 4.42it/s]
874
  11%|β–ˆ | 465/4370 [01:50<13:11, 4.93it/s]
875
  11%|β–ˆ | 466/4370 [01:51<13:08, 4.95it/s]
876
  11%|β–ˆ | 467/4370 [01:51<12:43, 5.11it/s]
877
  11%|β–ˆ | 468/4370 [01:51<12:43, 5.11it/s]
878
  11%|β–ˆ | 469/4370 [01:51<14:53, 4.37it/s]
879
  11%|β–ˆ | 470/4370 [01:52<15:05, 4.31it/s]
880
  11%|β–ˆ | 471/4370 [01:52<15:29, 4.19it/s]
881
  11%|β–ˆ | 472/4370 [01:52<14:31, 4.47it/s]
882
  11%|β–ˆ | 473/4370 [01:52<13:49, 4.70it/s]
883
  11%|β–ˆ | 474/4370 [01:52<12:24, 5.24it/s]
884
  11%|β–ˆ | 475/4370 [01:53<13:27, 4.82it/s]
885
  11%|β–ˆ | 476/4370 [01:53<13:34, 4.78it/s]
886
  11%|β–ˆ | 477/4370 [01:53<12:56, 5.01it/s]
887
  11%|β–ˆ | 478/4370 [01:53<13:10, 4.92it/s]
888
  11%|β–ˆ | 479/4370 [01:53<12:51, 5.04it/s]
889
  11%|β–ˆ | 480/4370 [01:54<12:48, 5.06it/s]
890
  11%|β–ˆ | 481/4370 [01:54<13:03, 4.96it/s]
891
  11%|β–ˆ | 482/4370 [01:54<12:28, 5.19it/s]
892
  11%|β–ˆ | 483/4370 [01:54<12:10, 5.32it/s]
893
  11%|β–ˆ | 484/4370 [01:54<12:40, 5.11it/s]
894
  11%|β–ˆ | 485/4370 [01:55<13:10, 4.92it/s]
895
  11%|β–ˆ | 486/4370 [01:55<13:00, 4.98it/s]
896
  11%|β–ˆ | 487/4370 [01:55<13:17, 4.87it/s]
897
  11%|β–ˆ | 488/4370 [01:55<12:37, 5.13it/s]
898
  11%|β–ˆ | 489/4370 [01:55<12:53, 5.02it/s]
899
  11%|β–ˆ | 490/4370 [01:56<12:48, 5.05it/s]
900
  11%|β–ˆ | 491/4370 [01:56<12:04, 5.35it/s]
901
  11%|β–ˆβ– | 492/4370 [01:56<11:50, 5.46it/s]
902
  11%|β–ˆβ– | 493/4370 [01:56<11:57, 5.40it/s]
903
  11%|β–ˆβ– | 494/4370 [01:56<11:10, 5.78it/s]
904
  11%|β–ˆβ– | 495/4370 [01:56<10:53, 5.93it/s]
905
  11%|β–ˆβ– | 496/4370 [01:57<11:08, 5.79it/s]
906
  11%|β–ˆβ– | 497/4370 [01:57<10:32, 6.12it/s]
907
  11%|β–ˆβ– | 498/4370 [01:57<12:01, 5.36it/s]
908
  11%|β–ˆβ– | 499/4370 [01:57<12:16, 5.26it/s]
909
  11%|β–ˆβ– | 500/4370 [01:57<12:51, 5.01it/s]
910
 
911
  11%|β–ˆβ– | 500/4370 [01:57<12:51, 5.01it/s]
912
  11%|β–ˆβ– | 501/4370 [01:58<13:06, 4.92it/s]
913
  11%|β–ˆβ– | 502/4370 [01:58<13:53, 4.64it/s]
914
  12%|β–ˆβ– | 503/4370 [01:58<13:15, 4.86it/s]
915
  12%|β–ˆβ– | 504/4370 [01:58<13:34, 4.74it/s]
916
  12%|β–ˆβ– | 505/4370 [01:58<13:00, 4.95it/s]
917
  12%|β–ˆβ– | 506/4370 [01:59<12:26, 5.18it/s]
918
  12%|β–ˆβ– | 507/4370 [01:59<11:54, 5.41it/s]
919
  12%|β–ˆβ– | 508/4370 [01:59<12:10, 5.28it/s]
920
  12%|β–ˆβ– | 509/4370 [01:59<11:43, 5.49it/s]
921
  12%|β–ˆβ– | 510/4370 [01:59<12:48, 5.03it/s]
922
  12%|β–ˆβ– | 511/4370 [02:00<12:13, 5.26it/s]
923
  12%|β–ˆβ– | 512/4370 [02:00<11:58, 5.37it/s]
924
  12%|β–ˆβ– | 513/4370 [02:00<11:53, 5.40it/s]
925
  12%|β–ˆβ– | 514/4370 [02:00<12:07, 5.30it/s]
926
  12%|β–ˆβ– | 515/4370 [02:00<11:44, 5.47it/s]
927
  12%|β–ˆβ– | 516/4370 [02:00<10:50, 5.93it/s]
928
  12%|β–ˆβ– | 517/4370 [02:01<10:36, 6.05it/s]
929
  12%|β–ˆβ– | 518/4370 [02:01<10:51, 5.91it/s]
930
  12%|β–ˆβ– | 519/4370 [02:01<10:24, 6.16it/s]
931
  12%|β–ˆβ– | 520/4370 [02:01<10:17, 6.23it/s]
932
  12%|β–ˆβ– | 521/4370 [02:01<10:32, 6.09it/s]
933
  12%|β–ˆβ– | 522/4370 [02:01<12:40, 5.06it/s]
934
  12%|β–ˆβ– | 523/4370 [02:02<13:08, 4.88it/s]
935
  12%|β–ˆβ– | 524/4370 [02:02<12:18, 5.21it/s]
936
  12%|β–ˆβ– | 525/4370 [02:02<12:10, 5.26it/s]
937
  12%|β–ˆβ– | 526/4370 [02:02<12:21, 5.19it/s]
938
  12%|β–ˆβ– | 527/4370 [02:02<11:18, 5.66it/s]
939
  12%|β–ˆβ– | 528/4370 [02:03<11:08, 5.75it/s]
940
  12%|β–ˆβ– | 529/4370 [02:03<13:55, 4.59it/s]
941
  12%|β–ˆβ– | 530/4370 [02:03<13:25, 4.77it/s]
942
  12%|β–ˆβ– | 531/4370 [02:03<12:38, 5.06it/s]
943
  12%|β–ˆβ– | 532/4370 [02:03<12:11, 5.24it/s]
944
  12%|β–ˆβ– | 533/4370 [02:04<11:58, 5.34it/s]
945
  12%|β–ˆβ– | 534/4370 [02:04<11:35, 5.52it/s]
946
  12%|β–ˆβ– | 535/4370 [02:04<11:43, 5.45it/s]
947
  12%|β–ˆβ– | 536/4370 [02:04<12:30, 5.11it/s]
948
  12%|β–ˆβ– | 537/4370 [02:04<13:41, 4.66it/s]
949
  12%|β–ˆβ– | 538/4370 [02:05<12:49, 4.98it/s]
950
  12%|β–ˆβ– | 539/4370 [02:05<12:40, 5.04it/s]
951
  12%|β–ˆβ– | 540/4370 [02:05<12:03, 5.30it/s]
952
  12%|β–ˆβ– | 541/4370 [02:05<11:44, 5.44it/s]
953
  12%|β–ˆβ– | 542/4370 [02:05<11:32, 5.52it/s]
954
  12%|β–ˆβ– | 543/4370 [02:05<11:43, 5.44it/s]
955
  12%|β–ˆβ– | 544/4370 [02:06<12:52, 4.95it/s]
956
  12%|β–ˆβ– | 545/4370 [02:06<14:00, 4.55it/s]
957
  12%|β–ˆβ– | 546/4370 [02:06<13:39, 4.67it/s]
958
  13%|β–ˆβ–Ž | 547/4370 [02:06<13:14, 4.81it/s]
959
  13%|β–ˆβ–Ž | 548/4370 [02:07<14:12, 4.49it/s]
960
  13%|β–ˆβ–Ž | 549/4370 [02:07<12:46, 4.99it/s]
961
  13%|β–ˆβ–Ž | 550/4370 [02:07<12:04, 5.28it/s]
962
  13%|β–ˆβ–Ž | 551/4370 [02:07<13:56, 4.57it/s]
963
  13%|β–ˆβ–Ž | 552/4370 [02:07<14:35, 4.36it/s]
964
  13%|β–ˆβ–Ž | 553/4370 [02:08<13:14, 4.80it/s]
965
  13%|β–ˆβ–Ž | 554/4370 [02:08<14:49, 4.29it/s]
966
  13%|β–ˆβ–Ž | 555/4370 [02:08<13:40, 4.65it/s]
967
  13%|β–ˆβ–Ž | 556/4370 [02:08<13:31, 4.70it/s]
968
  13%|β–ˆβ–Ž | 557/4370 [02:09<13:50, 4.59it/s]
969
  13%|β–ˆβ–Ž | 558/4370 [02:09<14:19, 4.44it/s]
970
  13%|β–ˆβ–Ž | 559/4370 [02:09<14:10, 4.48it/s]
971
  13%|β–ˆβ–Ž | 560/4370 [02:09<13:48, 4.60it/s]
972
  13%|β–ˆβ–Ž | 561/4370 [02:09<12:40, 5.01it/s]
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57
3
+ size 5240
vocab.txt ADDED
The diff for this file is too large to render. See raw diff