Rodrigo1771 committed on
Commit
65314ff
1 Parent(s): 934202e

Training in progress, epoch 0

Browse files
README.md ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
+ tags:
5
+ - token-classification
6
+ - generated_from_trainer
7
+ datasets:
8
+ - Rodrigo1771/combined-train-drugtemist-dev-ner
9
+ metrics:
10
+ - precision
11
+ - recall
12
+ - f1
13
+ - accuracy
14
+ model-index:
15
+ - name: output
16
+ results:
17
+ - task:
18
+ name: Token Classification
19
+ type: token-classification
20
+ dataset:
21
+ name: Rodrigo1771/combined-train-drugtemist-dev-ner
22
+ type: Rodrigo1771/combined-train-drugtemist-dev-ner
23
+ config: CombinedTrainDrugTEMISTDevNER
24
+ split: validation
25
+ args: CombinedTrainDrugTEMISTDevNER
26
+ metrics:
27
+ - name: Precision
28
+ type: precision
29
+ value: 0.09532555790247038
30
+ - name: Recall
31
+ type: recall
32
+ value: 0.9540441176470589
33
+ - name: F1
34
+ type: f1
35
+ value: 0.17333222008850296
36
+ - name: Accuracy
37
+ type: accuracy
38
+ value: 0.7932840841995413
39
+ ---
40
+
41
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
42
+ should probably proofread and complete it, then remove this comment. -->
43
+
44
+ # output
45
+
46
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/combined-train-drugtemist-dev-ner dataset.
47
+ It achieves the following results on the evaluation set:
48
+ - Loss: 1.0503
49
+ - Precision: 0.0953
50
+ - Recall: 0.9540
51
+ - F1: 0.1733
52
+ - Accuracy: 0.7933
53
+
54
+ ## Model description
55
+
56
+ More information needed
57
+
58
+ ## Intended uses & limitations
59
+
60
+ More information needed
61
+
62
+ ## Training and evaluation data
63
+
64
+ More information needed
65
+
66
+ ## Training procedure
67
+
68
+ ### Training hyperparameters
69
+
70
+ The following hyperparameters were used during training:
71
+ - learning_rate: 5e-05
72
+ - train_batch_size: 32
73
+ - eval_batch_size: 8
74
+ - seed: 42
75
+ - gradient_accumulation_steps: 2
76
+ - total_train_batch_size: 64
77
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
78
+ - lr_scheduler_type: linear
79
+ - num_epochs: 10.0
80
+
81
+ ### Training results
82
+
83
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
84
+ |:-------------:|:------:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
85
+ | No log | 0.9988 | 425 | 0.6611 | 0.0883 | 0.9292 | 0.1613 | 0.7850 |
86
+ | 0.3349 | 2.0 | 851 | 0.9204 | 0.0787 | 0.9301 | 0.1451 | 0.7551 |
87
+ | 0.1788 | 2.9988 | 1276 | 0.9545 | 0.0844 | 0.9329 | 0.1549 | 0.7645 |
88
+ | 0.1227 | 4.0 | 1702 | 1.0924 | 0.0885 | 0.9412 | 0.1618 | 0.7692 |
89
+ | 0.0856 | 4.9988 | 2127 | 1.0503 | 0.0953 | 0.9540 | 0.1733 | 0.7933 |
90
+ | 0.0597 | 6.0 | 2553 | 1.2642 | 0.0912 | 0.9449 | 0.1663 | 0.7788 |
91
+ | 0.0597 | 6.9988 | 2978 | 1.3262 | 0.0928 | 0.9485 | 0.1690 | 0.7829 |
92
+ | 0.0458 | 8.0 | 3404 | 1.3698 | 0.0926 | 0.9522 | 0.1688 | 0.7849 |
93
+ | 0.0343 | 8.9988 | 3829 | 1.4433 | 0.0907 | 0.9449 | 0.1655 | 0.7822 |
94
+ | 0.0292 | 9.9882 | 4250 | 1.4862 | 0.0914 | 0.9458 | 0.1667 | 0.7821 |
95
+
96
+
97
+ ### Framework versions
98
+
99
+ - Transformers 4.42.4
100
+ - Pytorch 2.4.0+cu121
101
+ - Datasets 2.21.0
102
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.988249118683902,
3
+ "eval_accuracy": 0.7932840841995413,
4
+ "eval_f1": 0.17333222008850296,
5
+ "eval_loss": 1.0502684116363525,
6
+ "eval_precision": 0.09532555790247038,
7
+ "eval_recall": 0.9540441176470589,
8
+ "eval_runtime": 14.6851,
9
+ "eval_samples": 6810,
10
+ "eval_samples_per_second": 463.735,
11
+ "eval_steps_per_second": 58.018,
12
+ "predict_accuracy": 0.8808544760053879,
13
+ "predict_f1": 0.24078298623886968,
14
+ "predict_loss": 0.62894606590271,
15
+ "predict_precision": 0.1378032345013477,
16
+ "predict_recall": 0.9528246942341293,
17
+ "predict_runtime": 29.8715,
18
+ "predict_samples_per_second": 489.229,
19
+ "predict_steps_per_second": 61.162,
20
+ "total_flos": 1.2649810588547778e+16,
21
+ "train_loss": 0.10639642311544979,
22
+ "train_runtime": 1208.2019,
23
+ "train_samples": 27229,
24
+ "train_samples_per_second": 225.368,
25
+ "train_steps_per_second": 3.518
26
+ }
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
3
+ "architectures": [
4
+ "RobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "finetuning_task": "ner",
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "O",
17
+ "1": "B-ENFERMEDAD",
18
+ "2": "I-ENFERMEDAD"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "B-ENFERMEDAD": 1,
24
+ "I-ENFERMEDAD": 2,
25
+ "O": 0
26
+ },
27
+ "layer_norm_eps": 1e-05,
28
+ "max_position_embeddings": 514,
29
+ "model_type": "roberta",
30
+ "num_attention_heads": 12,
31
+ "num_hidden_layers": 12,
32
+ "pad_token_id": 1,
33
+ "position_embedding_type": "absolute",
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.42.4",
36
+ "type_vocab_size": 1,
37
+ "use_cache": true,
38
+ "vocab_size": 50262
39
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.988249118683902,
3
+ "eval_accuracy": 0.7932840841995413,
4
+ "eval_f1": 0.17333222008850296,
5
+ "eval_loss": 1.0502684116363525,
6
+ "eval_precision": 0.09532555790247038,
7
+ "eval_recall": 0.9540441176470589,
8
+ "eval_runtime": 14.6851,
9
+ "eval_samples": 6810,
10
+ "eval_samples_per_second": 463.735,
11
+ "eval_steps_per_second": 58.018
12
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e30fd99ceae984d59d9b64cfef736066e9f374b1057df626cb9a78d048370605
3
+ size 496244100
predict_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.8808544760053879,
3
+ "predict_f1": 0.24078298623886968,
4
+ "predict_loss": 0.62894606590271,
5
+ "predict_precision": 0.1378032345013477,
6
+ "predict_recall": 0.9528246942341293,
7
+ "predict_runtime": 29.8715,
8
+ "predict_samples_per_second": 489.229,
9
+ "predict_steps_per_second": 61.162
10
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tb/events.out.tfevents.1725045346.6b97e535edda.2908.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e576d6fc5f437e9cdba3770a03c7980ab62c145def88b46a7b1d4e68f13bfde9
3
+ size 6546
tb/events.out.tfevents.1725046129.6b97e535edda.6370.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9e94268d839c5a3c21781840b052643e729f1bd38c4ed1942a53d09ddd89bfc
3
+ size 12153
tb/events.out.tfevents.1725047358.6b97e535edda.6370.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6babdcbcafd1e6dd843214457a56f608115a0ebceecc401a4e103c0ed26fb36d
3
+ size 560
tb/events.out.tfevents.1725047806.6b97e535edda.13440.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f1285ee11483b82cb81d6c4b8c4ba4a32b26fb1a1a6581781e536e31fc708b8
3
+ size 12153
tb/events.out.tfevents.1725049039.6b97e535edda.13440.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f105690f828d701992cc9bf50b2d6c540b9476f5cd4584b9d782e2db879b27e7
3
+ size 560
tb/events.out.tfevents.1725049548.6b97e535edda.20735.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c28dc020c475618b75acd5fee27c7c0952210b4e69cf433112b32cd4a3372a1
3
+ size 5587
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50261": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "max_len": 512,
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
train.log ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/4250 [00:00<?, ?it/s]
1
  0%| | 1/4250 [00:01<1:18:15, 1.11s/it]
2
  0%| | 2/4250 [00:01<40:12, 1.76it/s]
3
  0%| | 3/4250 [00:01<28:24, 2.49it/s]
4
  0%| | 4/4250 [00:01<22:38, 3.13it/s]
5
  0%| | 5/4250 [00:01<20:43, 3.41it/s]
6
  0%| | 6/4250 [00:02<18:37, 3.80it/s]
7
  0%| | 7/4250 [00:02<17:55, 3.94it/s]
8
  0%| | 8/4250 [00:02<16:59, 4.16it/s]
9
  0%| | 9/4250 [00:02<17:55, 3.94it/s]
10
  0%| | 10/4250 [00:03<17:19, 4.08it/s]
11
  0%| | 11/4250 [00:03<16:45, 4.21it/s]
12
  0%| | 12/4250 [00:03<15:21, 4.60it/s]
13
  0%| | 13/4250 [00:03<17:23, 4.06it/s]
14
  0%| | 14/4250 [00:04<16:58, 4.16it/s]
15
  0%| | 15/4250 [00:04<16:38, 4.24it/s]
16
  0%| | 16/4250 [00:04<15:46, 4.47it/s]
17
  0%| | 17/4250 [00:04<14:44, 4.78it/s]
18
  0%| | 18/4250 [00:04<14:27, 4.88it/s]
19
  0%| | 19/4250 [00:05<15:16, 4.62it/s]
20
  0%| | 20/4250 [00:05<14:22, 4.90it/s]
21
  0%| | 21/4250 [00:05<16:37, 4.24it/s]
22
  1%| | 22/4250 [00:05<15:57, 4.42it/s]
23
  1%| | 23/4250 [00:05<15:28, 4.55it/s]
24
  1%| | 24/4250 [00:06<15:18, 4.60it/s]
25
  1%| | 25/4250 [00:06<16:46, 4.20it/s]
26
  1%| | 26/4250 [00:06<15:14, 4.62it/s]
27
  1%| | 27/4250 [00:06<15:15, 4.61it/s]
28
  1%| | 28/4250 [00:07<14:02, 5.01it/s]
29
  1%| | 29/4250 [00:07<13:43, 5.12it/s]
30
  1%| | 30/4250 [00:07<16:48, 4.18it/s]
31
  1%| | 31/4250 [00:07<15:45, 4.46it/s]
32
  1%| | 32/4250 [00:07<16:53, 4.16it/s]
33
  1%| | 33/4250 [00:08<16:20, 4.30it/s]
34
  1%| | 34/4250 [00:08<16:34, 4.24it/s]
35
  1%| | 35/4250 [00:08<15:53, 4.42it/s]
36
  1%| | 36/4250 [00:08<16:15, 4.32it/s]
37
  1%| | 37/4250 [00:09<15:17, 4.59it/s]
38
  1%| | 38/4250 [00:09<15:07, 4.64it/s]
39
  1%| | 39/4250 [00:09<16:28, 4.26it/s]
40
  1%| | 40/4250 [00:09<15:19, 4.58it/s]
41
  1%| | 41/4250 [00:09<14:59, 4.68it/s]
42
  1%| | 42/4250 [00:10<15:06, 4.64it/s]
43
  1%| | 43/4250 [00:10<17:34, 3.99it/s]
44
  1%| | 44/4250 [00:11<27:51, 2.52it/s]
45
  1%| | 45/4250 [00:11<23:41, 2.96it/s]
46
  1%| | 46/4250 [00:11<22:36, 3.10it/s]
47
  1%| | 47/4250 [00:11<19:58, 3.51it/s]
48
  1%| | 48/4250 [00:12<18:45, 3.73it/s]
49
  1%| | 49/4250 [00:12<16:12, 4.32it/s]
50
  1%| | 50/4250 [00:12<16:45, 4.18it/s]
51
  1%| | 51/4250 [00:12<16:59, 4.12it/s]
52
  1%| | 52/4250 [00:13<16:34, 4.22it/s]
53
  1%| | 53/4250 [00:13<17:18, 4.04it/s]
54
  1%|▏ | 54/4250 [00:13<20:58, 3.33it/s]
55
  1%|▏ | 55/4250 [00:13<19:06, 3.66it/s]
56
  1%|▏ | 56/4250 [00:14<18:27, 3.79it/s]
57
  1%|▏ | 57/4250 [00:14<17:06, 4.08it/s]
58
  1%|▏ | 58/4250 [00:14<15:39, 4.46it/s]
59
  1%|▏ | 59/4250 [00:14<15:05, 4.63it/s]
60
  1%|▏ | 60/4250 [00:14<14:49, 4.71it/s]
61
  1%|▏ | 61/4250 [00:15<15:52, 4.40it/s]
62
  1%|▏ | 62/4250 [00:15<15:58, 4.37it/s]
63
  1%|▏ | 63/4250 [00:15<15:02, 4.64it/s]
64
  2%|▏ | 64/4250 [00:15<14:48, 4.71it/s]
65
  2%|▏ | 65/4250 [00:16<15:34, 4.48it/s]
66
  2%|▏ | 66/4250 [00:16<16:39, 4.19it/s]
67
  2%|▏ | 67/4250 [00:16<16:33, 4.21it/s]
68
  2%|▏ | 68/4250 [00:17<20:48, 3.35it/s]
69
  2%|▏ | 69/4250 [00:17<18:35, 3.75it/s]
70
  2%|▏ | 70/4250 [00:17<19:57, 3.49it/s]
71
  2%|▏ | 71/4250 [00:17<18:59, 3.67it/s]
72
  2%|▏ | 72/4250 [00:18<17:43, 3.93it/s]
73
  2%|▏ | 73/4250 [00:18<17:16, 4.03it/s]
74
  2%|▏ | 74/4250 [00:18<17:31, 3.97it/s]
75
  2%|▏ | 75/4250 [00:18<15:35, 4.46it/s]
76
  2%|▏ | 76/4250 [00:18<16:09, 4.30it/s]
77
  2%|▏ | 77/4250 [00:19<15:16, 4.55it/s]
78
  2%|▏ | 78/4250 [00:19<14:19, 4.85it/s]
79
  2%|▏ | 79/4250 [00:19<14:56, 4.65it/s]
80
  2%|▏ | 80/4250 [00:19<15:15, 4.56it/s]
81
  2%|▏ | 81/4250 [00:19<15:31, 4.47it/s]
82
  2%|▏ | 82/4250 [00:20<14:46, 4.70it/s]
83
  2%|▏ | 83/4250 [00:20<16:51, 4.12it/s]
84
  2%|▏ | 84/4250 [00:20<15:25, 4.50it/s]
85
  2%|▏ | 85/4250 [00:20<15:09, 4.58it/s]
86
  2%|▏ | 86/4250 [00:21<15:01, 4.62it/s]
87
  2%|▏ | 87/4250 [00:21<15:03, 4.61it/s]
88
  2%|▏ | 88/4250 [00:21<13:40, 5.07it/s]
89
  2%|▏ | 89/4250 [00:21<13:52, 5.00it/s]
90
  2%|▏ | 90/4250 [00:21<16:14, 4.27it/s]
91
  2%|▏ | 91/4250 [00:22<14:35, 4.75it/s]
92
  2%|▏ | 92/4250 [00:22<14:04, 4.93it/s]
93
  2%|▏ | 93/4250 [00:22<13:20, 5.19it/s]
94
  2%|▏ | 94/4250 [00:22<13:59, 4.95it/s]
95
  2%|▏ | 95/4250 [00:22<15:09, 4.57it/s]
96
  2%|▏ | 96/4250 [00:23<16:58, 4.08it/s]
97
  2%|▏ | 97/4250 [00:23<15:55, 4.34it/s]
98
  2%|▏ | 98/4250 [00:23<14:47, 4.68it/s]
99
  2%|▏ | 99/4250 [00:23<14:10, 4.88it/s]
100
  2%|▏ | 100/4250 [00:24<14:30, 4.77it/s]
101
  2%|▏ | 101/4250 [00:24<18:02, 3.83it/s]
102
  2%|▏ | 102/4250 [00:24<20:27, 3.38it/s]
103
  2%|▏ | 103/4250 [00:25<18:31, 3.73it/s]
104
  2%|▏ | 104/4250 [00:25<17:01, 4.06it/s]
105
  2%|▏ | 105/4250 [00:25<16:41, 4.14it/s]
106
  2%|▏ | 106/4250 [00:25<16:12, 4.26it/s]
107
  3%|▎ | 107/4250 [00:25<15:58, 4.32it/s]
108
  3%|▎ | 108/4250 [00:26<19:11, 3.60it/s]
109
  3%|▎ | 109/4250 [00:26<18:38, 3.70it/s]
110
  3%|▎ | 110/4250 [00:26<17:15, 4.00it/s]
111
  3%|▎ | 111/4250 [00:26<15:36, 4.42it/s]
112
  3%|▎ | 112/4250 [00:27<16:11, 4.26it/s]
113
  3%|▎ | 113/4250 [00:27<15:49, 4.36it/s]
114
  3%|▎ | 114/4250 [00:27<14:38, 4.71it/s]
115
  3%|▎ | 115/4250 [00:27<14:08, 4.87it/s]
116
  3%|▎ | 116/4250 [00:28<15:38, 4.40it/s]
117
  3%|▎ | 117/4250 [00:28<15:11, 4.54it/s]
118
  3%|▎ | 118/4250 [00:28<14:46, 4.66it/s]
119
  3%|▎ | 119/4250 [00:28<15:08, 4.55it/s]
120
  3%|▎ | 120/4250 [00:28<14:14, 4.83it/s]
121
  3%|▎ | 121/4250 [00:29<14:19, 4.80it/s]
122
  3%|▎ | 122/4250 [00:29<20:15, 3.40it/s]
123
  3%|▎ | 123/4250 [00:29<18:33, 3.71it/s]
124
  3%|▎ | 124/4250 [00:29<16:55, 4.06it/s]
125
  3%|▎ | 125/4250 [00:30<17:07, 4.01it/s]
126
  3%|▎ | 126/4250 [00:30<15:33, 4.42it/s]
127
  3%|▎ | 127/4250 [00:30<14:50, 4.63it/s]
128
  3%|▎ | 128/4250 [00:30<14:17, 4.81it/s]
129
  3%|▎ | 129/4250 [00:31<17:35, 3.90it/s]
130
  3%|▎ | 130/4250 [00:31<17:02, 4.03it/s]
131
  3%|▎ | 131/4250 [00:31<15:32, 4.42it/s]
132
  3%|▎ | 132/4250 [00:31<14:40, 4.68it/s]
133
  3%|▎ | 133/4250 [00:31<15:39, 4.38it/s]
134
  3%|▎ | 134/4250 [00:32<15:20, 4.47it/s]
135
  3%|▎ | 135/4250 [00:32<15:08, 4.53it/s]
136
  3%|▎ | 136/4250 [00:32<15:13, 4.50it/s]
137
  3%|▎ | 137/4250 [00:32<15:00, 4.57it/s]
138
  3%|▎ | 138/4250 [00:33<14:29, 4.73it/s]
139
  3%|▎ | 139/4250 [00:33<14:58, 4.58it/s]
140
  3%|▎ | 140/4250 [00:33<15:55, 4.30it/s]
141
  3%|▎ | 141/4250 [00:33<15:59, 4.28it/s]
142
  3%|▎ | 142/4250 [00:33<15:00, 4.56it/s]
143
  3%|▎ | 143/4250 [00:34<14:38, 4.67it/s]
144
  3%|▎ | 144/4250 [00:34<14:27, 4.73it/s]
145
  3%|▎ | 145/4250 [00:34<16:59, 4.03it/s]
146
  3%|▎ | 146/4250 [00:35<22:15, 3.07it/s]
147
  3%|▎ | 147/4250 [00:35<19:50, 3.45it/s]
148
  3%|▎ | 148/4250 [00:35<19:43, 3.47it/s]
149
  4%|▎ | 149/4250 [00:35<18:58, 3.60it/s]
150
  4%|▎ | 150/4250 [00:36<17:38, 3.87it/s]
151
  4%|▎ | 151/4250 [00:36<16:10, 4.23it/s]
152
  4%|▎ | 152/4250 [00:36<17:03, 4.00it/s]
153
  4%|▎ | 153/4250 [00:36<16:24, 4.16it/s]
154
  4%|▎ | 154/4250 [00:37<15:22, 4.44it/s]
155
  4%|▎ | 155/4250 [00:37<14:32, 4.70it/s]
156
  4%|▎ | 156/4250 [00:37<14:50, 4.60it/s]
157
  4%|▎ | 157/4250 [00:37<14:30, 4.70it/s]
158
  4%|▎ | 158/4250 [00:37<17:37, 3.87it/s]
159
  4%|▎ | 159/4250 [00:38<18:35, 3.67it/s]
160
  4%|▍ | 160/4250 [00:38<17:00, 4.01it/s]
161
  4%|▍ | 161/4250 [00:38<16:02, 4.25it/s]
162
  4%|▍ | 162/4250 [00:38<14:55, 4.56it/s]
163
  4%|▍ | 163/4250 [00:39<14:47, 4.60it/s]
164
  4%|▍ | 164/4250 [00:39<18:40, 3.65it/s]
165
  4%|▍ | 165/4250 [00:39<18:13, 3.74it/s]
166
  4%|▍ | 166/4250 [00:40<18:24, 3.70it/s]
167
  4%|▍ | 167/4250 [00:40<17:29, 3.89it/s]
168
  4%|▍ | 168/4250 [00:40<15:56, 4.27it/s]
169
  4%|▍ | 169/4250 [00:40<15:32, 4.38it/s]
170
  4%|▍ | 170/4250 [00:40<15:28, 4.39it/s]
171
  4%|▍ | 171/4250 [00:41<16:01, 4.24it/s]
172
  4%|▍ | 172/4250 [00:41<15:13, 4.46it/s]
173
  4%|▍ | 173/4250 [00:41<18:22, 3.70it/s]
174
  4%|▍ | 174/4250 [00:41<16:38, 4.08it/s]
175
  4%|▍ | 175/4250 [00:42<19:24, 3.50it/s]
176
  4%|▍ | 176/4250 [00:42<19:20, 3.51it/s]
177
  4%|▍ | 177/4250 [00:42<17:04, 3.98it/s]
178
  4%|▍ | 178/4250 [00:42<16:07, 4.21it/s]
179
  4%|▍ | 179/4250 [00:43<15:41, 4.32it/s]
180
  4%|▍ | 180/4250 [00:43<14:28, 4.69it/s]
181
  4%|▍ | 181/4250 [00:43<14:38, 4.63it/s]
182
  4%|▍ | 182/4250 [00:43<17:11, 3.94it/s]
183
  4%|▍ | 183/4250 [00:44<16:36, 4.08it/s]
184
  4%|▍ | 184/4250 [00:44<16:28, 4.11it/s]
185
  4%|▍ | 185/4250 [00:44<15:07, 4.48it/s]
186
  4%|▍ | 186/4250 [00:44<14:39, 4.62it/s]
187
  4%|▍ | 187/4250 [00:44<13:47, 4.91it/s]
188
  4%|▍ | 188/4250 [00:45<14:41, 4.61it/s]
189
  4%|▍ | 189/4250 [00:45<14:11, 4.77it/s]
190
  4%|▍ | 190/4250 [00:45<13:45, 4.92it/s]
191
  4%|▍ | 191/4250 [00:45<14:23, 4.70it/s]
192
  5%|▍ | 192/4250 [00:45<14:11, 4.76it/s]
193
  5%|▍ | 193/4250 [00:46<13:55, 4.86it/s]
194
  5%|▍ | 194/4250 [00:46<16:42, 4.05it/s]
195
  5%|▍ | 195/4250 [00:46<15:38, 4.32it/s]
196
  5%|▍ | 196/4250 [00:46<14:26, 4.68it/s]
197
  5%|▍ | 197/4250 [00:47<14:55, 4.53it/s]
198
  5%|▍ | 198/4250 [00:47<15:19, 4.41it/s]
199
  5%|▍ | 199/4250 [00:47<15:25, 4.38it/s]
200
  5%|▍ | 200/4250 [00:47<15:35, 4.33it/s]
201
  5%|▍ | 201/4250 [00:48<15:14, 4.43it/s]
202
  5%|▍ | 202/4250 [00:48<15:31, 4.35it/s]
203
  5%|▍ | 203/4250 [00:48<14:26, 4.67it/s]
204
  5%|▍ | 204/4250 [00:48<13:47, 4.89it/s]
205
  5%|▍ | 205/4250 [00:48<14:44, 4.57it/s]
206
  5%|▍ | 206/4250 [00:49<14:29, 4.65it/s]
207
  5%|▍ | 207/4250 [00:49<16:45, 4.02it/s]
208
  5%|▍ | 208/4250 [00:49<15:19, 4.40it/s]
209
  5%|▍ | 209/4250 [00:49<15:12, 4.43it/s]
210
  5%|▍ | 210/4250 [00:49<14:01, 4.80it/s]
211
  5%|▍ | 211/4250 [00:50<15:09, 4.44it/s]
212
  5%|▍ | 212/4250 [00:50<14:26, 4.66it/s]
213
  5%|▌ | 213/4250 [00:50<17:31, 3.84it/s]
214
  5%|▌ | 214/4250 [00:50<15:24, 4.37it/s]
215
  5%|▌ | 215/4250 [00:51<15:17, 4.40it/s]
216
  5%|▌ | 216/4250 [00:51<19:04, 3.52it/s]
217
  5%|▌ | 217/4250 [00:51<17:03, 3.94it/s]
218
  5%|▌ | 218/4250 [00:51<15:56, 4.21it/s]
219
  5%|▌ | 219/4250 [00:52<16:35, 4.05it/s]
220
  5%|▌ | 220/4250 [00:52<16:40, 4.03it/s]
221
  5%|▌ | 221/4250 [00:52<18:41, 3.59it/s]
222
  5%|▌ | 222/4250 [00:53<20:01, 3.35it/s]
223
  5%|▌ | 223/4250 [00:53<18:26, 3.64it/s]
224
  5%|▌ | 224/4250 [00:53<16:31, 4.06it/s]
225
  5%|▌ | 225/4250 [00:53<15:07, 4.43it/s]
226
  5%|▌ | 226/4250 [00:53<14:23, 4.66it/s]
227
  5%|▌ | 227/4250 [00:54<14:01, 4.78it/s]
228
  5%|▌ | 228/4250 [00:54<13:47, 4.86it/s]
229
  5%|▌ | 229/4250 [00:54<12:58, 5.17it/s]
230
  5%|▌ | 230/4250 [00:54<14:21, 4.67it/s]
231
  5%|▌ | 231/4250 [00:54<13:45, 4.87it/s]
232
  5%|▌ | 232/4250 [00:55<13:35, 4.93it/s]
233
  5%|▌ | 233/4250 [00:55<13:23, 5.00it/s]
234
  6%|▌ | 234/4250 [00:55<15:41, 4.26it/s]
235
  6%|▌ | 235/4250 [00:56<20:37, 3.24it/s]
236
  6%|▌ | 236/4250 [00:56<21:51, 3.06it/s]
237
  6%|▌ | 237/4250 [00:56<19:42, 3.39it/s]
238
  6%|▌ | 238/4250 [00:56<17:25, 3.84it/s]
239
  6%|▌ | 239/4250 [00:57<17:28, 3.83it/s]
240
  6%|▌ | 240/4250 [00:57<19:56, 3.35it/s]
241
  6%|▌ | 241/4250 [00:57<17:57, 3.72it/s]
242
  6%|▌ | 242/4250 [00:57<16:26, 4.06it/s]
243
  6%|▌ | 243/4250 [00:58<15:57, 4.19it/s]
244
  6%|▌ | 244/4250 [00:58<15:58, 4.18it/s]
245
  6%|▌ | 245/4250 [00:58<15:56, 4.19it/s]
246
  6%|▌ | 246/4250 [00:59<24:58, 2.67it/s]
247
  6%|▌ | 247/4250 [00:59<22:20, 2.99it/s]
248
  6%|▌ | 248/4250 [00:59<19:38, 3.40it/s]
249
  6%|▌ | 249/4250 [01:00<23:51, 2.80it/s]
250
  6%|▌ | 250/4250 [01:00<25:17, 2.64it/s]
251
  6%|▌ | 251/4250 [01:01<22:48, 2.92it/s]
252
  6%|▌ | 252/4250 [01:01<19:31, 3.41it/s]
253
  6%|▌ | 253/4250 [01:01<17:13, 3.87it/s]
254
  6%|▌ | 254/4250 [01:01<17:53, 3.72it/s]
255
  6%|▌ | 255/4250 [01:01<18:10, 3.66it/s]
256
  6%|▌ | 256/4250 [01:02<17:55, 3.71it/s]
257
  6%|▌ | 257/4250 [01:02<16:43, 3.98it/s]
258
  6%|▌ | 258/4250 [01:02<15:35, 4.27it/s]
259
  6%|▌ | 259/4250 [01:02<14:28, 4.59it/s]
260
  6%|▌ | 260/4250 [01:03<15:44, 4.22it/s]
261
  6%|▌ | 261/4250 [01:03<15:00, 4.43it/s]
262
  6%|▌ | 262/4250 [01:03<13:52, 4.79it/s]
263
  6%|▌ | 263/4250 [01:03<12:53, 5.15it/s]
264
  6%|▌ | 264/4250 [01:03<14:53, 4.46it/s]
265
  6%|▌ | 265/4250 [01:04<15:50, 4.19it/s]
266
  6%|▋ | 266/4250 [01:04<15:46, 4.21it/s]
267
  6%|▋ | 267/4250 [01:04<15:43, 4.22it/s]
268
  6%|▋ | 268/4250 [01:04<14:47, 4.49it/s]
269
  6%|▋ | 269/4250 [01:05<14:19, 4.63it/s]
270
  6%|▋ | 270/4250 [01:05<16:43, 3.97it/s]
271
  6%|▋ | 271/4250 [01:05<15:07, 4.38it/s]
272
  6%|▋ | 272/4250 [01:05<14:35, 4.54it/s]
273
  6%|▋ | 273/4250 [01:05<14:08, 4.69it/s]
274
  6%|▋ | 274/4250 [01:06<13:43, 4.83it/s]
275
  6%|▋ | 275/4250 [01:06<13:31, 4.90it/s]
276
  6%|▋ | 276/4250 [01:06<12:58, 5.11it/s]
277
  7%|▋ | 277/4250 [01:06<13:41, 4.84it/s]
278
  7%|▋ | 278/4250 [01:06<13:31, 4.89it/s]
279
  7%|▋ | 279/4250 [01:07<14:09, 4.67it/s]
280
  7%|▋ | 280/4250 [01:07<17:33, 3.77it/s]
281
  7%|▋ | 281/4250 [01:07<16:06, 4.10it/s]
282
  7%|▋ | 282/4250 [01:07<15:58, 4.14it/s]
283
  7%|▋ | 283/4250 [01:08<15:01, 4.40it/s]
284
  7%|▋ | 284/4250 [01:08<14:33, 4.54it/s]
285
  7%|▋ | 285/4250 [01:08<14:43, 4.49it/s]
286
  7%|▋ | 286/4250 [01:08<14:18, 4.62it/s]
287
  7%|▋ | 287/4250 [01:09<15:53, 4.16it/s]
288
  7%|▋ | 288/4250 [01:09<14:44, 4.48it/s]
289
  7%|▋ | 289/4250 [01:09<14:55, 4.42it/s]
290
  7%|▋ | 290/4250 [01:09<14:05, 4.69it/s]
291
  7%|▋ | 291/4250 [01:09<14:55, 4.42it/s]
292
  7%|▋ | 292/4250 [01:10<14:01, 4.70it/s]
293
  7%|▋ | 293/4250 [01:10<13:19, 4.95it/s]
294
  7%|▋ | 294/4250 [01:10<12:53, 5.11it/s]
295
  7%|▋ | 295/4250 [01:10<14:15, 4.62it/s]
296
  7%|▋ | 296/4250 [01:10<13:25, 4.91it/s]
297
  7%|▋ | 297/4250 [01:11<13:07, 5.02it/s]
298
  7%|▋ | 298/4250 [01:11<13:03, 5.04it/s]
299
  7%|▋ | 299/4250 [01:11<13:43, 4.80it/s]
300
  7%|▋ | 300/4250 [01:11<15:32, 4.23it/s]
301
  7%|▋ | 301/4250 [01:12<14:53, 4.42it/s]
302
  7%|▋ | 302/4250 [01:12<16:06, 4.09it/s]
303
  7%|▋ | 303/4250 [01:12<17:14, 3.82it/s]
304
  7%|▋ | 304/4250 [01:12<16:34, 3.97it/s]
305
  7%|▋ | 305/4250 [01:13<16:20, 4.03it/s]
306
  7%|▋ | 306/4250 [01:13<18:10, 3.62it/s]
307
  7%|▋ | 307/4250 [01:13<16:03, 4.09it/s]
308
  7%|▋ | 308/4250 [01:13<15:15, 4.31it/s]
309
  7%|▋ | 309/4250 [01:14<15:01, 4.37it/s]
310
  7%|▋ | 310/4250 [01:14<14:24, 4.56it/s]
311
  7%|▋ | 311/4250 [01:14<13:58, 4.70it/s]
312
  7%|▋ | 312/4250 [01:14<16:55, 3.88it/s]
313
  7%|▋ | 313/4250 [01:15<16:12, 4.05it/s]
314
  7%|▋ | 314/4250 [01:15<16:17, 4.03it/s]
315
  7%|▋ | 315/4250 [01:15<15:12, 4.31it/s]
316
  7%|▋ | 316/4250 [01:15<14:32, 4.51it/s]
317
  7%|▋ | 317/4250 [01:15<15:41, 4.18it/s]
318
  7%|▋ | 318/4250 [01:16<15:25, 4.25it/s]
319
  8%|▊ | 319/4250 [01:16<15:46, 4.15it/s]
320
  8%|▊ | 320/4250 [01:16<16:25, 3.99it/s]
321
  8%|▊ | 321/4250 [01:16<15:03, 4.35it/s]
322
  8%|▊ | 322/4250 [01:17<14:35, 4.49it/s]
323
  8%|▊ | 323/4250 [01:17<13:27, 4.86it/s]
324
  8%|▊ | 324/4250 [01:17<13:13, 4.95it/s]
325
  8%|▊ | 325/4250 [01:17<13:22, 4.89it/s]
326
  8%|▊ | 326/4250 [01:17<12:56, 5.05it/s]
327
  8%|▊ | 327/4250 [01:18<12:34, 5.20it/s]
328
  8%|▊ | 328/4250 [01:18<16:35, 3.94it/s]
329
  8%|▊ | 329/4250 [01:18<17:45, 3.68it/s]
330
  8%|▊ | 330/4250 [01:18<16:40, 3.92it/s]
331
  8%|▊ | 331/4250 [01:19<16:15, 4.02it/s]
332
  8%|▊ | 332/4250 [01:19<15:10, 4.31it/s]
333
  8%|▊ | 333/4250 [01:19<14:15, 4.58it/s]
334
  8%|▊ | 334/4250 [01:19<14:33, 4.48it/s]
335
  8%|▊ | 335/4250 [01:20<15:52, 4.11it/s]
336
  8%|▊ | 336/4250 [01:20<15:40, 4.16it/s]
337
  8%|▊ | 337/4250 [01:20<14:30, 4.50it/s]
338
  8%|▊ | 338/4250 [01:20<13:51, 4.70it/s]
339
  8%|▊ | 339/4250 [01:21<17:39, 3.69it/s]
340
  8%|▊ | 340/4250 [01:21<16:24, 3.97it/s]
341
  8%|▊ | 341/4250 [01:21<17:23, 3.75it/s]
342
  8%|▊ | 342/4250 [01:21<15:45, 4.13it/s]
343
  8%|▊ | 343/4250 [01:22<16:07, 4.04it/s]
344
  8%|▊ | 344/4250 [01:22<15:55, 4.09it/s]
345
  8%|▊ | 345/4250 [01:22<17:25, 3.74it/s]
346
  8%|▊ | 346/4250 [01:22<18:30, 3.52it/s]
347
  8%|▊ | 347/4250 [01:23<17:09, 3.79it/s]
348
  8%|▊ | 348/4250 [01:23<18:01, 3.61it/s]
349
  8%|▊ | 349/4250 [01:23<16:19, 3.98it/s]
350
  8%|▊ | 350/4250 [01:23<16:02, 4.05it/s]
351
  8%|▊ | 351/4250 [01:24<15:28, 4.20it/s]
352
  8%|▊ | 352/4250 [01:24<15:45, 4.12it/s]
353
  8%|▊ | 353/4250 [01:24<14:05, 4.61it/s]
354
  8%|▊ | 354/4250 [01:24<14:12, 4.57it/s]
355
  8%|▊ | 355/4250 [01:25<15:18, 4.24it/s]
356
  8%|▊ | 356/4250 [01:25<14:48, 4.38it/s]
357
  8%|▊ | 357/4250 [01:25<14:00, 4.63it/s]
358
  8%|▊ | 358/4250 [01:25<13:29, 4.81it/s]
359
  8%|▊ | 359/4250 [01:25<12:50, 5.05it/s]
360
  8%|▊ | 360/4250 [01:26<14:46, 4.39it/s]
361
  8%|▊ | 361/4250 [01:26<14:14, 4.55it/s]
362
  9%|▊ | 362/4250 [01:26<13:28, 4.81it/s]
363
  9%|▊ | 363/4250 [01:26<14:21, 4.51it/s]
364
  9%|▊ | 364/4250 [01:27<20:35, 3.15it/s]
365
  9%|▊ | 365/4250 [01:27<19:18, 3.35it/s]
366
  9%|▊ | 366/4250 [01:27<18:56, 3.42it/s]
367
  9%|▊ | 367/4250 [01:27<17:11, 3.77it/s]
368
  9%|▊ | 368/4250 [01:28<15:13, 4.25it/s]
369
  9%|▊ | 369/4250 [01:28<16:11, 4.00it/s]
370
  9%|▊ | 370/4250 [01:28<15:10, 4.26it/s]
371
  9%|▊ | 371/4250 [01:28<14:18, 4.52it/s]
372
  9%|▉ | 372/4250 [01:29<13:52, 4.66it/s]
373
  9%|▉ | 373/4250 [01:29<15:47, 4.09it/s]
374
  9%|▉ | 374/4250 [01:29<15:20, 4.21it/s]
375
  9%|▉ | 375/4250 [01:30<25:51, 2.50it/s]
376
  9%|▉ | 376/4250 [01:30<22:05, 2.92it/s]
377
  9%|▉ | 377/4250 [01:30<19:34, 3.30it/s]
378
  9%|▉ | 378/4250 [01:30<17:28, 3.69it/s]
379
  9%|▉ | 379/4250 [01:31<15:52, 4.06it/s]
380
  9%|▉ | 380/4250 [01:31<15:12, 4.24it/s]
381
  9%|▉ | 381/4250 [01:31<14:26, 4.46it/s]
382
  9%|▉ | 382/4250 [01:31<13:57, 4.62it/s]
383
  9%|▉ | 383/4250 [01:32<15:16, 4.22it/s]
384
  9%|▉ | 384/4250 [01:32<14:11, 4.54it/s]
385
  9%|▉ | 385/4250 [01:32<16:37, 3.87it/s]
386
  9%|▉ | 386/4250 [01:32<18:50, 3.42it/s]
387
  9%|▉ | 387/4250 [01:33<16:38, 3.87it/s]
388
  9%|▉ | 388/4250 [01:33<15:54, 4.05it/s]
389
  9%|▉ | 389/4250 [01:33<15:11, 4.24it/s]
390
  9%|▉ | 390/4250 [01:33<15:52, 4.05it/s]
391
  9%|▉ | 391/4250 [01:33<14:31, 4.43it/s]
392
  9%|▉ | 392/4250 [01:34<14:36, 4.40it/s]
393
  9%|▉ | 393/4250 [01:34<13:42, 4.69it/s]
394
  9%|▉ | 394/4250 [01:34<13:42, 4.69it/s]
395
  9%|▉ | 395/4250 [01:34<13:04, 4.91it/s]
396
  9%|▉ | 396/4250 [01:34<12:50, 5.00it/s]
397
  9%|▉ | 397/4250 [01:35<13:30, 4.75it/s]
398
  9%|▉ | 398/4250 [01:35<14:10, 4.53it/s]
399
  9%|▉ | 399/4250 [01:35<13:17, 4.83it/s]
400
  9%|▉ | 400/4250 [01:35<14:40, 4.37it/s]
401
  9%|▉ | 401/4250 [01:36<14:06, 4.54it/s]
402
  9%|▉ | 402/4250 [01:36<15:53, 4.04it/s]
403
  9%|▉ | 403/4250 [01:36<15:28, 4.14it/s]
404
  10%|▉ | 404/4250 [01:36<14:41, 4.36it/s]
405
  10%|▉ | 405/4250 [01:37<15:17, 4.19it/s]
406
  10%|▉ | 406/4250 [01:37<15:58, 4.01it/s]
407
  10%|▉ | 407/4250 [01:38<23:04, 2.78it/s]
408
  10%|▉ | 408/4250 [01:38<21:23, 2.99it/s]
409
  10%|▉ | 409/4250 [01:38<18:34, 3.45it/s]
410
  10%|▉ | 410/4250 [01:38<17:37, 3.63it/s]
411
  10%|▉ | 411/4250 [01:38<17:21, 3.68it/s]
412
  10%|▉ | 412/4250 [01:39<18:26, 3.47it/s]
413
  10%|▉ | 413/4250 [01:39<17:12, 3.72it/s]
414
  10%|▉ | 414/4250 [01:39<16:36, 3.85it/s]
415
  10%|▉ | 415/4250 [01:39<15:21, 4.16it/s]
416
  10%|▉ | 416/4250 [01:40<14:56, 4.28it/s]
417
  10%|▉ | 417/4250 [01:40<14:55, 4.28it/s]
418
  10%|▉ | 418/4250 [01:40<13:45, 4.64it/s]
419
  10%|▉ | 419/4250 [01:40<14:56, 4.27it/s]
420
  10%|▉ | 420/4250 [01:41<15:00, 4.25it/s]
421
  10%|▉ | 421/4250 [01:41<15:28, 4.12it/s]
422
  10%|▉ | 422/4250 [01:41<14:59, 4.26it/s]
423
  10%|▉ | 423/4250 [01:41<13:50, 4.61it/s]
424
  10%|▉ | 424/4250 [01:42<14:34, 4.37it/s]
425
  10%|█ | 425/4250 [01:42<14:28, 4.40it/s][INFO|trainer.py:805] 2024-08-30 20:27:31,190 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
 
 
 
 
 
 
426
  0%| | 0/852 [00:00<?, ?it/s]
 
427
  1%| | 9/852 [00:00<00:09, 88.66it/s]
 
428
  2%|▏ | 18/852 [00:00<00:10, 79.48it/s]
 
429
  3%|▎ | 27/852 [00:00<00:10, 78.72it/s]
 
430
  4%|▍ | 35/852 [00:00<00:10, 77.17it/s]
 
431
  5%|▌ | 43/852 [00:00<00:10, 77.47it/s]
 
432
  6%|▌ | 51/852 [00:00<00:10, 78.01it/s]
 
433
  7%|▋ | 60/852 [00:00<00:10, 78.82it/s]
 
434
  8%|▊ | 68/852 [00:00<00:10, 76.55it/s]
 
435
  9%|▉ | 76/852 [00:00<00:10, 76.57it/s]
 
436
  10%|▉ | 84/852 [00:01<00:10, 74.53it/s]
 
437
  11%|█ | 92/852 [00:01<00:10, 74.88it/s]
 
438
  12%|█▏ | 100/852 [00:01<00:10, 74.67it/s]
 
439
  13%|█▎ | 108/852 [00:01<00:09, 74.65it/s]
 
440
  14%|█▎ | 116/852 [00:01<00:09, 75.39it/s]
 
441
  15%|█▍ | 125/852 [00:01<00:09, 76.70it/s]
 
442
  16%|█▌ | 133/852 [00:01<00:09, 72.98it/s]
 
443
  17%|█▋ | 141/852 [00:01<00:09, 73.08it/s]
 
444
  17%|█▋ | 149/852 [00:01<00:09, 72.57it/s]
 
445
  18%|█▊ | 157/852 [00:02<00:09, 74.49it/s]
 
446
  19%|█▉ | 165/852 [00:02<00:09, 75.40it/s]
 
447
  20%|██ | 173/852 [00:02<00:08, 75.96it/s]
 
448
  21%|██ | 181/852 [00:02<00:08, 76.27it/s]
 
449
  22%|██▏ | 190/852 [00:02<00:08, 77.54it/s]
 
450
  23%|██▎ | 198/852 [00:02<00:08, 77.17it/s]
 
451
  24%|██▍ | 206/852 [00:02<00:08, 76.97it/s]
 
452
  25%|██▌ | 214/852 [00:02<00:08, 75.85it/s]
 
453
  26%|██▌ | 222/852 [00:02<00:08, 77.02it/s]
 
454
  27%|██▋ | 230/852 [00:03<00:07, 77.88it/s]
 
455
  28%|██▊ | 238/852 [00:03<00:07, 77.06it/s]
 
456
  29%|██▉ | 246/852 [00:03<00:08, 75.49it/s]
 
457
  30%|██▉ | 255/852 [00:03<00:07, 77.83it/s]
 
458
  31%|███ | 263/852 [00:03<00:07, 78.44it/s]
 
459
  32%|███▏ | 271/852 [00:03<00:07, 77.33it/s]
 
460
  33%|███▎ | 280/852 [00:03<00:07, 78.78it/s]
 
461
  34%|███▍ | 288/852 [00:03<00:07, 78.13it/s]
 
462
  35%|███▍ | 296/852 [00:03<00:07, 78.46it/s]
 
463
  36%|███▌ | 305/852 [00:03<00:06, 79.51it/s]
 
464
  37%|���██▋ | 313/852 [00:04<00:06, 77.59it/s]
 
465
  38%|███▊ | 322/852 [00:04<00:06, 79.45it/s]
 
466
  39%|███▊ | 330/852 [00:04<00:06, 78.56it/s]
 
467
  40%|███▉ | 338/852 [00:04<00:06, 75.62it/s]
 
468
  41%|████ | 346/852 [00:04<00:06, 76.01it/s]
 
469
  42%|████▏ | 354/852 [00:04<00:06, 75.26it/s]
 
470
  42%|████▏ | 362/852 [00:04<00:06, 75.94it/s]
 
471
  43%|████▎ | 370/852 [00:04<00:06, 76.55it/s]
 
472
  44%|████▍ | 378/852 [00:04<00:06, 77.17it/s]
 
473
  45%|████▌ | 386/852 [00:05<00:06, 76.59it/s]
 
474
  46%|████▋ | 395/852 [00:05<00:05, 77.79it/s]
 
475
  47%|████▋ | 403/852 [00:05<00:05, 77.58it/s]
 
476
  48%|████▊ | 411/852 [00:05<00:05, 75.31it/s]
 
477
  49%|████▉ | 419/852 [00:05<00:05, 76.29it/s]
 
478
  50%|█████ | 427/852 [00:05<00:05, 74.65it/s]
 
479
  51%|█████ | 435/852 [00:05<00:05, 75.66it/s]
 
480
  52%|█████▏ | 443/852 [00:05<00:05, 76.76it/s]
 
481
  53%|█████▎ | 451/852 [00:05<00:05, 77.61it/s]
 
482
  54%|█████▍ | 460/852 [00:05<00:05, 77.74it/s]
 
483
  55%|█████▍ | 468/852 [00:06<00:05, 75.24it/s]
 
484
  56%|█████▌ | 476/852 [00:06<00:05, 72.34it/s]
 
485
  57%|█████▋ | 484/852 [00:06<00:05, 72.40it/s]
 
486
  58%|█████▊ | 492/852 [00:06<00:04, 74.29it/s]
 
487
  59%|█████▉ | 501/852 [00:06<00:04, 76.52it/s]
 
488
  60%|█████▉ | 509/852 [00:06<00:04, 75.58it/s]
 
489
  61%|██████ | 517/852 [00:06<00:04, 76.67it/s]
 
490
  62%|██████▏ | 525/852 [00:06<00:04, 75.52it/s]
 
491
  63%|██████▎ | 533/852 [00:06<00:04, 75.91it/s]
 
492
  63%|██████▎ | 541/852 [00:07<00:04, 77.03it/s]
 
493
  64%|██████▍ | 549/852 [00:07<00:04, 75.19it/s]
 
494
  65%|██████▌ | 557/852 [00:07<00:03, 75.67it/s]
 
495
  66%|██████▋ | 566/852 [00:07<00:03, 77.52it/s]
 
496
  67%|██████▋ | 574/852 [00:07<00:03, 77.51it/s]
 
497
  68%|██████▊ | 582/852 [00:07<00:03, 76.66it/s]
 
498
  69%|██████▉ | 590/852 [00:07<00:03, 75.98it/s]
 
499
  70%|███████ | 598/852 [00:07<00:03, 76.35it/s]
 
500
  71%|███████ | 606/852 [00:07<00:03, 76.06it/s]
 
501
  72%|███████▏ | 614/852 [00:08<00:03, 75.05it/s]
 
502
  73%|███████▎ | 622/852 [00:08<00:03, 75.43it/s]
 
503
  74%|███████▍ | 630/852 [00:08<00:02, 74.34it/s]
 
504
  75%|███████▍ | 638/852 [00:08<00:02, 75.78it/s]
 
505
  76%|███████▌ | 646/852 [00:08<00:02, 73.52it/s]
 
506
  77%|███████▋ | 654/852 [00:08<00:02, 75.08it/s]
 
507
  78%|███████▊ | 662/852 [00:08<00:02, 75.52it/s]
 
508
  79%|███████▊ | 670/852 [00:08<00:02, 76.49it/s]
 
509
  80%|███████▉ | 678/852 [00:08<00:02, 77.21it/s]
 
510
  81%|████████ | 686/852 [00:08<00:02, 77.55it/s]
 
511
  82%|████████▏ | 695/852 [00:09<00:01, 78.56it/s]
 
512
  83%|████████▎ | 704/852 [00:09<00:01, 79.29it/s]
 
513
  84%|████████▎ | 713/852 [00:09<00:01, 79.99it/s]
 
514
  85%|████████▍ | 721/852 [00:09<00:01, 78.74it/s]
 
515
  86%|████████▌ | 730/852 [00:09<00:01, 79.95it/s]
 
516
  87%|████████▋ | 738/852 [00:09<00:01, 79.68it/s]
 
517
  88%|████████▊ | 747/852 [00:09<00:01, 80.02it/s]
 
518
  89%|████████▊ | 756/852 [00:09<00:01, 80.47it/s]
 
519
  90%|████████▉ | 765/852 [00:09<00:01, 80.21it/s]
 
520
  91%|█████████ | 774/852 [00:10<00:00, 79.37it/s]
 
521
  92%|█████████▏| 782/852 [00:10<00:00, 78.22it/s]
 
522
  93%|█████████▎| 790/852 [00:10<00:00, 77.38it/s]
 
523
  94%|█████████▎| 798/852 [00:10<00:00, 76.61it/s]
 
524
  95%|█████████▍| 807/852 [00:10<00:00, 78.06it/s]
 
525
  96%|█████████▌| 815/852 [00:10<00:00, 76.73it/s]
 
526
  97%|█████████▋| 824/852 [00:10<00:00, 77.89it/s]
 
527
  98%|█████████▊| 833/852 [00:10<00:00, 78.72it/s]
 
528
  99%|█████████▊| 841/852 [00:10<00:00, 77.89it/s]
 
529
 
 
530
 
531
  10%|█ | 425/4250 [01:57<14:28, 4.40it/s]
 
 
532
  [INFO|trainer.py:3478] 2024-08-30 20:27:45,881 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-425
 
 
 
 
 
 
 
533
  10%|█ | 426/4250 [02:01<6:17:25, 5.92s/it]
534
  10%|█ | 427/4250 [02:01<4:27:24, 4.20s/it]
535
  10%|█ | 428/4250 [02:01<3:11:11, 3.00s/it]
536
  10%|█ | 429/4250 [02:02<2:20:01, 2.20s/it]
537
  10%|█ | 430/4250 [02:02<1:41:14, 1.59s/it]
538
  10%|█ | 431/4250 [02:02<1:15:55, 1.19s/it]
539
  10%|█ | 432/4250 [02:02<56:49, 1.12it/s]
540
  10%|█ | 433/4250 [02:02<43:08, 1.47it/s]
541
  10%|█ | 434/4250 [02:03<35:31, 1.79it/s]
542
  10%|█ | 435/4250 [02:03<28:25, 2.24it/s]
543
  10%|█ | 436/4250 [02:03<24:21, 2.61it/s]
544
  10%|█ | 437/4250 [02:03<20:52, 3.04it/s]
545
  10%|█ | 438/4250 [02:04<18:32, 3.43it/s]
546
  10%|█ | 439/4250 [02:04<17:05, 3.72it/s]
547
  10%|█ | 440/4250 [02:04<15:35, 4.07it/s]
548
  10%|█ | 441/4250 [02:04<14:45, 4.30it/s]
549
  10%|█ | 442/4250 [02:04<14:42, 4.32it/s]
550
  10%|█ | 443/4250 [02:05<16:22, 3.88it/s]
551
  10%|█ | 444/4250 [02:05<15:35, 4.07it/s]
552
  10%|█ | 445/4250 [02:05<14:09, 4.48it/s]
553
  10%|█ | 446/4250 [02:05<15:10, 4.18it/s]
554
  11%|█ | 447/4250 [02:06<14:17, 4.43it/s]
555
  11%|█ | 448/4250 [02:06<13:13, 4.79it/s]
556
  11%|█ | 449/4250 [02:06<13:06, 4.83it/s]
557
  11%|█ | 450/4250 [02:06<15:05, 4.20it/s]
558
  11%|█ | 451/4250 [02:07<15:49, 4.00it/s]
559
  11%|█ | 452/4250 [02:07<14:54, 4.25it/s]
560
  11%|█ | 453/4250 [02:07<15:41, 4.03it/s]
561
  11%|█ | 454/4250 [02:07<15:39, 4.04it/s]
562
  11%|█ | 455/4250 [02:08<15:58, 3.96it/s]
563
  11%|█ | 456/4250 [02:08<14:30, 4.36it/s]
564
  11%|█ | 457/4250 [02:08<13:21, 4.73it/s]
565
  11%|█ | 458/4250 [02:08<14:27, 4.37it/s]
566
  11%|█ | 459/4250 [02:09<24:59, 2.53it/s]
567
  11%|█ | 460/4250 [02:09<25:16, 2.50it/s]
568
  11%|█ | 461/4250 [02:10<23:13, 2.72it/s]
569
  11%|█ | 462/4250 [02:10<20:18, 3.11it/s]
570
  11%|█ | 463/4250 [02:10<18:00, 3.51it/s]
571
  11%|█ | 464/4250 [02:10<16:52, 3.74it/s]
572
  11%|█ | 465/4250 [02:10<15:29, 4.07it/s]
573
  11%|█ | 466/4250 [02:11<14:52, 4.24it/s]
574
  11%|█ | 467/4250 [02:11<15:16, 4.13it/s]
575
  11%|█ | 468/4250 [02:11<15:22, 4.10it/s]
576
  11%|█ | 469/4250 [02:11<14:56, 4.22it/s]
577
  11%|█ | 470/4250 [02:12<14:16, 4.41it/s]
578
  11%|█ | 471/4250 [02:12<13:02, 4.83it/s]
579
  11%|█ | 472/4250 [02:12<14:34, 4.32it/s]
580
  11%|█ | 473/4250 [02:12<14:11, 4.43it/s]
581
  11%|█ | 474/4250 [02:13<15:34, 4.04it/s]
582
  11%|█ | 475/4250 [02:13<19:44, 3.19it/s]
583
  11%|█ | 476/4250 [02:13<19:06, 3.29it/s]
584
  11%|█ | 477/4250 [02:14<18:12, 3.45it/s]
585
  11%|█ | 478/4250 [02:14<18:34, 3.39it/s]
586
  11%|█▏ | 479/4250 [02:14<17:12, 3.65it/s]
587
  11%|█▏ | 480/4250 [02:14<15:17, 4.11it/s]
588
  11%|█▏ | 481/4250 [02:15<16:05, 3.90it/s]
589
  11%|█▏ | 482/4250 [02:15<17:51, 3.52it/s]
590
  11%|█▏ | 483/4250 [02:15<16:15, 3.86it/s]
591
  11%|█▏ | 484/4250 [02:15<14:39, 4.28it/s]
592
  11%|█▏ | 485/4250 [02:16<14:48, 4.24it/s]
593
  11%|█▏ | 486/4250 [02:16<15:40, 4.00it/s]
594
  11%|█▏ | 487/4250 [02:16<14:28, 4.33it/s]
595
  11%|█▏ | 488/4250 [02:16<14:12, 4.41it/s]
596
  12%|█▏ | 489/4250 [02:17<18:04, 3.47it/s]
597
  12%|█▏ | 490/4250 [02:17<16:37, 3.77it/s]
598
  12%|█▏ | 491/4250 [02:17<16:44, 3.74it/s]
599
  12%|█▏ | 492/4250 [02:17<15:59, 3.92it/s]
600
  12%|█▏ | 493/4250 [02:18<14:35, 4.29it/s]
601
  12%|█▏ | 494/4250 [02:18<15:03, 4.16it/s]
602
  12%|█▏ | 495/4250 [02:18<17:00, 3.68it/s]
603
  12%|█▏ | 496/4250 [02:18<16:06, 3.89it/s]
604
  12%|█▏ | 497/4250 [02:19<14:40, 4.26it/s]
 
1
+ 2024-08-30 20:25:26.549777: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2
+ 2024-08-30 20:25:26.568217: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
3
+ 2024-08-30 20:25:26.590253: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
4
+ 2024-08-30 20:25:26.597224: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
5
+ 2024-08-30 20:25:26.612962: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
6
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
7
+ 2024-08-30 20:25:27.916342: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
8
+ /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1494: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
9
+ warnings.warn(
10
+ 08/30/2024 20:25:29 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
11
+ 08/30/2024 20:25:29 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
12
+ _n_gpu=1,
13
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
14
+ adafactor=False,
15
+ adam_beta1=0.9,
16
+ adam_beta2=0.999,
17
+ adam_epsilon=1e-08,
18
+ auto_find_batch_size=False,
19
+ batch_eval_metrics=False,
20
+ bf16=False,
21
+ bf16_full_eval=False,
22
+ data_seed=None,
23
+ dataloader_drop_last=False,
24
+ dataloader_num_workers=0,
25
+ dataloader_persistent_workers=False,
26
+ dataloader_pin_memory=True,
27
+ dataloader_prefetch_factor=None,
28
+ ddp_backend=None,
29
+ ddp_broadcast_buffers=None,
30
+ ddp_bucket_cap_mb=None,
31
+ ddp_find_unused_parameters=None,
32
+ ddp_timeout=1800,
33
+ debug=[],
34
+ deepspeed=None,
35
+ disable_tqdm=False,
36
+ dispatch_batches=None,
37
+ do_eval=True,
38
+ do_predict=True,
39
+ do_train=True,
40
+ eval_accumulation_steps=None,
41
+ eval_delay=0,
42
+ eval_do_concat_batches=True,
43
+ eval_on_start=False,
44
+ eval_steps=None,
45
+ eval_strategy=epoch,
46
+ evaluation_strategy=epoch,
47
+ fp16=False,
48
+ fp16_backend=auto,
49
+ fp16_full_eval=False,
50
+ fp16_opt_level=O1,
51
+ fsdp=[],
52
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
53
+ fsdp_min_num_params=0,
54
+ fsdp_transformer_layer_cls_to_wrap=None,
55
+ full_determinism=False,
56
+ gradient_accumulation_steps=2,
57
+ gradient_checkpointing=False,
58
+ gradient_checkpointing_kwargs=None,
59
+ greater_is_better=True,
60
+ group_by_length=False,
61
+ half_precision_backend=auto,
62
+ hub_always_push=False,
63
+ hub_model_id=None,
64
+ hub_private_repo=False,
65
+ hub_strategy=every_save,
66
+ hub_token=<HUB_TOKEN>,
67
+ ignore_data_skip=False,
68
+ include_inputs_for_metrics=False,
69
+ include_num_input_tokens_seen=False,
70
+ include_tokens_per_second=False,
71
+ jit_mode_eval=False,
72
+ label_names=None,
73
+ label_smoothing_factor=0.0,
74
+ learning_rate=5e-05,
75
+ length_column_name=length,
76
+ load_best_model_at_end=True,
77
+ local_rank=0,
78
+ log_level=passive,
79
+ log_level_replica=warning,
80
+ log_on_each_node=True,
81
+ logging_dir=/content/dissertation/scripts/ner/output/tb,
82
+ logging_first_step=False,
83
+ logging_nan_inf_filter=True,
84
+ logging_steps=500,
85
+ logging_strategy=steps,
86
+ lr_scheduler_kwargs={},
87
+ lr_scheduler_type=linear,
88
+ max_grad_norm=1.0,
89
+ max_steps=-1,
90
+ metric_for_best_model=f1,
91
+ mp_parameters=,
92
+ neftune_noise_alpha=None,
93
+ no_cuda=False,
94
+ num_train_epochs=10.0,
95
+ optim=adamw_torch,
96
+ optim_args=None,
97
+ optim_target_modules=None,
98
+ output_dir=/content/dissertation/scripts/ner/output,
99
+ overwrite_output_dir=True,
100
+ past_index=-1,
101
+ per_device_eval_batch_size=8,
102
+ per_device_train_batch_size=32,
103
+ prediction_loss_only=False,
104
+ push_to_hub=True,
105
+ push_to_hub_model_id=None,
106
+ push_to_hub_organization=None,
107
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
108
+ ray_scope=last,
109
+ remove_unused_columns=True,
110
+ report_to=['tensorboard'],
111
+ restore_callback_states_from_checkpoint=False,
112
+ resume_from_checkpoint=None,
113
+ run_name=/content/dissertation/scripts/ner/output,
114
+ save_on_each_node=False,
115
+ save_only_model=False,
116
+ save_safetensors=True,
117
+ save_steps=500,
118
+ save_strategy=epoch,
119
+ save_total_limit=None,
120
+ seed=42,
121
+ skip_memory_metrics=True,
122
+ split_batches=None,
123
+ tf32=None,
124
+ torch_compile=False,
125
+ torch_compile_backend=None,
126
+ torch_compile_mode=None,
127
+ torchdynamo=None,
128
+ tpu_metrics_debug=False,
129
+ tpu_num_cores=None,
130
+ use_cpu=False,
131
+ use_ipex=False,
132
+ use_legacy_prediction_loop=False,
133
+ use_mps_device=False,
134
+ warmup_ratio=0.0,
135
+ warmup_steps=0,
136
+ weight_decay=0.0,
137
+ )
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+ [INFO|configuration_utils.py:733] 2024-08-30 20:25:41,919 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
146
+ [INFO|configuration_utils.py:800] 2024-08-30 20:25:41,923 >> Model config RobertaConfig {
147
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
148
+ "architectures": [
149
+ "RobertaForMaskedLM"
150
+ ],
151
+ "attention_probs_dropout_prob": 0.1,
152
+ "bos_token_id": 0,
153
+ "classifier_dropout": null,
154
+ "eos_token_id": 2,
155
+ "finetuning_task": "ner",
156
+ "gradient_checkpointing": false,
157
+ "hidden_act": "gelu",
158
+ "hidden_dropout_prob": 0.1,
159
+ "hidden_size": 768,
160
+ "id2label": {
161
+ "0": "O",
162
+ "1": "B-ENFERMEDAD",
163
+ "2": "I-ENFERMEDAD"
164
+ },
165
+ "initializer_range": 0.02,
166
+ "intermediate_size": 3072,
167
+ "label2id": {
168
+ "B-ENFERMEDAD": 1,
169
+ "I-ENFERMEDAD": 2,
170
+ "O": 0
171
+ },
172
+ "layer_norm_eps": 1e-05,
173
+ "max_position_embeddings": 514,
174
+ "model_type": "roberta",
175
+ "num_attention_heads": 12,
176
+ "num_hidden_layers": 12,
177
+ "pad_token_id": 1,
178
+ "position_embedding_type": "absolute",
179
+ "transformers_version": "4.42.4",
180
+ "type_vocab_size": 1,
181
+ "use_cache": true,
182
+ "vocab_size": 50262
183
+ }
184
+
185
+ [INFO|configuration_utils.py:733] 2024-08-30 20:25:42,016 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
186
+ [INFO|configuration_utils.py:800] 2024-08-30 20:25:42,017 >> Model config RobertaConfig {
187
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
188
+ "architectures": [
189
+ "RobertaForMaskedLM"
190
+ ],
191
+ "attention_probs_dropout_prob": 0.1,
192
+ "bos_token_id": 0,
193
+ "classifier_dropout": null,
194
+ "eos_token_id": 2,
195
+ "gradient_checkpointing": false,
196
+ "hidden_act": "gelu",
197
+ "hidden_dropout_prob": 0.1,
198
+ "hidden_size": 768,
199
+ "initializer_range": 0.02,
200
+ "intermediate_size": 3072,
201
+ "layer_norm_eps": 1e-05,
202
+ "max_position_embeddings": 514,
203
+ "model_type": "roberta",
204
+ "num_attention_heads": 12,
205
+ "num_hidden_layers": 12,
206
+ "pad_token_id": 1,
207
+ "position_embedding_type": "absolute",
208
+ "transformers_version": "4.42.4",
209
+ "type_vocab_size": 1,
210
+ "use_cache": true,
211
+ "vocab_size": 50262
212
+ }
213
+
214
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 20:25:42,027 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
215
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 20:25:42,028 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
216
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 20:25:42,028 >> loading file tokenizer.json from cache at None
217
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 20:25:42,028 >> loading file added_tokens.json from cache at None
218
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 20:25:42,028 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
219
+ [INFO|tokenization_utils_base.py:2161] 2024-08-30 20:25:42,028 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
220
+ [INFO|configuration_utils.py:733] 2024-08-30 20:25:42,028 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
221
+ [INFO|configuration_utils.py:800] 2024-08-30 20:25:42,029 >> Model config RobertaConfig {
222
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
223
+ "architectures": [
224
+ "RobertaForMaskedLM"
225
+ ],
226
+ "attention_probs_dropout_prob": 0.1,
227
+ "bos_token_id": 0,
228
+ "classifier_dropout": null,
229
+ "eos_token_id": 2,
230
+ "gradient_checkpointing": false,
231
+ "hidden_act": "gelu",
232
+ "hidden_dropout_prob": 0.1,
233
+ "hidden_size": 768,
234
+ "initializer_range": 0.02,
235
+ "intermediate_size": 3072,
236
+ "layer_norm_eps": 1e-05,
237
+ "max_position_embeddings": 514,
238
+ "model_type": "roberta",
239
+ "num_attention_heads": 12,
240
+ "num_hidden_layers": 12,
241
+ "pad_token_id": 1,
242
+ "position_embedding_type": "absolute",
243
+ "transformers_version": "4.42.4",
244
+ "type_vocab_size": 1,
245
+ "use_cache": true,
246
+ "vocab_size": 50262
247
+ }
248
+
249
+ [INFO|configuration_utils.py:733] 2024-08-30 20:25:42,112 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
250
+ [INFO|configuration_utils.py:800] 2024-08-30 20:25:42,113 >> Model config RobertaConfig {
251
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
252
+ "architectures": [
253
+ "RobertaForMaskedLM"
254
+ ],
255
+ "attention_probs_dropout_prob": 0.1,
256
+ "bos_token_id": 0,
257
+ "classifier_dropout": null,
258
+ "eos_token_id": 2,
259
+ "gradient_checkpointing": false,
260
+ "hidden_act": "gelu",
261
+ "hidden_dropout_prob": 0.1,
262
+ "hidden_size": 768,
263
+ "initializer_range": 0.02,
264
+ "intermediate_size": 3072,
265
+ "layer_norm_eps": 1e-05,
266
+ "max_position_embeddings": 514,
267
+ "model_type": "roberta",
268
+ "num_attention_heads": 12,
269
+ "num_hidden_layers": 12,
270
+ "pad_token_id": 1,
271
+ "position_embedding_type": "absolute",
272
+ "transformers_version": "4.42.4",
273
+ "type_vocab_size": 1,
274
+ "use_cache": true,
275
+ "vocab_size": 50262
276
+ }
277
+
278
+ [INFO|modeling_utils.py:3556] 2024-08-30 20:25:42,300 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
279
+ [INFO|modeling_utils.py:4354] 2024-08-30 20:25:42,438 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
280
+ - This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
281
+ - This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
282
+ [WARNING|modeling_utils.py:4366] 2024-08-30 20:25:42,438 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
283
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
284
+
285
+
286
+
287
+ /content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
288
+ metric = load_metric("seqeval", trust_remote_code=True)
289
+ [INFO|trainer.py:805] 2024-08-30 20:25:48,288 >> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
290
+ [INFO|trainer.py:2128] 2024-08-30 20:25:48,850 >> ***** Running training *****
291
+ [INFO|trainer.py:2129] 2024-08-30 20:25:48,850 >> Num examples = 27,229
292
+ [INFO|trainer.py:2130] 2024-08-30 20:25:48,850 >> Num Epochs = 10
293
+ [INFO|trainer.py:2131] 2024-08-30 20:25:48,850 >> Instantaneous batch size per device = 32
294
+ [INFO|trainer.py:2134] 2024-08-30 20:25:48,850 >> Total train batch size (w. parallel, distributed & accumulation) = 64
295
+ [INFO|trainer.py:2135] 2024-08-30 20:25:48,851 >> Gradient Accumulation steps = 2
296
+ [INFO|trainer.py:2136] 2024-08-30 20:25:48,851 >> Total optimization steps = 4,250
297
+ [INFO|trainer.py:2137] 2024-08-30 20:25:48,851 >> Number of trainable parameters = 124,055,043
298
+
299
  0%| | 0/4250 [00:00<?, ?it/s]
300
  0%| | 1/4250 [00:01<1:18:15, 1.11s/it]
301
  0%| | 2/4250 [00:01<40:12, 1.76it/s]
302
  0%| | 3/4250 [00:01<28:24, 2.49it/s]
303
  0%| | 4/4250 [00:01<22:38, 3.13it/s]
304
  0%| | 5/4250 [00:01<20:43, 3.41it/s]
305
  0%| | 6/4250 [00:02<18:37, 3.80it/s]
306
  0%| | 7/4250 [00:02<17:55, 3.94it/s]
307
  0%| | 8/4250 [00:02<16:59, 4.16it/s]
308
  0%| | 9/4250 [00:02<17:55, 3.94it/s]
309
  0%| | 10/4250 [00:03<17:19, 4.08it/s]
310
  0%| | 11/4250 [00:03<16:45, 4.21it/s]
311
  0%| | 12/4250 [00:03<15:21, 4.60it/s]
312
  0%| | 13/4250 [00:03<17:23, 4.06it/s]
313
  0%| | 14/4250 [00:04<16:58, 4.16it/s]
314
  0%| | 15/4250 [00:04<16:38, 4.24it/s]
315
  0%| | 16/4250 [00:04<15:46, 4.47it/s]
316
  0%| | 17/4250 [00:04<14:44, 4.78it/s]
317
  0%| | 18/4250 [00:04<14:27, 4.88it/s]
318
  0%| | 19/4250 [00:05<15:16, 4.62it/s]
319
  0%| | 20/4250 [00:05<14:22, 4.90it/s]
320
  0%| | 21/4250 [00:05<16:37, 4.24it/s]
321
  1%| | 22/4250 [00:05<15:57, 4.42it/s]
322
  1%| | 23/4250 [00:05<15:28, 4.55it/s]
323
  1%| | 24/4250 [00:06<15:18, 4.60it/s]
324
  1%| | 25/4250 [00:06<16:46, 4.20it/s]
325
  1%| | 26/4250 [00:06<15:14, 4.62it/s]
326
  1%| | 27/4250 [00:06<15:15, 4.61it/s]
327
  1%| | 28/4250 [00:07<14:02, 5.01it/s]
328
  1%| | 29/4250 [00:07<13:43, 5.12it/s]
329
  1%| | 30/4250 [00:07<16:48, 4.18it/s]
330
  1%| | 31/4250 [00:07<15:45, 4.46it/s]
331
  1%| | 32/4250 [00:07<16:53, 4.16it/s]
332
  1%| | 33/4250 [00:08<16:20, 4.30it/s]
333
  1%| | 34/4250 [00:08<16:34, 4.24it/s]
334
  1%| | 35/4250 [00:08<15:53, 4.42it/s]
335
  1%| | 36/4250 [00:08<16:15, 4.32it/s]
336
  1%| | 37/4250 [00:09<15:17, 4.59it/s]
337
  1%| | 38/4250 [00:09<15:07, 4.64it/s]
338
  1%| | 39/4250 [00:09<16:28, 4.26it/s]
339
  1%| | 40/4250 [00:09<15:19, 4.58it/s]
340
  1%| | 41/4250 [00:09<14:59, 4.68it/s]
341
  1%| | 42/4250 [00:10<15:06, 4.64it/s]
342
  1%| | 43/4250 [00:10<17:34, 3.99it/s]
343
  1%| | 44/4250 [00:11<27:51, 2.52it/s]
344
  1%| | 45/4250 [00:11<23:41, 2.96it/s]
345
  1%| | 46/4250 [00:11<22:36, 3.10it/s]
346
  1%| | 47/4250 [00:11<19:58, 3.51it/s]
347
  1%| | 48/4250 [00:12<18:45, 3.73it/s]
348
  1%| | 49/4250 [00:12<16:12, 4.32it/s]
349
  1%| | 50/4250 [00:12<16:45, 4.18it/s]
350
  1%| | 51/4250 [00:12<16:59, 4.12it/s]
351
  1%| | 52/4250 [00:13<16:34, 4.22it/s]
352
  1%| | 53/4250 [00:13<17:18, 4.04it/s]
353
  1%|▏ | 54/4250 [00:13<20:58, 3.33it/s]
354
  1%|▏ | 55/4250 [00:13<19:06, 3.66it/s]
355
  1%|▏ | 56/4250 [00:14<18:27, 3.79it/s]
356
  1%|▏ | 57/4250 [00:14<17:06, 4.08it/s]
357
  1%|▏ | 58/4250 [00:14<15:39, 4.46it/s]
358
  1%|▏ | 59/4250 [00:14<15:05, 4.63it/s]
359
  1%|▏ | 60/4250 [00:14<14:49, 4.71it/s]
360
  1%|▏ | 61/4250 [00:15<15:52, 4.40it/s]
361
  1%|▏ | 62/4250 [00:15<15:58, 4.37it/s]
362
  1%|▏ | 63/4250 [00:15<15:02, 4.64it/s]
363
  2%|▏ | 64/4250 [00:15<14:48, 4.71it/s]
364
  2%|▏ | 65/4250 [00:16<15:34, 4.48it/s]
365
  2%|▏ | 66/4250 [00:16<16:39, 4.19it/s]
366
  2%|▏ | 67/4250 [00:16<16:33, 4.21it/s]
367
  2%|▏ | 68/4250 [00:17<20:48, 3.35it/s]
368
  2%|▏ | 69/4250 [00:17<18:35, 3.75it/s]
369
  2%|▏ | 70/4250 [00:17<19:57, 3.49it/s]
370
  2%|▏ | 71/4250 [00:17<18:59, 3.67it/s]
371
  2%|▏ | 72/4250 [00:18<17:43, 3.93it/s]
372
  2%|▏ | 73/4250 [00:18<17:16, 4.03it/s]
373
  2%|▏ | 74/4250 [00:18<17:31, 3.97it/s]
374
  2%|▏ | 75/4250 [00:18<15:35, 4.46it/s]
375
  2%|▏ | 76/4250 [00:18<16:09, 4.30it/s]
376
  2%|▏ | 77/4250 [00:19<15:16, 4.55it/s]
377
  2%|▏ | 78/4250 [00:19<14:19, 4.85it/s]
378
  2%|▏ | 79/4250 [00:19<14:56, 4.65it/s]
379
  2%|▏ | 80/4250 [00:19<15:15, 4.56it/s]
380
  2%|▏ | 81/4250 [00:19<15:31, 4.47it/s]
381
  2%|▏ | 82/4250 [00:20<14:46, 4.70it/s]
382
  2%|▏ | 83/4250 [00:20<16:51, 4.12it/s]
383
  2%|▏ | 84/4250 [00:20<15:25, 4.50it/s]
384
  2%|▏ | 85/4250 [00:20<15:09, 4.58it/s]
385
  2%|▏ | 86/4250 [00:21<15:01, 4.62it/s]
386
  2%|▏ | 87/4250 [00:21<15:03, 4.61it/s]
387
  2%|▏ | 88/4250 [00:21<13:40, 5.07it/s]
388
  2%|▏ | 89/4250 [00:21<13:52, 5.00it/s]
389
  2%|▏ | 90/4250 [00:21<16:14, 4.27it/s]
390
  2%|▏ | 91/4250 [00:22<14:35, 4.75it/s]
391
  2%|▏ | 92/4250 [00:22<14:04, 4.93it/s]
392
  2%|▏ | 93/4250 [00:22<13:20, 5.19it/s]
393
  2%|▏ | 94/4250 [00:22<13:59, 4.95it/s]
394
  2%|▏ | 95/4250 [00:22<15:09, 4.57it/s]
395
  2%|▏ | 96/4250 [00:23<16:58, 4.08it/s]
396
  2%|▏ | 97/4250 [00:23<15:55, 4.34it/s]
397
  2%|▏ | 98/4250 [00:23<14:47, 4.68it/s]
398
  2%|▏ | 99/4250 [00:23<14:10, 4.88it/s]
399
  2%|▏ | 100/4250 [00:24<14:30, 4.77it/s]
400
  2%|▏ | 101/4250 [00:24<18:02, 3.83it/s]
401
  2%|▏ | 102/4250 [00:24<20:27, 3.38it/s]
402
  2%|▏ | 103/4250 [00:25<18:31, 3.73it/s]
403
  2%|▏ | 104/4250 [00:25<17:01, 4.06it/s]
404
  2%|▏ | 105/4250 [00:25<16:41, 4.14it/s]
405
  2%|▏ | 106/4250 [00:25<16:12, 4.26it/s]
406
  3%|▎ | 107/4250 [00:25<15:58, 4.32it/s]
407
  3%|▎ | 108/4250 [00:26<19:11, 3.60it/s]
408
  3%|▎ | 109/4250 [00:26<18:38, 3.70it/s]
409
  3%|▎ | 110/4250 [00:26<17:15, 4.00it/s]
410
  3%|▎ | 111/4250 [00:26<15:36, 4.42it/s]
411
  3%|▎ | 112/4250 [00:27<16:11, 4.26it/s]
412
  3%|▎ | 113/4250 [00:27<15:49, 4.36it/s]
413
  3%|▎ | 114/4250 [00:27<14:38, 4.71it/s]
414
  3%|▎ | 115/4250 [00:27<14:08, 4.87it/s]
415
  3%|▎ | 116/4250 [00:28<15:38, 4.40it/s]
416
  3%|▎ | 117/4250 [00:28<15:11, 4.54it/s]
417
  3%|▎ | 118/4250 [00:28<14:46, 4.66it/s]
418
  3%|▎ | 119/4250 [00:28<15:08, 4.55it/s]
419
  3%|▎ | 120/4250 [00:28<14:14, 4.83it/s]
420
  3%|▎ | 121/4250 [00:29<14:19, 4.80it/s]
421
  3%|▎ | 122/4250 [00:29<20:15, 3.40it/s]
422
  3%|▎ | 123/4250 [00:29<18:33, 3.71it/s]
423
  3%|▎ | 124/4250 [00:29<16:55, 4.06it/s]
424
  3%|▎ | 125/4250 [00:30<17:07, 4.01it/s]
425
  3%|▎ | 126/4250 [00:30<15:33, 4.42it/s]
426
  3%|▎ | 127/4250 [00:30<14:50, 4.63it/s]
427
  3%|▎ | 128/4250 [00:30<14:17, 4.81it/s]
428
  3%|▎ | 129/4250 [00:31<17:35, 3.90it/s]
429
  3%|▎ | 130/4250 [00:31<17:02, 4.03it/s]
430
  3%|▎ | 131/4250 [00:31<15:32, 4.42it/s]
431
  3%|▎ | 132/4250 [00:31<14:40, 4.68it/s]
432
  3%|▎ | 133/4250 [00:31<15:39, 4.38it/s]
433
  3%|▎ | 134/4250 [00:32<15:20, 4.47it/s]
434
  3%|▎ | 135/4250 [00:32<15:08, 4.53it/s]
435
  3%|▎ | 136/4250 [00:32<15:13, 4.50it/s]
436
  3%|▎ | 137/4250 [00:32<15:00, 4.57it/s]
437
  3%|▎ | 138/4250 [00:33<14:29, 4.73it/s]
438
  3%|▎ | 139/4250 [00:33<14:58, 4.58it/s]
439
  3%|▎ | 140/4250 [00:33<15:55, 4.30it/s]
440
  3%|▎ | 141/4250 [00:33<15:59, 4.28it/s]
441
  3%|▎ | 142/4250 [00:33<15:00, 4.56it/s]
442
  3%|▎ | 143/4250 [00:34<14:38, 4.67it/s]
443
  3%|▎ | 144/4250 [00:34<14:27, 4.73it/s]
444
  3%|▎ | 145/4250 [00:34<16:59, 4.03it/s]
445
  3%|▎ | 146/4250 [00:35<22:15, 3.07it/s]
446
  3%|▎ | 147/4250 [00:35<19:50, 3.45it/s]
447
  3%|▎ | 148/4250 [00:35<19:43, 3.47it/s]
448
  4%|▎ | 149/4250 [00:35<18:58, 3.60it/s]
449
  4%|▎ | 150/4250 [00:36<17:38, 3.87it/s]
450
  4%|▎ | 151/4250 [00:36<16:10, 4.23it/s]
451
  4%|▎ | 152/4250 [00:36<17:03, 4.00it/s]
452
  4%|▎ | 153/4250 [00:36<16:24, 4.16it/s]
453
  4%|▎ | 154/4250 [00:37<15:22, 4.44it/s]
454
  4%|▎ | 155/4250 [00:37<14:32, 4.70it/s]
455
  4%|▎ | 156/4250 [00:37<14:50, 4.60it/s]
456
  4%|▎ | 157/4250 [00:37<14:30, 4.70it/s]
457
  4%|▎ | 158/4250 [00:37<17:37, 3.87it/s]
458
  4%|▎ | 159/4250 [00:38<18:35, 3.67it/s]
459
  4%|▍ | 160/4250 [00:38<17:00, 4.01it/s]
460
  4%|▍ | 161/4250 [00:38<16:02, 4.25it/s]
461
  4%|▍ | 162/4250 [00:38<14:55, 4.56it/s]
462
  4%|▍ | 163/4250 [00:39<14:47, 4.60it/s]
463
  4%|▍ | 164/4250 [00:39<18:40, 3.65it/s]
464
  4%|▍ | 165/4250 [00:39<18:13, 3.74it/s]
465
  4%|▍ | 166/4250 [00:40<18:24, 3.70it/s]
466
  4%|▍ | 167/4250 [00:40<17:29, 3.89it/s]
467
  4%|▍ | 168/4250 [00:40<15:56, 4.27it/s]
468
  4%|▍ | 169/4250 [00:40<15:32, 4.38it/s]
469
  4%|▍ | 170/4250 [00:40<15:28, 4.39it/s]
470
  4%|▍ | 171/4250 [00:41<16:01, 4.24it/s]
471
  4%|▍ | 172/4250 [00:41<15:13, 4.46it/s]
472
  4%|▍ | 173/4250 [00:41<18:22, 3.70it/s]
473
  4%|▍ | 174/4250 [00:41<16:38, 4.08it/s]
474
  4%|▍ | 175/4250 [00:42<19:24, 3.50it/s]
475
  4%|▍ | 176/4250 [00:42<19:20, 3.51it/s]
476
  4%|▍ | 177/4250 [00:42<17:04, 3.98it/s]
477
  4%|▍ | 178/4250 [00:42<16:07, 4.21it/s]
478
  4%|▍ | 179/4250 [00:43<15:41, 4.32it/s]
479
  4%|▍ | 180/4250 [00:43<14:28, 4.69it/s]
480
  4%|▍ | 181/4250 [00:43<14:38, 4.63it/s]
481
  4%|▍ | 182/4250 [00:43<17:11, 3.94it/s]
482
  4%|▍ | 183/4250 [00:44<16:36, 4.08it/s]
483
  4%|▍ | 184/4250 [00:44<16:28, 4.11it/s]
484
  4%|▍ | 185/4250 [00:44<15:07, 4.48it/s]
485
  4%|▍ | 186/4250 [00:44<14:39, 4.62it/s]
486
  4%|▍ | 187/4250 [00:44<13:47, 4.91it/s]
487
  4%|▍ | 188/4250 [00:45<14:41, 4.61it/s]
488
  4%|��� | 189/4250 [00:45<14:11, 4.77it/s]
489
  4%|▍ | 190/4250 [00:45<13:45, 4.92it/s]
490
  4%|▍ | 191/4250 [00:45<14:23, 4.70it/s]
491
  5%|▍ | 192/4250 [00:45<14:11, 4.76it/s]
492
  5%|▍ | 193/4250 [00:46<13:55, 4.86it/s]
493
  5%|▍ | 194/4250 [00:46<16:42, 4.05it/s]
494
  5%|▍ | 195/4250 [00:46<15:38, 4.32it/s]
495
  5%|▍ | 196/4250 [00:46<14:26, 4.68it/s]
496
  5%|▍ | 197/4250 [00:47<14:55, 4.53it/s]
497
  5%|▍ | 198/4250 [00:47<15:19, 4.41it/s]
498
  5%|▍ | 199/4250 [00:47<15:25, 4.38it/s]
499
  5%|▍ | 200/4250 [00:47<15:35, 4.33it/s]
500
  5%|▍ | 201/4250 [00:48<15:14, 4.43it/s]
501
  5%|▍ | 202/4250 [00:48<15:31, 4.35it/s]
502
  5%|▍ | 203/4250 [00:48<14:26, 4.67it/s]
503
  5%|▍ | 204/4250 [00:48<13:47, 4.89it/s]
504
  5%|▍ | 205/4250 [00:48<14:44, 4.57it/s]
505
  5%|▍ | 206/4250 [00:49<14:29, 4.65it/s]
506
  5%|▍ | 207/4250 [00:49<16:45, 4.02it/s]
507
  5%|▍ | 208/4250 [00:49<15:19, 4.40it/s]
508
  5%|▍ | 209/4250 [00:49<15:12, 4.43it/s]
509
  5%|▍ | 210/4250 [00:49<14:01, 4.80it/s]
510
  5%|▍ | 211/4250 [00:50<15:09, 4.44it/s]
511
  5%|▍ | 212/4250 [00:50<14:26, 4.66it/s]
512
  5%|▌ | 213/4250 [00:50<17:31, 3.84it/s]
513
  5%|▌ | 214/4250 [00:50<15:24, 4.37it/s]
514
  5%|▌ | 215/4250 [00:51<15:17, 4.40it/s]
515
  5%|▌ | 216/4250 [00:51<19:04, 3.52it/s]
516
  5%|▌ | 217/4250 [00:51<17:03, 3.94it/s]
517
  5%|▌ | 218/4250 [00:51<15:56, 4.21it/s]
518
  5%|▌ | 219/4250 [00:52<16:35, 4.05it/s]
519
  5%|▌ | 220/4250 [00:52<16:40, 4.03it/s]
520
  5%|▌ | 221/4250 [00:52<18:41, 3.59it/s]
521
  5%|▌ | 222/4250 [00:53<20:01, 3.35it/s]
522
  5%|▌ | 223/4250 [00:53<18:26, 3.64it/s]
523
  5%|▌ | 224/4250 [00:53<16:31, 4.06it/s]
524
  5%|▌ | 225/4250 [00:53<15:07, 4.43it/s]
525
  5%|▌ | 226/4250 [00:53<14:23, 4.66it/s]
526
  5%|▌ | 227/4250 [00:54<14:01, 4.78it/s]
527
  5%|▌ | 228/4250 [00:54<13:47, 4.86it/s]
528
  5%|▌ | 229/4250 [00:54<12:58, 5.17it/s]
529
  5%|▌ | 230/4250 [00:54<14:21, 4.67it/s]
530
  5%|▌ | 231/4250 [00:54<13:45, 4.87it/s]
531
  5%|▌ | 232/4250 [00:55<13:35, 4.93it/s]
532
  5%|▌ | 233/4250 [00:55<13:23, 5.00it/s]
533
  6%|▌ | 234/4250 [00:55<15:41, 4.26it/s]
534
  6%|▌ | 235/4250 [00:56<20:37, 3.24it/s]
535
  6%|▌ | 236/4250 [00:56<21:51, 3.06it/s]
536
  6%|▌ | 237/4250 [00:56<19:42, 3.39it/s]
537
  6%|▌ | 238/4250 [00:56<17:25, 3.84it/s]
538
  6%|▌ | 239/4250 [00:57<17:28, 3.83it/s]
539
  6%|▌ | 240/4250 [00:57<19:56, 3.35it/s]
540
  6%|▌ | 241/4250 [00:57<17:57, 3.72it/s]
541
  6%|▌ | 242/4250 [00:57<16:26, 4.06it/s]
542
  6%|▌ | 243/4250 [00:58<15:57, 4.19it/s]
543
  6%|▌ | 244/4250 [00:58<15:58, 4.18it/s]
544
  6%|▌ | 245/4250 [00:58<15:56, 4.19it/s]
545
  6%|▌ | 246/4250 [00:59<24:58, 2.67it/s]
546
  6%|▌ | 247/4250 [00:59<22:20, 2.99it/s]
547
  6%|▌ | 248/4250 [00:59<19:38, 3.40it/s]
548
  6%|▌ | 249/4250 [01:00<23:51, 2.80it/s]
549
  6%|▌ | 250/4250 [01:00<25:17, 2.64it/s]
550
  6%|▌ | 251/4250 [01:01<22:48, 2.92it/s]
551
  6%|▌ | 252/4250 [01:01<19:31, 3.41it/s]
552
  6%|▌ | 253/4250 [01:01<17:13, 3.87it/s]
553
  6%|▌ | 254/4250 [01:01<17:53, 3.72it/s]
554
  6%|▌ | 255/4250 [01:01<18:10, 3.66it/s]
555
  6%|▌ | 256/4250 [01:02<17:55, 3.71it/s]
556
  6%|▌ | 257/4250 [01:02<16:43, 3.98it/s]
557
  6%|▌ | 258/4250 [01:02<15:35, 4.27it/s]
558
  6%|▌ | 259/4250 [01:02<14:28, 4.59it/s]
559
  6%|▌ | 260/4250 [01:03<15:44, 4.22it/s]
560
  6%|▌ | 261/4250 [01:03<15:00, 4.43it/s]
561
  6%|▌ | 262/4250 [01:03<13:52, 4.79it/s]
562
  6%|▌ | 263/4250 [01:03<12:53, 5.15it/s]
563
  6%|▌ | 264/4250 [01:03<14:53, 4.46it/s]
564
  6%|▌ | 265/4250 [01:04<15:50, 4.19it/s]
565
  6%|▋ | 266/4250 [01:04<15:46, 4.21it/s]
566
  6%|▋ | 267/4250 [01:04<15:43, 4.22it/s]
567
  6%|▋ | 268/4250 [01:04<14:47, 4.49it/s]
568
  6%|▋ | 269/4250 [01:05<14:19, 4.63it/s]
569
  6%|▋ | 270/4250 [01:05<16:43, 3.97it/s]
570
  6%|▋ | 271/4250 [01:05<15:07, 4.38it/s]
571
  6%|▋ | 272/4250 [01:05<14:35, 4.54it/s]
572
  6%|▋ | 273/4250 [01:05<14:08, 4.69it/s]
573
  6%|▋ | 274/4250 [01:06<13:43, 4.83it/s]
574
  6%|▋ | 275/4250 [01:06<13:31, 4.90it/s]
575
  6%|▋ | 276/4250 [01:06<12:58, 5.11it/s]
576
  7%|▋ | 277/4250 [01:06<13:41, 4.84it/s]
577
  7%|▋ | 278/4250 [01:06<13:31, 4.89it/s]
578
  7%|▋ | 279/4250 [01:07<14:09, 4.67it/s]
579
  7%|▋ | 280/4250 [01:07<17:33, 3.77it/s]
580
  7%|▋ | 281/4250 [01:07<16:06, 4.10it/s]
581
  7%|▋ | 282/4250 [01:07<15:58, 4.14it/s]
582
  7%|▋ | 283/4250 [01:08<15:01, 4.40it/s]
583
  7%|▋ | 284/4250 [01:08<14:33, 4.54it/s]
584
  7%|▋ | 285/4250 [01:08<14:43, 4.49it/s]
585
  7%|▋ | 286/4250 [01:08<14:18, 4.62it/s]
586
  7%|▋ | 287/4250 [01:09<15:53, 4.16it/s]
587
  7%|▋ | 288/4250 [01:09<14:44, 4.48it/s]
588
  7%|▋ | 289/4250 [01:09<14:55, 4.42it/s]
589
  7%|▋ | 290/4250 [01:09<14:05, 4.69it/s]
590
  7%|▋ | 291/4250 [01:09<14:55, 4.42it/s]
591
  7%|▋ | 292/4250 [01:10<14:01, 4.70it/s]
592
  7%|▋ | 293/4250 [01:10<13:19, 4.95it/s]
593
  7%|▋ | 294/4250 [01:10<12:53, 5.11it/s]
594
  7%|▋ | 295/4250 [01:10<14:15, 4.62it/s]
595
  7%|▋ | 296/4250 [01:10<13:25, 4.91it/s]
596
  7%|▋ | 297/4250 [01:11<13:07, 5.02it/s]
597
  7%|▋ | 298/4250 [01:11<13:03, 5.04it/s]
598
  7%|▋ | 299/4250 [01:11<13:43, 4.80it/s]
599
  7%|▋ | 300/4250 [01:11<15:32, 4.23it/s]
600
  7%|▋ | 301/4250 [01:12<14:53, 4.42it/s]
601
  7%|▋ | 302/4250 [01:12<16:06, 4.09it/s]
602
  7%|▋ | 303/4250 [01:12<17:14, 3.82it/s]
603
  7%|▋ | 304/4250 [01:12<16:34, 3.97it/s]
604
  7%|▋ | 305/4250 [01:13<16:20, 4.03it/s]
605
  7%|▋ | 306/4250 [01:13<18:10, 3.62it/s]
606
  7%|▋ | 307/4250 [01:13<16:03, 4.09it/s]
607
  7%|▋ | 308/4250 [01:13<15:15, 4.31it/s]
608
  7%|▋ | 309/4250 [01:14<15:01, 4.37it/s]
609
  7%|▋ | 310/4250 [01:14<14:24, 4.56it/s]
610
  7%|▋ | 311/4250 [01:14<13:58, 4.70it/s]
611
  7%|▋ | 312/4250 [01:14<16:55, 3.88it/s]
612
  7%|▋ | 313/4250 [01:15<16:12, 4.05it/s]
613
  7%|▋ | 314/4250 [01:15<16:17, 4.03it/s]
614
  7%|▋ | 315/4250 [01:15<15:12, 4.31it/s]
615
  7%|▋ | 316/4250 [01:15<14:32, 4.51it/s]
616
  7%|▋ | 317/4250 [01:15<15:41, 4.18it/s]
617
  7%|▋ | 318/4250 [01:16<15:25, 4.25it/s]
618
  8%|▊ | 319/4250 [01:16<15:46, 4.15it/s]
619
  8%|▊ | 320/4250 [01:16<16:25, 3.99it/s]
620
  8%|▊ | 321/4250 [01:16<15:03, 4.35it/s]
621
  8%|▊ | 322/4250 [01:17<14:35, 4.49it/s]
622
  8%|▊ | 323/4250 [01:17<13:27, 4.86it/s]
623
  8%|▊ | 324/4250 [01:17<13:13, 4.95it/s]
624
  8%|▊ | 325/4250 [01:17<13:22, 4.89it/s]
625
  8%|▊ | 326/4250 [01:17<12:56, 5.05it/s]
626
  8%|▊ | 327/4250 [01:18<12:34, 5.20it/s]
627
  8%|▊ | 328/4250 [01:18<16:35, 3.94it/s]
628
  8%|▊ | 329/4250 [01:18<17:45, 3.68it/s]
629
  8%|▊ | 330/4250 [01:18<16:40, 3.92it/s]
630
  8%|▊ | 331/4250 [01:19<16:15, 4.02it/s]
631
  8%|▊ | 332/4250 [01:19<15:10, 4.31it/s]
632
  8%|▊ | 333/4250 [01:19<14:15, 4.58it/s]
633
  8%|▊ | 334/4250 [01:19<14:33, 4.48it/s]
634
  8%|▊ | 335/4250 [01:20<15:52, 4.11it/s]
635
  8%|▊ | 336/4250 [01:20<15:40, 4.16it/s]
636
  8%|▊ | 337/4250 [01:20<14:30, 4.50it/s]
637
  8%|▊ | 338/4250 [01:20<13:51, 4.70it/s]
638
  8%|▊ | 339/4250 [01:21<17:39, 3.69it/s]
639
  8%|▊ | 340/4250 [01:21<16:24, 3.97it/s]
640
  8%|▊ | 341/4250 [01:21<17:23, 3.75it/s]
641
  8%|▊ | 342/4250 [01:21<15:45, 4.13it/s]
642
  8%|▊ | 343/4250 [01:22<16:07, 4.04it/s]
643
  8%|▊ | 344/4250 [01:22<15:55, 4.09it/s]
644
  8%|▊ | 345/4250 [01:22<17:25, 3.74it/s]
645
  8%|▊ | 346/4250 [01:22<18:30, 3.52it/s]
646
  8%|▊ | 347/4250 [01:23<17:09, 3.79it/s]
647
  8%|▊ | 348/4250 [01:23<18:01, 3.61it/s]
648
  8%|▊ | 349/4250 [01:23<16:19, 3.98it/s]
649
  8%|▊ | 350/4250 [01:23<16:02, 4.05it/s]
650
  8%|▊ | 351/4250 [01:24<15:28, 4.20it/s]
651
  8%|▊ | 352/4250 [01:24<15:45, 4.12it/s]
652
  8%|▊ | 353/4250 [01:24<14:05, 4.61it/s]
653
  8%|▊ | 354/4250 [01:24<14:12, 4.57it/s]
654
  8%|▊ | 355/4250 [01:25<15:18, 4.24it/s]
655
  8%|▊ | 356/4250 [01:25<14:48, 4.38it/s]
656
  8%|▊ | 357/4250 [01:25<14:00, 4.63it/s]
657
  8%|▊ | 358/4250 [01:25<13:29, 4.81it/s]
658
  8%|▊ | 359/4250 [01:25<12:50, 5.05it/s]
659
  8%|▊ | 360/4250 [01:26<14:46, 4.39it/s]
660
  8%|▊ | 361/4250 [01:26<14:14, 4.55it/s]
661
  9%|▊ | 362/4250 [01:26<13:28, 4.81it/s]
662
  9%|▊ | 363/4250 [01:26<14:21, 4.51it/s]
663
  9%|▊ | 364/4250 [01:27<20:35, 3.15it/s]
664
  9%|▊ | 365/4250 [01:27<19:18, 3.35it/s]
665
  9%|▊ | 366/4250 [01:27<18:56, 3.42it/s]
666
  9%|▊ | 367/4250 [01:27<17:11, 3.77it/s]
667
  9%|▊ | 368/4250 [01:28<15:13, 4.25it/s]
668
  9%|▊ | 369/4250 [01:28<16:11, 4.00it/s]
669
  9%|▊ | 370/4250 [01:28<15:10, 4.26it/s]
670
  9%|▊ | 371/4250 [01:28<14:18, 4.52it/s]
671
  9%|▉ | 372/4250 [01:29<13:52, 4.66it/s]
672
  9%|▉ | 373/4250 [01:29<15:47, 4.09it/s]
673
  9%|▉ | 374/4250 [01:29<15:20, 4.21it/s]
674
  9%|▉ | 375/4250 [01:30<25:51, 2.50it/s]
675
  9%|▉ | 376/4250 [01:30<22:05, 2.92it/s]
676
  9%|▉ | 377/4250 [01:30<19:34, 3.30it/s]
677
  9%|▉ | 378/4250 [01:30<17:28, 3.69it/s]
678
  9%|▉ | 379/4250 [01:31<15:52, 4.06it/s]
679
  9%|▉ | 380/4250 [01:31<15:12, 4.24it/s]
680
  9%|▉ | 381/4250 [01:31<14:26, 4.46it/s]
681
  9%|▉ | 382/4250 [01:31<13:57, 4.62it/s]
682
  9%|▉ | 383/4250 [01:32<15:16, 4.22it/s]
683
  9%|▉ | 384/4250 [01:32<14:11, 4.54it/s]
684
  9%|▉ | 385/4250 [01:32<16:37, 3.87it/s]
685
  9%|▉ | 386/4250 [01:32<18:50, 3.42it/s]
686
  9%|▉ | 387/4250 [01:33<16:38, 3.87it/s]
687
  9%|▉ | 388/4250 [01:33<15:54, 4.05it/s]
688
  9%|▉ | 389/4250 [01:33<15:11, 4.24it/s]
689
  9%|▉ | 390/4250 [01:33<15:52, 4.05it/s]
690
  9%|▉ | 391/4250 [01:33<14:31, 4.43it/s]
691
  9%|▉ | 392/4250 [01:34<14:36, 4.40it/s]
692
  9%|▉ | 393/4250 [01:34<13:42, 4.69it/s]
693
  9%|▉ | 394/4250 [01:34<13:42, 4.69it/s]
694
  9%|▉ | 395/4250 [01:34<13:04, 4.91it/s]
695
  9%|▉ | 396/4250 [01:34<12:50, 5.00it/s]
696
  9%|▉ | 397/4250 [01:35<13:30, 4.75it/s]
697
  9%|▉ | 398/4250 [01:35<14:10, 4.53it/s]
698
  9%|▉ | 399/4250 [01:35<13:17, 4.83it/s]
699
  9%|▉ | 400/4250 [01:35<14:40, 4.37it/s]
700
  9%|▉ | 401/4250 [01:36<14:06, 4.54it/s]
701
  9%|▉ | 402/4250 [01:36<15:53, 4.04it/s]
702
  9%|▉ | 403/4250 [01:36<15:28, 4.14it/s]
703
  10%|▉ | 404/4250 [01:36<14:41, 4.36it/s]
704
  10%|▉ | 405/4250 [01:37<15:17, 4.19it/s]
705
  10%|▉ | 406/4250 [01:37<15:58, 4.01it/s]
706
  10%|▉ | 407/4250 [01:38<23:04, 2.78it/s]
707
  10%|▉ | 408/4250 [01:38<21:23, 2.99it/s]
708
  10%|▉ | 409/4250 [01:38<18:34, 3.45it/s]
709
  10%|▉ | 410/4250 [01:38<17:37, 3.63it/s]
710
  10%|▉ | 411/4250 [01:38<17:21, 3.68it/s]
711
  10%|▉ | 412/4250 [01:39<18:26, 3.47it/s]
712
  10%|▉ | 413/4250 [01:39<17:12, 3.72it/s]
713
  10%|▉ | 414/4250 [01:39<16:36, 3.85it/s]
714
  10%|▉ | 415/4250 [01:39<15:21, 4.16it/s]
715
  10%|▉ | 416/4250 [01:40<14:56, 4.28it/s]
716
  10%|▉ | 417/4250 [01:40<14:55, 4.28it/s]
717
  10%|▉ | 418/4250 [01:40<13:45, 4.64it/s]
718
  10%|▉ | 419/4250 [01:40<14:56, 4.27it/s]
719
  10%|▉ | 420/4250 [01:41<15:00, 4.25it/s]
720
  10%|▉ | 421/4250 [01:41<15:28, 4.12it/s]
721
  10%|▉ | 422/4250 [01:41<14:59, 4.26it/s]
722
  10%|▉ | 423/4250 [01:41<13:50, 4.61it/s]
723
  10%|▉ | 424/4250 [01:42<14:34, 4.37it/s]
724
  10%|█ | 425/4250 [01:42<14:28, 4.40it/s][INFO|trainer.py:805] 2024-08-30 20:27:31,190 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
725
+ [INFO|trainer.py:3788] 2024-08-30 20:27:31,192 >>
726
+ ***** Running Evaluation *****
727
+ [INFO|trainer.py:3790] 2024-08-30 20:27:31,192 >> Num examples = 6810
728
+ [INFO|trainer.py:3793] 2024-08-30 20:27:31,192 >> Batch size = 8
729
+
730
+
731
  0%| | 0/852 [00:00<?, ?it/s]
732
+
733
  1%| | 9/852 [00:00<00:09, 88.66it/s]
734
+
735
  2%|▏ | 18/852 [00:00<00:10, 79.48it/s]
736
+
737
  3%|▎ | 27/852 [00:00<00:10, 78.72it/s]
738
+
739
  4%|▍ | 35/852 [00:00<00:10, 77.17it/s]
740
+
741
  5%|▌ | 43/852 [00:00<00:10, 77.47it/s]
742
+
743
  6%|▌ | 51/852 [00:00<00:10, 78.01it/s]
744
+
745
  7%|▋ | 60/852 [00:00<00:10, 78.82it/s]
746
+
747
  8%|▊ | 68/852 [00:00<00:10, 76.55it/s]
748
+
749
  9%|▉ | 76/852 [00:00<00:10, 76.57it/s]
750
+
751
  10%|▉ | 84/852 [00:01<00:10, 74.53it/s]
752
+
753
  11%|█ | 92/852 [00:01<00:10, 74.88it/s]
754
+
755
  12%|█▏ | 100/852 [00:01<00:10, 74.67it/s]
756
+
757
  13%|█▎ | 108/852 [00:01<00:09, 74.65it/s]
758
+
759
  14%|█▎ | 116/852 [00:01<00:09, 75.39it/s]
760
+
761
  15%|█▍ | 125/852 [00:01<00:09, 76.70it/s]
762
+
763
  16%|█▌ | 133/852 [00:01<00:09, 72.98it/s]
764
+
765
  17%|█▋ | 141/852 [00:01<00:09, 73.08it/s]
766
+
767
  17%|█▋ | 149/852 [00:01<00:09, 72.57it/s]
768
+
769
  18%|█▊ | 157/852 [00:02<00:09, 74.49it/s]
770
+
771
  19%|█▉ | 165/852 [00:02<00:09, 75.40it/s]
772
+
773
  20%|██ | 173/852 [00:02<00:08, 75.96it/s]
774
+
775
  21%|██ | 181/852 [00:02<00:08, 76.27it/s]
776
+
777
  22%|██▏ | 190/852 [00:02<00:08, 77.54it/s]
778
+
779
  23%|██▎ | 198/852 [00:02<00:08, 77.17it/s]
780
+
781
  24%|██▍ | 206/852 [00:02<00:08, 76.97it/s]
782
+
783
  25%|██▌ | 214/852 [00:02<00:08, 75.85it/s]
784
+
785
  26%|██▌ | 222/852 [00:02<00:08, 77.02it/s]
786
+
787
  27%|██▋ | 230/852 [00:03<00:07, 77.88it/s]
788
+
789
  28%|██▊ | 238/852 [00:03<00:07, 77.06it/s]
790
+
791
  29%|██▉ | 246/852 [00:03<00:08, 75.49it/s]
792
+
793
  30%|██▉ | 255/852 [00:03<00:07, 77.83it/s]
794
+
795
  31%|███ | 263/852 [00:03<00:07, 78.44it/s]
796
+
797
  32%|███▏ | 271/852 [00:03<00:07, 77.33it/s]
798
+
799
  33%|███▎ | 280/852 [00:03<00:07, 78.78it/s]
800
+
801
  34%|███▍ | 288/852 [00:03<00:07, 78.13it/s]
802
+
803
  35%|███▍ | 296/852 [00:03<00:07, 78.46it/s]
804
+
805
  36%|███▌ | 305/852 [00:03<00:06, 79.51it/s]
806
+
807
  37%|���██▋ | 313/852 [00:04<00:06, 77.59it/s]
808
+
809
  38%|███▊ | 322/852 [00:04<00:06, 79.45it/s]
810
+
811
  39%|███▊ | 330/852 [00:04<00:06, 78.56it/s]
812
+
813
  40%|███▉ | 338/852 [00:04<00:06, 75.62it/s]
814
+
815
  41%|████ | 346/852 [00:04<00:06, 76.01it/s]
816
+
817
  42%|████▏ | 354/852 [00:04<00:06, 75.26it/s]
818
+
819
  42%|████▏ | 362/852 [00:04<00:06, 75.94it/s]
820
+
821
  43%|████▎ | 370/852 [00:04<00:06, 76.55it/s]
822
+
823
  44%|████▍ | 378/852 [00:04<00:06, 77.17it/s]
824
+
825
  45%|████▌ | 386/852 [00:05<00:06, 76.59it/s]
826
+
827
  46%|████▋ | 395/852 [00:05<00:05, 77.79it/s]
828
+
829
  47%|████▋ | 403/852 [00:05<00:05, 77.58it/s]
830
+
831
  48%|████▊ | 411/852 [00:05<00:05, 75.31it/s]
832
+
833
  49%|████▉ | 419/852 [00:05<00:05, 76.29it/s]
834
+
835
  50%|█████ | 427/852 [00:05<00:05, 74.65it/s]
836
+
837
  51%|█████ | 435/852 [00:05<00:05, 75.66it/s]
838
+
839
  52%|█████▏ | 443/852 [00:05<00:05, 76.76it/s]
840
+
841
  53%|█████▎ | 451/852 [00:05<00:05, 77.61it/s]
842
+
843
  54%|█████▍ | 460/852 [00:05<00:05, 77.74it/s]
844
+
845
  55%|█████▍ | 468/852 [00:06<00:05, 75.24it/s]
846
+
847
  56%|█████▌ | 476/852 [00:06<00:05, 72.34it/s]
848
+
849
  57%|█████▋ | 484/852 [00:06<00:05, 72.40it/s]
850
+
851
  58%|█████▊ | 492/852 [00:06<00:04, 74.29it/s]
852
+
853
  59%|█████▉ | 501/852 [00:06<00:04, 76.52it/s]
854
+
855
  60%|█████▉ | 509/852 [00:06<00:04, 75.58it/s]
856
+
857
  61%|██████ | 517/852 [00:06<00:04, 76.67it/s]
858
+
859
  62%|██████▏ | 525/852 [00:06<00:04, 75.52it/s]
860
+
861
  63%|██████▎ | 533/852 [00:06<00:04, 75.91it/s]
862
+
863
  63%|██████▎ | 541/852 [00:07<00:04, 77.03it/s]
864
+
865
  64%|██████▍ | 549/852 [00:07<00:04, 75.19it/s]
866
+
867
  65%|██████▌ | 557/852 [00:07<00:03, 75.67it/s]
868
+
869
  66%|██████▋ | 566/852 [00:07<00:03, 77.52it/s]
870
+
871
  67%|██████▋ | 574/852 [00:07<00:03, 77.51it/s]
872
+
873
  68%|██████▊ | 582/852 [00:07<00:03, 76.66it/s]
874
+
875
  69%|██████▉ | 590/852 [00:07<00:03, 75.98it/s]
876
+
877
  70%|███████ | 598/852 [00:07<00:03, 76.35it/s]
878
+
879
  71%|███████ | 606/852 [00:07<00:03, 76.06it/s]
880
+
881
  72%|███████▏ | 614/852 [00:08<00:03, 75.05it/s]
882
+
883
  73%|███████▎ | 622/852 [00:08<00:03, 75.43it/s]
884
+
885
  74%|███████▍ | 630/852 [00:08<00:02, 74.34it/s]
886
+
887
  75%|███████▍ | 638/852 [00:08<00:02, 75.78it/s]
888
+
889
  76%|███████▌ | 646/852 [00:08<00:02, 73.52it/s]
890
+
891
  77%|███████▋ | 654/852 [00:08<00:02, 75.08it/s]
892
+
893
  78%|███████▊ | 662/852 [00:08<00:02, 75.52it/s]
894
+
895
  79%|███████▊ | 670/852 [00:08<00:02, 76.49it/s]
896
+
897
  80%|███████▉ | 678/852 [00:08<00:02, 77.21it/s]
898
+
899
  81%|████████ | 686/852 [00:08<00:02, 77.55it/s]
900
+
901
  82%|████████▏ | 695/852 [00:09<00:01, 78.56it/s]
902
+
903
  83%|████████▎ | 704/852 [00:09<00:01, 79.29it/s]
904
+
905
  84%|████████▎ | 713/852 [00:09<00:01, 79.99it/s]
906
+
907
  85%|████████▍ | 721/852 [00:09<00:01, 78.74it/s]
908
+
909
  86%|████████▌ | 730/852 [00:09<00:01, 79.95it/s]
910
+
911
  87%|████████▋ | 738/852 [00:09<00:01, 79.68it/s]
912
+
913
  88%|████████▊ | 747/852 [00:09<00:01, 80.02it/s]
914
+
915
  89%|████████▊ | 756/852 [00:09<00:01, 80.47it/s]
916
+
917
  90%|████████▉ | 765/852 [00:09<00:01, 80.21it/s]
918
+
919
  91%|█████████ | 774/852 [00:10<00:00, 79.37it/s]
920
+
921
  92%|█████████▏| 782/852 [00:10<00:00, 78.22it/s]
922
+
923
  93%|█████████▎| 790/852 [00:10<00:00, 77.38it/s]
924
+
925
  94%|█████████▎| 798/852 [00:10<00:00, 76.61it/s]
926
+
927
  95%|█████████▍| 807/852 [00:10<00:00, 78.06it/s]
928
+
929
  96%|█████████▌| 815/852 [00:10<00:00, 76.73it/s]
930
+
931
  97%|█████████▋| 824/852 [00:10<00:00, 77.89it/s]
932
+
933
  98%|█████████▊| 833/852 [00:10<00:00, 78.72it/s]
934
+
935
  99%|█████████▊| 841/852 [00:10<00:00, 77.89it/s]
936
+
937
 
938
+
939
 
940
  10%|█ | 425/4250 [01:57<14:28, 4.40it/s]
941
+
942
+
943
  [INFO|trainer.py:3478] 2024-08-30 20:27:45,881 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-425
944
+ [INFO|configuration_utils.py:472] 2024-08-30 20:27:45,882 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-425/config.json
945
+ [INFO|modeling_utils.py:2690] 2024-08-30 20:27:47,247 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-425/model.safetensors
946
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 20:27:47,248 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-425/tokenizer_config.json
947
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 20:27:47,248 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-425/special_tokens_map.json
948
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 20:27:50,017 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
949
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 20:27:50,017 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
950
+
951
  10%|█ | 426/4250 [02:01<6:17:25, 5.92s/it]
952
  10%|█ | 427/4250 [02:01<4:27:24, 4.20s/it]
953
  10%|█ | 428/4250 [02:01<3:11:11, 3.00s/it]
954
  10%|█ | 429/4250 [02:02<2:20:01, 2.20s/it]
955
  10%|█ | 430/4250 [02:02<1:41:14, 1.59s/it]
956
  10%|█ | 431/4250 [02:02<1:15:55, 1.19s/it]
957
  10%|█ | 432/4250 [02:02<56:49, 1.12it/s]
958
  10%|█ | 433/4250 [02:02<43:08, 1.47it/s]
959
  10%|█ | 434/4250 [02:03<35:31, 1.79it/s]
960
  10%|█ | 435/4250 [02:03<28:25, 2.24it/s]
961
  10%|█ | 436/4250 [02:03<24:21, 2.61it/s]
962
  10%|█ | 437/4250 [02:03<20:52, 3.04it/s]
963
  10%|█ | 438/4250 [02:04<18:32, 3.43it/s]
964
  10%|█ | 439/4250 [02:04<17:05, 3.72it/s]
965
  10%|█ | 440/4250 [02:04<15:35, 4.07it/s]
966
  10%|█ | 441/4250 [02:04<14:45, 4.30it/s]
967
  10%|█ | 442/4250 [02:04<14:42, 4.32it/s]
968
  10%|█ | 443/4250 [02:05<16:22, 3.88it/s]
969
  10%|█ | 444/4250 [02:05<15:35, 4.07it/s]
970
  10%|█ | 445/4250 [02:05<14:09, 4.48it/s]
971
  10%|█ | 446/4250 [02:05<15:10, 4.18it/s]
972
  11%|█ | 447/4250 [02:06<14:17, 4.43it/s]
973
  11%|█ | 448/4250 [02:06<13:13, 4.79it/s]
974
  11%|█ | 449/4250 [02:06<13:06, 4.83it/s]
975
  11%|█ | 450/4250 [02:06<15:05, 4.20it/s]
976
  11%|█ | 451/4250 [02:07<15:49, 4.00it/s]
977
  11%|█ | 452/4250 [02:07<14:54, 4.25it/s]
978
  11%|█ | 453/4250 [02:07<15:41, 4.03it/s]
979
  11%|█ | 454/4250 [02:07<15:39, 4.04it/s]
980
  11%|█ | 455/4250 [02:08<15:58, 3.96it/s]
981
  11%|█ | 456/4250 [02:08<14:30, 4.36it/s]
982
  11%|█ | 457/4250 [02:08<13:21, 4.73it/s]
983
  11%|█ | 458/4250 [02:08<14:27, 4.37it/s]
984
  11%|█ | 459/4250 [02:09<24:59, 2.53it/s]
985
  11%|█ | 460/4250 [02:09<25:16, 2.50it/s]
986
  11%|█ | 461/4250 [02:10<23:13, 2.72it/s]
987
  11%|█ | 462/4250 [02:10<20:18, 3.11it/s]
988
  11%|█ | 463/4250 [02:10<18:00, 3.51it/s]
989
  11%|█ | 464/4250 [02:10<16:52, 3.74it/s]
990
  11%|█ | 465/4250 [02:10<15:29, 4.07it/s]
991
  11%|█ | 466/4250 [02:11<14:52, 4.24it/s]
992
  11%|█ | 467/4250 [02:11<15:16, 4.13it/s]
993
  11%|█ | 468/4250 [02:11<15:22, 4.10it/s]
994
  11%|█ | 469/4250 [02:11<14:56, 4.22it/s]
995
  11%|█ | 470/4250 [02:12<14:16, 4.41it/s]
996
  11%|█ | 471/4250 [02:12<13:02, 4.83it/s]
997
  11%|█ | 472/4250 [02:12<14:34, 4.32it/s]
998
  11%|█ | 473/4250 [02:12<14:11, 4.43it/s]
999
  11%|█ | 474/4250 [02:13<15:34, 4.04it/s]
1000
  11%|█ | 475/4250 [02:13<19:44, 3.19it/s]
1001
  11%|█ | 476/4250 [02:13<19:06, 3.29it/s]
1002
  11%|█ | 477/4250 [02:14<18:12, 3.45it/s]
1003
  11%|█ | 478/4250 [02:14<18:34, 3.39it/s]
1004
  11%|█▏ | 479/4250 [02:14<17:12, 3.65it/s]
1005
  11%|█▏ | 480/4250 [02:14<15:17, 4.11it/s]
1006
  11%|█▏ | 481/4250 [02:15<16:05, 3.90it/s]
1007
  11%|█▏ | 482/4250 [02:15<17:51, 3.52it/s]
1008
  11%|█▏ | 483/4250 [02:15<16:15, 3.86it/s]
1009
  11%|█▏ | 484/4250 [02:15<14:39, 4.28it/s]
1010
  11%|█▏ | 485/4250 [02:16<14:48, 4.24it/s]
1011
  11%|█▏ | 486/4250 [02:16<15:40, 4.00it/s]
1012
  11%|█▏ | 487/4250 [02:16<14:28, 4.33it/s]
1013
  11%|█▏ | 488/4250 [02:16<14:12, 4.41it/s]
1014
  12%|█▏ | 489/4250 [02:17<18:04, 3.47it/s]
1015
  12%|█▏ | 490/4250 [02:17<16:37, 3.77it/s]
1016
  12%|█▏ | 491/4250 [02:17<16:44, 3.74it/s]
1017
  12%|█▏ | 492/4250 [02:17<15:59, 3.92it/s]
1018
  12%|█▏ | 493/4250 [02:18<14:35, 4.29it/s]
1019
  12%|█▏ | 494/4250 [02:18<15:03, 4.16it/s]
1020
  12%|█▏ | 495/4250 [02:18<17:00, 3.68it/s]
1021
  12%|█▏ | 496/4250 [02:18<16:06, 3.89it/s]
1022
  12%|█▏ | 497/4250 [02:19<14:40, 4.26it/s]
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.988249118683902,
3
+ "total_flos": 1.2649810588547778e+16,
4
+ "train_loss": 0.10639642311544979,
5
+ "train_runtime": 1208.2019,
6
+ "train_samples": 27229,
7
+ "train_samples_per_second": 225.368,
8
+ "train_steps_per_second": 3.518
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.17333222008850296,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2127",
4
+ "epoch": 9.988249118683902,
5
+ "eval_steps": 500,
6
+ "global_step": 4250,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9988249118683902,
13
+ "eval_accuracy": 0.7849561289082346,
14
+ "eval_f1": 0.1613083366573594,
15
+ "eval_loss": 0.6610585451126099,
16
+ "eval_precision": 0.08832008386476806,
17
+ "eval_recall": 0.9292279411764706,
18
+ "eval_runtime": 14.7703,
19
+ "eval_samples_per_second": 461.06,
20
+ "eval_steps_per_second": 57.683,
21
+ "step": 425
22
+ },
23
+ {
24
+ "epoch": 1.1750881316098707,
25
+ "grad_norm": 1.4406206607818604,
26
+ "learning_rate": 4.411764705882353e-05,
27
+ "loss": 0.3349,
28
+ "step": 500
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_accuracy": 0.7551182940392986,
33
+ "eval_f1": 0.14506880733944952,
34
+ "eval_loss": 0.9204075932502747,
35
+ "eval_precision": 0.07866915422885572,
36
+ "eval_recall": 0.9301470588235294,
37
+ "eval_runtime": 14.7453,
38
+ "eval_samples_per_second": 461.844,
39
+ "eval_steps_per_second": 57.781,
40
+ "step": 851
41
+ },
42
+ {
43
+ "epoch": 2.3501762632197414,
44
+ "grad_norm": 2.988006591796875,
45
+ "learning_rate": 3.8235294117647055e-05,
46
+ "loss": 0.1788,
47
+ "step": 1000
48
+ },
49
+ {
50
+ "epoch": 2.99882491186839,
51
+ "eval_accuracy": 0.7645035495077375,
52
+ "eval_f1": 0.15487907225146869,
53
+ "eval_loss": 0.9544711709022522,
54
+ "eval_precision": 0.0844496214327315,
55
+ "eval_recall": 0.9329044117647058,
56
+ "eval_runtime": 14.8107,
57
+ "eval_samples_per_second": 459.803,
58
+ "eval_steps_per_second": 57.526,
59
+ "step": 1276
60
+ },
61
+ {
62
+ "epoch": 3.525264394829612,
63
+ "grad_norm": 1.1645787954330444,
64
+ "learning_rate": 3.235294117647059e-05,
65
+ "loss": 0.1227,
66
+ "step": 1500
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "eval_accuracy": 0.7692064756203056,
71
+ "eval_f1": 0.1618332674832082,
72
+ "eval_loss": 1.0923608541488647,
73
+ "eval_precision": 0.08852770813521224,
74
+ "eval_recall": 0.9411764705882353,
75
+ "eval_runtime": 14.8211,
76
+ "eval_samples_per_second": 459.48,
77
+ "eval_steps_per_second": 57.486,
78
+ "step": 1702
79
+ },
80
+ {
81
+ "epoch": 4.700352526439483,
82
+ "grad_norm": 1.1214195489883423,
83
+ "learning_rate": 2.647058823529412e-05,
84
+ "loss": 0.0856,
85
+ "step": 2000
86
+ },
87
+ {
88
+ "epoch": 4.9988249118683905,
89
+ "eval_accuracy": 0.7932840841995413,
90
+ "eval_f1": 0.17333222008850296,
91
+ "eval_loss": 1.0502684116363525,
92
+ "eval_precision": 0.09532555790247038,
93
+ "eval_recall": 0.9540441176470589,
94
+ "eval_runtime": 14.797,
95
+ "eval_samples_per_second": 460.229,
96
+ "eval_steps_per_second": 57.579,
97
+ "step": 2127
98
+ },
99
+ {
100
+ "epoch": 5.875440658049354,
101
+ "grad_norm": 1.1390776634216309,
102
+ "learning_rate": 2.058823529411765e-05,
103
+ "loss": 0.0597,
104
+ "step": 2500
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "eval_accuracy": 0.7787771018990209,
109
+ "eval_f1": 0.16632958498503356,
110
+ "eval_loss": 1.2641881704330444,
111
+ "eval_precision": 0.0911913421449481,
112
+ "eval_recall": 0.9448529411764706,
113
+ "eval_runtime": 14.5753,
114
+ "eval_samples_per_second": 467.228,
115
+ "eval_steps_per_second": 58.455,
116
+ "step": 2553
117
+ },
118
+ {
119
+ "epoch": 6.9988249118683905,
120
+ "eval_accuracy": 0.7828758564817994,
121
+ "eval_f1": 0.16898640903880793,
122
+ "eval_loss": 1.3261910676956177,
123
+ "eval_precision": 0.09275570735214812,
124
+ "eval_recall": 0.9485294117647058,
125
+ "eval_runtime": 14.5617,
126
+ "eval_samples_per_second": 467.665,
127
+ "eval_steps_per_second": 58.51,
128
+ "step": 2978
129
+ },
130
+ {
131
+ "epoch": 7.050528789659224,
132
+ "grad_norm": 0.5195357799530029,
133
+ "learning_rate": 1.4705882352941177e-05,
134
+ "loss": 0.0458,
135
+ "step": 3000
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_accuracy": 0.7848943386381424,
140
+ "eval_f1": 0.1687846203975236,
141
+ "eval_loss": 1.3697636127471924,
142
+ "eval_precision": 0.09259921344297461,
143
+ "eval_recall": 0.9522058823529411,
144
+ "eval_runtime": 14.6608,
145
+ "eval_samples_per_second": 464.504,
146
+ "eval_steps_per_second": 58.114,
147
+ "step": 3404
148
+ },
149
+ {
150
+ "epoch": 8.225616921269095,
151
+ "grad_norm": 0.8723571300506592,
152
+ "learning_rate": 8.823529411764707e-06,
153
+ "loss": 0.0343,
154
+ "step": 3500
155
+ },
156
+ {
157
+ "epoch": 8.99882491186839,
158
+ "eval_accuracy": 0.782223625853049,
159
+ "eval_f1": 0.165499476776946,
160
+ "eval_loss": 1.4433350563049316,
161
+ "eval_precision": 0.09069254521393913,
162
+ "eval_recall": 0.9448529411764706,
163
+ "eval_runtime": 15.0504,
164
+ "eval_samples_per_second": 452.478,
165
+ "eval_steps_per_second": 56.61,
166
+ "step": 3829
167
+ },
168
+ {
169
+ "epoch": 9.400705052878966,
170
+ "grad_norm": 0.6075822710990906,
171
+ "learning_rate": 2.9411764705882355e-06,
172
+ "loss": 0.0292,
173
+ "step": 4000
174
+ },
175
+ {
176
+ "epoch": 9.988249118683902,
177
+ "eval_accuracy": 0.7820794485561674,
178
+ "eval_f1": 0.16674769081186194,
179
+ "eval_loss": 1.4861844778060913,
180
+ "eval_precision": 0.0914341567442687,
181
+ "eval_recall": 0.9457720588235294,
182
+ "eval_runtime": 15.3988,
183
+ "eval_samples_per_second": 442.243,
184
+ "eval_steps_per_second": 55.329,
185
+ "step": 4250
186
+ },
187
+ {
188
+ "epoch": 9.988249118683902,
189
+ "step": 4250,
190
+ "total_flos": 1.2649810588547778e+16,
191
+ "train_loss": 0.10639642311544979,
192
+ "train_runtime": 1208.2019,
193
+ "train_samples_per_second": 225.368,
194
+ "train_steps_per_second": 3.518
195
+ }
196
+ ],
197
+ "logging_steps": 500,
198
+ "max_steps": 4250,
199
+ "num_input_tokens_seen": 0,
200
+ "num_train_epochs": 10,
201
+ "save_steps": 500,
202
+ "stateful_callbacks": {
203
+ "TrainerControl": {
204
+ "args": {
205
+ "should_epoch_stop": false,
206
+ "should_evaluate": false,
207
+ "should_log": false,
208
+ "should_save": true,
209
+ "should_training_stop": true
210
+ },
211
+ "attributes": {}
212
+ }
213
+ },
214
+ "total_flos": 1.2649810588547778e+16,
215
+ "train_batch_size": 32,
216
+ "trial_name": null,
217
+ "trial_params": null
218
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7fb15010252c8cbc6a884d820aedc076e55d4b9641d17f55264378c0fcf155
3
+ size 5176
vocab.json ADDED
The diff for this file is too large to render. See raw diff