Rodrigo1771 committed on
Commit
0f6759c
•
1 Parent(s): 10a928e

End of training

README.md ADDED
@@ -0,0 +1,63 @@
+ ---
+ license: apache-2.0
+ base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
+ tags:
+ - token-classification
+ - generated_from_trainer
+ datasets:
+ - Rodrigo1771/multi-train-drugtemist-dev-ner
+ model-index:
+ - name: output
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # output
+
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/multi-train-drugtemist-dev-ner dataset.
+ It achieves the following results on the evaluation set:
+ - eval_loss: 2.4031
+ - eval_precision: 0.0004
+ - eval_recall: 0.0386
+ - eval_f1: 0.0007
+ - eval_accuracy: 0.0028
+ - eval_runtime: 16.7962
+ - eval_samples_per_second: 405.27
+ - eval_steps_per_second: 50.666
+ - step: 0
+
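These figures are internally consistent: eval_f1 is the harmonic mean of eval_precision and eval_recall, and the near-zero scores fit an evaluation run at step 0 with a freshly initialized classification head (see do_train=False and the "newly initialized: ['classifier.bias', 'classifier.weight']" warning in train.log below). A quick sketch of the F1 check, using the full-precision values from all_results.json:

```python
# Sanity check (not part of the repo): F1 is the harmonic mean of precision and recall.
precision = 0.00035028898841544273
recall = 0.03860294117647059
f1 = 2 * precision * recall / (precision + recall)
print(f1)  # ~0.000694, matching eval_f1
```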
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 4
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 10.0
+
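For anyone reproducing this setup, the list above corresponds roughly to the following `TrainingArguments`. This is a sketch only: the exact launch command is not part of the commit, and `output_dir` is a placeholder.

```python
from transformers import TrainingArguments

# Sketch of the hyperparameters listed above; "output" is a placeholder directory.
args = TrainingArguments(
    output_dir="output",
    learning_rate=5e-05,
    per_device_train_batch_size=4,   # train_batch_size
    per_device_eval_batch_size=8,    # eval_batch_size
    gradient_accumulation_steps=4,   # total_train_batch_size = 4 * 4 = 16
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=10.0,
    optim="adamw_torch",             # Adam with betas=(0.9, 0.999) and epsilon=1e-08
)
```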
+ ### Framework versions
+
+ - Transformers 4.40.2
+ - Pytorch 2.2.1+cu121
+ - Datasets 2.19.1
+ - Tokenizers 0.19.1
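Once the files added in this commit are available locally (or the repository is referenced by its Hub id), inference is a standard token-classification pipeline. A minimal sketch, assuming a placeholder `./output` directory and an invented Spanish sentence:

```python
from transformers import pipeline

# "./output" is a placeholder for wherever config.json, model.safetensors and the
# tokenizer files from this commit are stored (or the model's Hub id).
ner = pipeline(
    "token-classification",
    model="./output",
    aggregation_strategy="simple",  # merge B-/I- sub-tokens into whole entities
)

print(ner("El paciente recibió paracetamol por fiebre y cefalea."))
```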
all_results.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "eval_accuracy": 0.002835545241707918,
+ "eval_f1": 0.0006942779922141681,
+ "eval_loss": 2.4030845165252686,
+ "eval_precision": 0.00035028898841544273,
+ "eval_recall": 0.03860294117647059,
+ "eval_runtime": 16.7962,
+ "eval_samples": 6807,
+ "eval_samples_per_second": 405.27,
+ "eval_steps_per_second": 50.666,
+ "predict_accuracy": 0.002835545241707918,
+ "predict_f1": 0.0006942779922141681,
+ "predict_loss": 2.4030845165252686,
+ "predict_precision": 0.00035028898841544273,
+ "predict_recall": 0.03860294117647059,
+ "predict_runtime": 16.0715,
+ "predict_samples_per_second": 423.545,
+ "predict_steps_per_second": 52.951
+ }
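The throughput fields above line up with the 6807 evaluation examples and the batch size of 8 recorded in train.log; a small consistency check:

```python
import math

eval_samples, eval_runtime, batch_size = 6807, 16.7962, 8
print(eval_samples / eval_runtime)                          # ~405.27, i.e. eval_samples_per_second
print(math.ceil(eval_samples / batch_size))                 # 851 batches, the total in the progress bars
print(math.ceil(eval_samples / batch_size) / eval_runtime)  # ~50.67, i.e. eval_steps_per_second
```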
config.json ADDED
@@ -0,0 +1,51 @@
+ {
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
+ "architectures": [
+ "RobertaForTokenClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "eos_token_id": 2,
+ "finetuning_task": "ner",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "O",
+ "1": "B-ENFERMEDAD",
+ "2": "I-ENFERMEDAD",
+ "3": "B-PROCEDIMIENTO",
+ "4": "I-PROCEDIMIENTO",
+ "5": "B-SINTOMA",
+ "6": "I-SINTOMA",
+ "7": "B-FARMACO",
+ "8": "I-FARMACO"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "B-ENFERMEDAD": 1,
+ "B-FARMACO": 7,
+ "B-PROCEDIMIENTO": 3,
+ "B-SINTOMA": 5,
+ "I-ENFERMEDAD": 2,
+ "I-FARMACO": 8,
+ "I-PROCEDIMIENTO": 4,
+ "I-SINTOMA": 6,
+ "O": 0
+ },
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "torch_dtype": "float32",
+ "transformers_version": "4.40.2",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 50262
+ }
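The `id2label` map above is what converts the model's 9-way per-token logits into BIO tags for the ENFERMEDAD, PROCEDIMIENTO, SINTOMA and FARMACO entity types. A sketch of that step, again with a placeholder `./output` path and an invented sentence:

```python
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

model_dir = "./output"  # placeholder for the files added in this commit
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForTokenClassification.from_pretrained(model_dir)

enc = tokenizer("Se administró ibuprofeno por la cefalea.", return_tensors="pt")
with torch.no_grad():
    logits = model(**enc).logits              # shape: (1, seq_len, 9)
pred_ids = logits.argmax(dim=-1)[0].tolist()
tags = [model.config.id2label[i] for i in pred_ids]
print(list(zip(tokenizer.convert_ids_to_tokens(enc["input_ids"][0]), tags)))
```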
eval_results.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "eval_accuracy": 0.002835545241707918,
+ "eval_f1": 0.0006942779922141681,
+ "eval_loss": 2.4030845165252686,
+ "eval_precision": 0.00035028898841544273,
+ "eval_recall": 0.03860294117647059,
+ "eval_runtime": 16.7962,
+ "eval_samples": 6807,
+ "eval_samples_per_second": 405.27,
+ "eval_steps_per_second": 50.666
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6cb8f27d656928c021399304ae37e82d147c80df5386c804b66e664424a60fee
+ size 496262556
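This entry is a Git LFS pointer rather than the weights themselves. The 496,262,556-byte object it points to is a float32 checkpoint (see `torch_dtype` in config.json), which at 4 bytes per parameter works out to roughly 124M parameters, plausible for a RoBERTa-base encoder plus a small token-classification head. A sketch of checking that against a locally downloaded copy:

```python
from safetensors import safe_open

# "model.safetensors" is a placeholder path to the resolved LFS object.
total_params = 0
with safe_open("model.safetensors", framework="pt") as f:
    for name in f.keys():
        total_params += f.get_tensor(name).numel()

print(total_params)      # parameter count
print(total_params * 4)  # float32 bytes; close to the 496,262,556-byte size above
```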
predict_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "predict_accuracy": 0.002835545241707918,
+ "predict_f1": 0.0006942779922141681,
+ "predict_loss": 2.4030845165252686,
+ "predict_precision": 0.00035028898841544273,
+ "predict_recall": 0.03860294117647059,
+ "predict_runtime": 16.0715,
+ "predict_samples_per_second": 423.545,
+ "predict_steps_per_second": 52.951
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
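These are the standard RoBERTa special tokens; `<s>` doubles as bos/cls and `</s>` as eos/sep. A small sketch (placeholder `./output` path) showing where they end up in a tokenized sequence:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./output")  # placeholder path to this commit's files
ids = tokenizer("paracetamol")["input_ids"]
print(tokenizer.convert_ids_to_tokens(ids))  # ['<s>', ..., '</s>'], bos/cls first and eos/sep last
```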
tb/events.out.tfevents.1715608825.c331905616cf.2224.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6b43bba93fcdae059a879a573e39c04184713f463eaf8c074aa07d771faa37e
+ size 486
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "50261": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "errors": "replace",
+ "mask_token": "<mask>",
+ "max_len": 512,
+ "model_max_length": 512,
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "tokenizer_class": "RobertaTokenizer",
+ "trim_offsets": true,
+ "unk_token": "<unk>"
+ }
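`add_prefix_space: true` lets this byte-level BPE tokenizer accept pre-split words, which is how NER examples are normally fed in, and the fast tokenizer built from tokenizer.json exposes `word_ids()` for aligning sub-tokens with word-level labels. A sketch with a placeholder path and an invented token list:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./output")  # placeholder path; reads this config
enc = tokenizer(["Se", "administró", "ibuprofeno", "."], is_split_into_words=True)
print(tokenizer.convert_ids_to_tokens(enc["input_ids"]))
print(enc.word_ids())  # e.g. [None, 0, 1, 1, 2, 2, 3, None]; None marks the special tokens
```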
train.log ADDED
@@ -0,0 +1,357 @@
1
+ 2024-05-13 13:59:44.191151: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2
+ 2024-05-13 13:59:44.191275: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
3
+ 2024-05-13 13:59:44.193141: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
4
+ 2024-05-13 13:59:45.319933: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
5
+ 05/13/2024 13:59:47 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
6
+ 05/13/2024 13:59:47 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
7
+ _n_gpu=1,
8
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},
9
+ adafactor=False,
10
+ adam_beta1=0.9,
11
+ adam_beta2=0.999,
12
+ adam_epsilon=1e-08,
13
+ auto_find_batch_size=False,
14
+ bf16=False,
15
+ bf16_full_eval=False,
16
+ data_seed=None,
17
+ dataloader_drop_last=False,
18
+ dataloader_num_workers=0,
19
+ dataloader_persistent_workers=False,
20
+ dataloader_pin_memory=True,
21
+ dataloader_prefetch_factor=None,
22
+ ddp_backend=None,
23
+ ddp_broadcast_buffers=None,
24
+ ddp_bucket_cap_mb=None,
25
+ ddp_find_unused_parameters=None,
26
+ ddp_timeout=1800,
27
+ debug=[],
28
+ deepspeed=None,
29
+ disable_tqdm=False,
30
+ dispatch_batches=None,
31
+ do_eval=True,
32
+ do_predict=True,
33
+ do_train=False,
34
+ eval_accumulation_steps=None,
35
+ eval_delay=0,
36
+ eval_do_concat_batches=True,
37
+ eval_steps=None,
38
+ evaluation_strategy=epoch,
39
+ fp16=False,
40
+ fp16_backend=auto,
41
+ fp16_full_eval=False,
42
+ fp16_opt_level=O1,
43
+ fsdp=[],
44
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
45
+ fsdp_min_num_params=0,
46
+ fsdp_transformer_layer_cls_to_wrap=None,
47
+ full_determinism=False,
48
+ gradient_accumulation_steps=4,
49
+ gradient_checkpointing=False,
50
+ gradient_checkpointing_kwargs=None,
51
+ greater_is_better=True,
52
+ group_by_length=False,
53
+ half_precision_backend=auto,
54
+ hub_always_push=False,
55
+ hub_model_id=None,
56
+ hub_private_repo=False,
57
+ hub_strategy=every_save,
58
+ hub_token=<HUB_TOKEN>,
59
+ ignore_data_skip=False,
60
+ include_inputs_for_metrics=False,
61
+ include_num_input_tokens_seen=False,
62
+ include_tokens_per_second=False,
63
+ jit_mode_eval=False,
64
+ label_names=None,
65
+ label_smoothing_factor=0.0,
66
+ learning_rate=5e-05,
67
+ length_column_name=length,
68
+ load_best_model_at_end=True,
69
+ local_rank=0,
70
+ log_level=passive,
71
+ log_level_replica=warning,
72
+ log_on_each_node=True,
73
+ logging_dir=/content/dissertation/scripts/ner/output/tb,
74
+ logging_first_step=False,
75
+ logging_nan_inf_filter=True,
76
+ logging_steps=500,
77
+ logging_strategy=steps,
78
+ lr_scheduler_kwargs={},
79
+ lr_scheduler_type=linear,
80
+ max_grad_norm=1.0,
81
+ max_steps=-1,
82
+ metric_for_best_model=f1,
83
+ mp_parameters=,
84
+ neftune_noise_alpha=None,
85
+ no_cuda=False,
86
+ num_train_epochs=10.0,
87
+ optim=adamw_torch,
88
+ optim_args=None,
89
+ optim_target_modules=None,
90
+ output_dir=/content/dissertation/scripts/ner/output,
91
+ overwrite_output_dir=True,
92
+ past_index=-1,
93
+ per_device_eval_batch_size=8,
94
+ per_device_train_batch_size=4,
95
+ prediction_loss_only=False,
96
+ push_to_hub=True,
97
+ push_to_hub_model_id=None,
98
+ push_to_hub_organization=None,
99
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
100
+ ray_scope=last,
101
+ remove_unused_columns=True,
102
+ report_to=['tensorboard'],
103
+ resume_from_checkpoint=None,
104
+ run_name=/content/dissertation/scripts/ner/output,
105
+ save_on_each_node=False,
106
+ save_only_model=False,
107
+ save_safetensors=True,
108
+ save_steps=500,
109
+ save_strategy=epoch,
110
+ save_total_limit=None,
111
+ seed=42,
112
+ skip_memory_metrics=True,
113
+ split_batches=None,
114
+ tf32=None,
115
+ torch_compile=False,
116
+ torch_compile_backend=None,
117
+ torch_compile_mode=None,
118
+ torchdynamo=None,
119
+ tpu_metrics_debug=False,
120
+ tpu_num_cores=None,
121
+ use_cpu=False,
122
+ use_ipex=False,
123
+ use_legacy_prediction_loop=False,
124
+ use_mps_device=False,
125
+ warmup_ratio=0.0,
126
+ warmup_steps=0,
127
+ weight_decay=0.0,
128
+ )
129
+ /usr/local/lib/python3.10/dist-packages/datasets/load.py:1486: FutureWarning: The repository for Rodrigo1771/multi-train-drugtemist-dev-ner contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/Rodrigo1771/multi-train-drugtemist-dev-ner
130
+ You can avoid this message in future by passing the argument `trust_remote_code=True`.
131
+ Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
132
+ warnings.warn(
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+ /usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
141
+ warnings.warn(
142
+ [INFO|configuration_utils.py:726] 2024-05-13 13:59:59,622 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
143
+ [INFO|configuration_utils.py:789] 2024-05-13 13:59:59,630 >> Model config RobertaConfig {
144
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
145
+ "architectures": [
146
+ "RobertaForMaskedLM"
147
+ ],
148
+ "attention_probs_dropout_prob": 0.1,
149
+ "bos_token_id": 0,
150
+ "classifier_dropout": null,
151
+ "eos_token_id": 2,
152
+ "finetuning_task": "ner",
153
+ "gradient_checkpointing": false,
154
+ "hidden_act": "gelu",
155
+ "hidden_dropout_prob": 0.1,
156
+ "hidden_size": 768,
157
+ "id2label": {
158
+ "0": "O",
159
+ "1": "B-ENFERMEDAD",
160
+ "2": "I-ENFERMEDAD",
161
+ "3": "B-PROCEDIMIENTO",
162
+ "4": "I-PROCEDIMIENTO",
163
+ "5": "B-SINTOMA",
164
+ "6": "I-SINTOMA",
165
+ "7": "B-FARMACO",
166
+ "8": "I-FARMACO"
167
+ },
168
+ "initializer_range": 0.02,
169
+ "intermediate_size": 3072,
170
+ "label2id": {
171
+ "B-ENFERMEDAD": 1,
172
+ "B-FARMACO": 7,
173
+ "B-PROCEDIMIENTO": 3,
174
+ "B-SINTOMA": 5,
175
+ "I-ENFERMEDAD": 2,
176
+ "I-FARMACO": 8,
177
+ "I-PROCEDIMIENTO": 4,
178
+ "I-SINTOMA": 6,
179
+ "O": 0
180
+ },
181
+ "layer_norm_eps": 1e-05,
182
+ "max_position_embeddings": 514,
183
+ "model_type": "roberta",
184
+ "num_attention_heads": 12,
185
+ "num_hidden_layers": 12,
186
+ "pad_token_id": 1,
187
+ "position_embedding_type": "absolute",
188
+ "transformers_version": "4.40.2",
189
+ "type_vocab_size": 1,
190
+ "use_cache": true,
191
+ "vocab_size": 50262
192
+ }
193
+
194
+ [INFO|configuration_utils.py:726] 2024-05-13 13:59:59,860 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
195
+ [INFO|configuration_utils.py:789] 2024-05-13 13:59:59,860 >> Model config RobertaConfig {
196
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
197
+ "architectures": [
198
+ "RobertaForMaskedLM"
199
+ ],
200
+ "attention_probs_dropout_prob": 0.1,
201
+ "bos_token_id": 0,
202
+ "classifier_dropout": null,
203
+ "eos_token_id": 2,
204
+ "gradient_checkpointing": false,
205
+ "hidden_act": "gelu",
206
+ "hidden_dropout_prob": 0.1,
207
+ "hidden_size": 768,
208
+ "initializer_range": 0.02,
209
+ "intermediate_size": 3072,
210
+ "layer_norm_eps": 1e-05,
211
+ "max_position_embeddings": 514,
212
+ "model_type": "roberta",
213
+ "num_attention_heads": 12,
214
+ "num_hidden_layers": 12,
215
+ "pad_token_id": 1,
216
+ "position_embedding_type": "absolute",
217
+ "transformers_version": "4.40.2",
218
+ "type_vocab_size": 1,
219
+ "use_cache": true,
220
+ "vocab_size": 50262
221
+ }
222
+
223
+ [INFO|tokenization_utils_base.py:2087] 2024-05-13 14:00:01,284 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
224
+ [INFO|tokenization_utils_base.py:2087] 2024-05-13 14:00:01,284 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
225
+ [INFO|tokenization_utils_base.py:2087] 2024-05-13 14:00:01,284 >> loading file tokenizer.json from cache at None
226
+ [INFO|tokenization_utils_base.py:2087] 2024-05-13 14:00:01,284 >> loading file added_tokens.json from cache at None
227
+ [INFO|tokenization_utils_base.py:2087] 2024-05-13 14:00:01,284 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
228
+ [INFO|tokenization_utils_base.py:2087] 2024-05-13 14:00:01,284 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
229
+ [INFO|configuration_utils.py:726] 2024-05-13 14:00:01,284 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
230
+ [INFO|configuration_utils.py:789] 2024-05-13 14:00:01,285 >> Model config RobertaConfig {
231
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
232
+ "architectures": [
233
+ "RobertaForMaskedLM"
234
+ ],
235
+ "attention_probs_dropout_prob": 0.1,
236
+ "bos_token_id": 0,
237
+ "classifier_dropout": null,
238
+ "eos_token_id": 2,
239
+ "gradient_checkpointing": false,
240
+ "hidden_act": "gelu",
241
+ "hidden_dropout_prob": 0.1,
242
+ "hidden_size": 768,
243
+ "initializer_range": 0.02,
244
+ "intermediate_size": 3072,
245
+ "layer_norm_eps": 1e-05,
246
+ "max_position_embeddings": 514,
247
+ "model_type": "roberta",
248
+ "num_attention_heads": 12,
249
+ "num_hidden_layers": 12,
250
+ "pad_token_id": 1,
251
+ "position_embedding_type": "absolute",
252
+ "transformers_version": "4.40.2",
253
+ "type_vocab_size": 1,
254
+ "use_cache": true,
255
+ "vocab_size": 50262
256
+ }
257
+
258
+ [INFO|configuration_utils.py:726] 2024-05-13 14:00:01,385 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
259
+ [INFO|configuration_utils.py:789] 2024-05-13 14:00:01,386 >> Model config RobertaConfig {
260
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
261
+ "architectures": [
262
+ "RobertaForMaskedLM"
263
+ ],
264
+ "attention_probs_dropout_prob": 0.1,
265
+ "bos_token_id": 0,
266
+ "classifier_dropout": null,
267
+ "eos_token_id": 2,
268
+ "gradient_checkpointing": false,
269
+ "hidden_act": "gelu",
270
+ "hidden_dropout_prob": 0.1,
271
+ "hidden_size": 768,
272
+ "initializer_range": 0.02,
273
+ "intermediate_size": 3072,
274
+ "layer_norm_eps": 1e-05,
275
+ "max_position_embeddings": 514,
276
+ "model_type": "roberta",
277
+ "num_attention_heads": 12,
278
+ "num_hidden_layers": 12,
279
+ "pad_token_id": 1,
280
+ "position_embedding_type": "absolute",
281
+ "transformers_version": "4.40.2",
282
+ "type_vocab_size": 1,
283
+ "use_cache": true,
284
+ "vocab_size": 50262
285
+ }
286
+
287
+ [INFO|modeling_utils.py:3429] 2024-05-13 14:00:06,524 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
288
+ [INFO|modeling_utils.py:4160] 2024-05-13 14:00:06,768 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
289
+ - This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
290
+ - This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
291
+ [WARNING|modeling_utils.py:4172] 2024-05-13 14:00:06,768 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
292
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
293
+
294
+
295
+ /content/dissertation/scripts/ner/run_ner.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
296
+ metric = load_metric("seqeval")
297
+ /usr/local/lib/python3.10/dist-packages/datasets/load.py:759: FutureWarning: The repository for seqeval contains custom code which must be executed to correctly load the metric. You can inspect the repository content at https://raw.githubusercontent.com/huggingface/datasets/2.19.1/metrics/seqeval/seqeval.py
298
+ You can avoid this message in future by passing the argument `trust_remote_code=True`.
299
+ Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
300
+ warnings.warn(
301
+
302
+ 05/13/2024 14:00:09 - INFO - __main__ - *** Evaluate ***
303
+ [INFO|trainer.py:786] 2024-05-13 14:00:09,048 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
304
+ [INFO|trainer.py:3614] 2024-05-13 14:00:09,055 >> ***** Running Evaluation *****
305
+ [INFO|trainer.py:3616] 2024-05-13 14:00:09,055 >> Num examples = 6807
306
+ [INFO|trainer.py:3619] 2024-05-13 14:00:09,055 >> Batch size = 8
307
+
[... per-batch evaluation progress output (0/851 to 844/851 batches, about 12 s at roughly 70 it/s) omitted ...]
/usr/local/lib/python3.10/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
414
+ _warn_prf(average, modifier, msg_start, len(result))
415
+
416
+ ***** eval metrics *****
417
+ eval_accuracy = 0.0028
418
+ eval_f1 = 0.0007
419
+ eval_loss = 2.4031
420
+ eval_precision = 0.0004
421
+ eval_recall = 0.0386
422
+ eval_runtime = 0:00:16.79
423
+ eval_samples = 6807
424
+ eval_samples_per_second = 405.27
425
+ eval_steps_per_second = 50.666
426
+ 05/13/2024 14:00:25 - INFO - __main__ - *** Predict ***
427
+ [INFO|trainer.py:786] 2024-05-13 14:00:25,855 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
428
+ [INFO|trainer.py:3614] 2024-05-13 14:00:25,857 >> ***** Running Prediction *****
429
+ [INFO|trainer.py:3616] 2024-05-13 14:00:25,857 >> Num examples = 6807
430
+ [INFO|trainer.py:3619] 2024-05-13 14:00:25,857 >> Batch size = 8
431
+
[... per-batch prediction progress output (0/851 to 846/851 batches, about 12 s at roughly 70 it/s) omitted ...]
538
+ [INFO|trainer.py:3305] 2024-05-13 14:00:42,250 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
539
+ [INFO|configuration_utils.py:471] 2024-05-13 14:00:42,251 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
540
+ [INFO|modeling_utils.py:2590] 2024-05-13 14:00:43,185 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
541
+ [INFO|tokenization_utils_base.py:2488] 2024-05-13 14:00:43,186 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
542
+ [INFO|tokenization_utils_base.py:2497] 2024-05-13 14:00:43,186 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
543
+ [INFO|modelcard.py:450] 2024-05-13 14:00:43,335 >> Dropping the following result as it does not have all the necessary fields:
544
+ {'task': {'name': 'Token Classification', 'type': 'token-classification'}, 'dataset': {'name': 'Rodrigo1771/multi-train-drugtemist-dev-ner', 'type': 'Rodrigo1771/multi-train-drugtemist-dev-ner', 'config': 'MultiTrainDrugTEMISTDevNER', 'split': 'validation', 'args': 'MultiTrainDrugTEMISTDevNER'}}
545
+ ***** predict metrics *****
546
+ predict_accuracy = 0.0028
547
+ predict_f1 = 0.0007
548
+ predict_loss = 2.4031
549
+ predict_precision = 0.0004
550
+ predict_recall = 0.0386
551
+ predict_runtime = 0:00:16.07
552
+ predict_samples_per_second = 423.545
553
+ predict_steps_per_second = 52.951
554
+
555
+
556
+
557
+
558
+
559
+
560
+
561
+
562
+
563
+
564
+
565
+
566
+
567
+
568
+
569
+
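The `UndefinedMetricWarning` in the log above comes from seqeval: some labels have no gold spans in this split, so their recall and F-score fall back to 0.0. A toy sketch of the same situation, using the `zero_division` argument the warning points to (assumed available in the installed seqeval release); the tag sequences are invented, not data from this run:

```python
from seqeval.metrics import classification_report

# Toy example: no true FARMACO span exists, so its recall/F1 are ill-defined.
y_true = [["O", "O", "B-SINTOMA", "O", "O"]]
y_pred = [["O", "B-FARMACO", "B-SINTOMA", "O", "O"]]

# zero_division=0 makes the 0.0 fallback explicit and silences the warning.
print(classification_report(y_true, y_pred, zero_division=0))
```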
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:18cce49dc172023921a8c234d2d643afeaa0b4f8f209fd2e1d76d267cbbe3c95
+ size 5048
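training_args.bin is the pickled `TrainingArguments` object the Trainer saves next to the model; its full contents are dumped in train.log above. A sketch of inspecting a downloaded copy (it is a pickle, so only load files you trust):

```python
import torch

# Path is a placeholder for a locally downloaded copy of training_args.bin.
args = torch.load("training_args.bin")
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```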
vocab.json ADDED
The diff for this file is too large to render. See raw diff