arubenruben committed
Commit 9bc95c7 (1 parent: 11bf538)

Training in progress, epoch 1

README.md ADDED
@@ -0,0 +1,103 @@
+ ---
+ license: mit
+ base_model: neuralmind/bert-large-portuguese-cased
+ tags:
+ - generated_from_trainer
+ datasets:
+ - harem
+ metrics:
+ - precision
+ - recall
+ - f1
+ - accuracy
+ model-index:
+ - name: NER_harem_bert-large-portuguese-cased
+   results:
+   - task:
+       name: Token Classification
+       type: token-classification
+     dataset:
+       name: harem
+       type: harem
+       config: default
+       split: test
+       args: default
+     metrics:
+     - name: Precision
+       type: precision
+       value: 0.7077353867693384
+     - name: Recall
+       type: recall
+       value: 0.7553231228987672
+     - name: F1
+       type: f1
+       value: 0.7307553306830503
+     - name: Accuracy
+       type: accuracy
+       value: 0.9551379448220711
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # NER_harem_bert-large-portuguese-cased
+
+ This model is a fine-tuned version of [neuralmind/bert-large-portuguese-cased](https://huggingface.co/neuralmind/bert-large-portuguese-cased) on the harem dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.2487
+ - Precision: 0.7077
+ - Recall: 0.7553
+ - F1: 0.7308
+ - Accuracy: 0.9551
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 3e-05
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 300
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+ | No log | 1.0 | 16 | 0.6334 | 0.0163 | 0.0078 | 0.0106 | 0.8468 |
+ | No log | 2.0 | 32 | 0.4537 | 0.2614 | 0.3112 | 0.2841 | 0.8826 |
+ | No log | 3.0 | 48 | 0.3117 | 0.5262 | 0.5671 | 0.5458 | 0.9231 |
+ | No log | 4.0 | 64 | 0.2421 | 0.5852 | 0.6631 | 0.6217 | 0.9385 |
+ | No log | 5.0 | 80 | 0.2099 | 0.5950 | 0.6855 | 0.6370 | 0.9479 |
+ | No log | 6.0 | 96 | 0.2153 | 0.6810 | 0.7464 | 0.7122 | 0.9551 |
+ | No log | 7.0 | 112 | 0.2270 | 0.6894 | 0.7198 | 0.7043 | 0.9546 |
+ | No log | 8.0 | 128 | 0.2213 | 0.6918 | 0.7437 | 0.7168 | 0.9554 |
+ | No log | 9.0 | 144 | 0.2299 | 0.7021 | 0.7564 | 0.7283 | 0.9545 |
+ | No log | 10.0 | 160 | 0.2256 | 0.7002 | 0.7591 | 0.7284 | 0.9562 |
+ | No log | 11.0 | 176 | 0.2169 | 0.7100 | 0.7736 | 0.7404 | 0.9568 |
+ | No log | 12.0 | 192 | 0.2266 | 0.6981 | 0.7740 | 0.7341 | 0.9571 |
+ | No log | 13.0 | 208 | 0.2322 | 0.7093 | 0.7620 | 0.7347 | 0.9570 |
+ | No log | 14.0 | 224 | 0.2487 | 0.7077 | 0.7553 | 0.7308 | 0.9551 |
+
+
+ ### Framework versions
+
+ - Transformers 4.38.2
+ - Pytorch 2.2.1+cu121
+ - Datasets 2.18.0
+ - Tokenizers 0.15.2
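
For quick reference, here is a minimal inference sketch using the `transformers` token-classification pipeline. The repository id `arubenruben/NER_harem_bert-large-portuguese-cased` and the example sentence are assumptions for illustration, not part of the commit.

```python
# Minimal inference sketch (assumed hub repo id; adjust to the actual path).
from transformers import pipeline

ner = pipeline(
    "token-classification",
    model="arubenruben/NER_harem_bert-large-portuguese-cased",  # assumption
    aggregation_strategy="simple",  # merge B-/I- word pieces into whole entities
)

# Illustrative Portuguese sentence, not taken from the HAREM dataset.
for entity in ner("José Saramago nasceu na Azinhaga e recebeu o Nobel em 1998."):
    print(entity["entity_group"], entity["word"], round(float(entity["score"]), 3))
```
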
config.json ADDED
@@ -0,0 +1,78 @@
+ {
+   "_name_or_path": "PORTULAN/albertina-100m-portuguese-ptpt-encoder",
+   "architectures": [
+     "DebertaForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "O",
+     "1": "B-PESSOA",
+     "2": "I-PESSOA",
+     "3": "B-ORGANIZACAO",
+     "4": "I-ORGANIZACAO",
+     "5": "B-LOCAL",
+     "6": "I-LOCAL",
+     "7": "B-TEMPO",
+     "8": "I-TEMPO",
+     "9": "B-VALOR",
+     "10": "I-VALOR",
+     "11": "B-ABSTRACCAO",
+     "12": "I-ABSTRACCAO",
+     "13": "B-ACONTECIMENTO",
+     "14": "I-ACONTECIMENTO",
+     "15": "B-COISA",
+     "16": "I-COISA",
+     "17": "B-OBRA",
+     "18": "I-OBRA",
+     "19": "B-OUTRO",
+     "20": "I-OUTRO"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-ABSTRACCAO": 11,
+     "B-ACONTECIMENTO": 13,
+     "B-COISA": 15,
+     "B-LOCAL": 5,
+     "B-OBRA": 17,
+     "B-ORGANIZACAO": 3,
+     "B-OUTRO": 19,
+     "B-PESSOA": 1,
+     "B-TEMPO": 7,
+     "B-VALOR": 9,
+     "I-ABSTRACCAO": 12,
+     "I-ACONTECIMENTO": 14,
+     "I-COISA": 16,
+     "I-LOCAL": 6,
+     "I-OBRA": 18,
+     "I-ORGANIZACAO": 4,
+     "I-OUTRO": 20,
+     "I-PESSOA": 2,
+     "I-TEMPO": 8,
+     "I-VALOR": 10,
+     "O": 0
+   },
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 768,
+   "pos_att_type": [
+     "c2p",
+     "p2c"
+   ],
+   "position_biased_input": false,
+   "relative_attention": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "type_vocab_size": 0,
+   "vocab_size": 50265
+ }
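
The `id2label`/`label2id` tables above encode a BIO tagging scheme over the ten HAREM entity categories (PESSOA, ORGANIZACAO, LOCAL, TEMPO, VALOR, ABSTRACCAO, ACONTECIMENTO, COISA, OBRA, OUTRO). A small sketch of inspecting that scheme from the config alone, again assuming the hub repo id:

```python
# Sketch: read the label scheme from the config without downloading the weights.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "arubenruben/NER_harem_bert-large-portuguese-cased"  # assumed repo id
)

# id2label maps class indices to BIO tags, e.g. 1 -> "B-PESSOA", 2 -> "I-PESSOA".
for idx in sorted(config.id2label):
    print(idx, config.id2label[idx])
```
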
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69aff6dd40f2af49766291a469076fff8342b3a245c71b24bf2ae7dbe0baad6c
+ size 554495468
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": true,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50264": {
+       "content": "[MASK]",
+       "lstrip": true,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "eos_token": "[SEP]",
+   "errors": "replace",
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "tokenizer_class": "DebertaTokenizer",
+   "unk_token": "[UNK]",
+   "vocab_type": "gpt2"
+ }
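
A brief sketch of loading the tokenizer and checking the special tokens and length limit declared above; the repo id is again an assumption, and the sample sentence is illustrative only.

```python
# Sketch: load the tokenizer and verify the declared special tokens / max length.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "arubenruben/NER_harem_bert-large-portuguese-cased"  # assumed repo id
)

print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token)  # [CLS] [SEP] [PAD]
print(tokenizer.model_max_length)  # 512

# Words are split into sub-word pieces; word_ids() (fast tokenizers only)
# helps realign token-level NER predictions back to the original words.
encoding = tokenizer("José Saramago nasceu na Azinhaga.")
print(encoding.word_ids())
```
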
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:07e3cad1c2b4e99dece405c7b633d28012e66454a990462c0dc7b65b576ea3c0
+ size 4984
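
`training_args.bin` stores the torch-serialized `TrainingArguments`. As a rough, hedged reconstruction based only on the hyperparameters listed in the model card above (not the contents of the stored file), the arguments would look approximately like this; the output directory and per-epoch evaluation are assumptions.

```python
# Sketch: TrainingArguments mirroring the hyperparameters reported in the model
# card (lr 3e-05, batch size 8, seed 42, linear schedule, 300 epochs). The Adam
# betas/epsilon listed in the card are the library defaults, so they are not set here.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="NER_harem_bert-large-portuguese-cased",  # assumption
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=300,
    evaluation_strategy="epoch",  # assumption, consistent with the per-epoch results table
)
```
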
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff