arubenruben committed
Commit 2c15bb2
1 parent: d3b802b

Training in progress, epoch 1

README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 license: mit
-base_model: PORTULAN/albertina-100m-portuguese-ptpt-encoder
+base_model: neuralmind/bert-large-portuguese-cased
 tags:
 - generated_from_trainer
 metrics:
@@ -9,22 +9,22 @@ metrics:
 - precision
 - recall
 model-index:
-- name: LVI_albertina-100m-portuguese-ptpt-encoder
+- name: LVI_bert-large-portuguese-cased
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# LVI_albertina-100m-portuguese-ptpt-encoder
+# LVI_bert-large-portuguese-cased
 
-This model is a fine-tuned version of [PORTULAN/albertina-100m-portuguese-ptpt-encoder](https://huggingface.co/PORTULAN/albertina-100m-portuguese-ptpt-encoder) on the None dataset.
+This model is a fine-tuned version of [neuralmind/bert-large-portuguese-cased](https://huggingface.co/neuralmind/bert-large-portuguese-cased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6932
-- Accuracy: 0.5
-- F1: 0.0
-- Precision: 0.0
-- Recall: 0.0
+- Loss: 0.0755
+- Accuracy: 0.9775
+- F1: 0.9775
+- Precision: 0.9758
+- Recall: 0.9793
 
 ## Model description
 
@@ -43,7 +43,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 5e-05
+- learning_rate: 5e-06
 - train_batch_size: 16
 - eval_batch_size: 16
 - seed: 42
@@ -53,15 +53,9 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step  | Validation Loss | Accuracy | F1     | Precision | Recall |
-|:-------------:|:-----:|:-----:|:---------------:|:--------:|:------:|:---------:|:------:|
-| 0.5678        | 1.0   | 3217  | 0.6316          | 0.6653   | 0.5619 | 0.8128    | 0.4294 |
-| 0.6042        | 2.0   | 6434  | 0.6911          | 0.5      | 0.0    | 0.0       | 0.0    |
-| 0.6946        | 3.0   | 9651  | 0.6932          | 0.5      | 0.0    | 0.0       | 0.0    |
-| 0.694         | 4.0   | 12868 | 0.6932          | 0.5      | 0.6667 | 0.5       | 1.0    |
-| 0.6942        | 5.0   | 16085 | 0.6933          | 0.5      | 0.6667 | 0.5       | 1.0    |
-| 0.6936        | 6.0   | 19302 | 0.6937          | 0.5      | 0.6667 | 0.5       | 1.0    |
-| 0.6937        | 7.0   | 22519 | 0.6932          | 0.5      | 0.0    | 0.0       | 0.0    |
+| Training Loss | Epoch | Step | Validation Loss | Accuracy | F1     | Precision | Recall |
+|:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
+| 0.1071        | 1.0   | 3217 | 0.0755          | 0.9775   | 0.9775 | 0.9758    | 0.9793 |
 
 
 ### Framework versions
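
The updated card above describes a binary Portuguese-variant classifier (PT-PT vs. PT-BR, per the id2label mapping in the config.json added further down). A minimal inference sketch, assuming the fine-tuned weights are published under a repo id like `arubenruben/LVI_bert-large-portuguese-cased` (an assumption inferred from the model-index name; substitute the actual repo id):

```python
# Minimal sketch: load the fine-tuned classifier and label one sentence.
# The repo id is hypothetical, inferred from the model-index name above.
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="arubenruben/LVI_bert-large-portuguese-cased",  # hypothetical repo id
)

# id2label maps 0 -> "PT-PT" and 1 -> "PT-BR"
print(classifier("Estou a tentar perceber qual é a variante deste texto."))
# e.g. [{'label': 'PT-PT', 'score': 0.99}]
```
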
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5333189bc54454612d23d846c0995dfc4cb235496fc187f0d2f217687e6b99c0
+oid sha256:b5f00b138409021c800a8ff86021b5b35d3c99dc28d8be1e156d20c731306f28
 size 556799560
tmp-checkpoint-6434/config.json ADDED
@@ -0,0 +1,41 @@
+{
+  "_name_or_path": "neuralmind/bert-large-portuguese-cased",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "PT-PT",
+    "1": "PT-BR"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "PT-BR": 1,
+    "PT-PT": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 29794
+}
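
The id2label / label2id pair in the config above is what turns the classifier's two logits into variant names. A short sketch of that mapping, with a hypothetical logits tensor standing in for real model output:

```python
# Sketch: resolve a prediction through the checkpoint's id2label mapping.
import json

import torch

with open("tmp-checkpoint-6434/config.json") as f:
    config = json.load(f)

logits = torch.tensor([[1.7, -0.9]])  # hypothetical output for one sentence
pred = logits.argmax(dim=-1).item()   # winning class index: 0 or 1
print(config["id2label"][str(pred)])  # JSON keys are strings -> "PT-PT"
```
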
tmp-checkpoint-6434/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:874ec343b9437083fe17a8f75e5256a152f29906afbe9a591a41afc20def4c8a
+size 1337640872
tmp-checkpoint-6434/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6085c2e2b602d2ff46761b8cc3b0580e2715f72e47624913132b0afe11dffebe
+size 2675516589
tmp-checkpoint-6434/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bd5fe8d06a3bfa5e0111ae42f40e9072e65f3004a6c69b1447211ab7ff7fcd8
+size 14244
tmp-checkpoint-6434/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79d818fbe78bd027946b5f8726af2230fba2536a3a219eff3b16cc710b1831ed
+size 1064
tmp-checkpoint-6434/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
tmp-checkpoint-6434/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tmp-checkpoint-6434/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
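
The tokenizer artifacts in this checkpoint folder (tokenizer.json, vocab.txt, and the two config files above) load together as one unit; a quick sketch, assuming the folder is available locally:

```python
# Sketch: load the checkpoint's tokenizer from the local folder added here.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("tmp-checkpoint-6434")  # local checkpoint dir
print(tok.tokenize("Estou a treinar um classificador."))
```
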
tmp-checkpoint-6434/trainer_state.json ADDED
@@ -0,0 +1,129 @@
+{
+  "best_metric": 0.5745548592459963,
+  "best_model_checkpoint": "/home/ruben/PT-Pump-Up/package-client/src/pt_pump_up/benchmarking/output/checkpoint-3217",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 6434,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.16,
+      "grad_norm": 18.098752975463867,
+      "learning_rate": 4.9222878458190864e-05,
+      "loss": 0.393,
+      "step": 500
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 1.4246498346328735,
+      "learning_rate": 4.8445756916381726e-05,
+      "loss": 0.3577,
+      "step": 1000
+    },
+    {
+      "epoch": 0.47,
+      "grad_norm": 20.033092498779297,
+      "learning_rate": 4.766863537457259e-05,
+      "loss": 0.2749,
+      "step": 1500
+    },
+    {
+      "epoch": 0.62,
+      "grad_norm": 0.9361429214477539,
+      "learning_rate": 4.689151383276344e-05,
+      "loss": 0.2693,
+      "step": 2000
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 2.208453893661499,
+      "learning_rate": 4.6114392290954305e-05,
+      "loss": 0.2839,
+      "step": 2500
+    },
+    {
+      "epoch": 0.93,
+      "grad_norm": 5.299256324768066,
+      "learning_rate": 4.5337270749145166e-05,
+      "loss": 0.3315,
+      "step": 3000
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6945,
+      "eval_f1": 0.5745548592459963,
+      "eval_loss": 0.5727517604827881,
+      "eval_precision": 0.9459547985587946,
+      "eval_recall": 0.4125714285714286,
+      "eval_runtime": 971.5986,
+      "eval_samples_per_second": 28.818,
+      "eval_steps_per_second": 1.801,
+      "step": 3217
+    },
+    {
+      "epoch": 1.09,
+      "grad_norm": 2.0671701431274414,
+      "learning_rate": 4.456014920733603e-05,
+      "loss": 0.5044,
+      "step": 3500
+    },
+    {
+      "epoch": 1.24,
+      "grad_norm": 2.1807188987731934,
+      "learning_rate": 4.378302766552689e-05,
+      "loss": 0.6689,
+      "step": 4000
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 5.214261054992676,
+      "learning_rate": 4.300590612371775e-05,
+      "loss": 0.6711,
+      "step": 4500
+    },
+    {
+      "epoch": 1.55,
+      "grad_norm": 4.0813446044921875,
+      "learning_rate": 4.2228784581908613e-05,
+      "loss": 0.69,
+      "step": 5000
+    },
+    {
+      "epoch": 1.71,
+      "grad_norm": 14.689708709716797,
+      "learning_rate": 4.1451663040099475e-05,
+      "loss": 0.709,
+      "step": 5500
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 3.6824076175689697,
+      "learning_rate": 4.067454149829034e-05,
+      "loss": 0.6821,
+      "step": 6000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.5,
+      "eval_f1": 0.0,
+      "eval_loss": 0.685297429561615,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 964.7135,
+      "eval_samples_per_second": 29.024,
+      "eval_steps_per_second": 1.814,
+      "step": 6434
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 32170,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 3.215241835190381e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
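
Trainer's trainer_state.json interleaves step-level training-loss entries and per-epoch eval entries in log_history; a small sketch for separating the two when inspecting a checkpoint like the one added here:

```python
# Sketch: split this checkpoint's log_history into train and eval records.
import json

with open("tmp-checkpoint-6434/trainer_state.json") as f:
    state = json.load(f)

train_log = [e for e in state["log_history"] if "loss" in e]      # step-level loss
eval_log = [e for e in state["log_history"] if "eval_loss" in e]  # epoch-level eval

for e in eval_log:
    print(f"epoch {e['epoch']}: eval_loss={e['eval_loss']:.4f}, f1={e['eval_f1']:.4f}")
# epoch 1.0: eval_loss=0.5728, f1=0.5746
# epoch 2.0: eval_loss=0.6853, f1=0.0000
```
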
tmp-checkpoint-6434/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2026ad5b81d5bd011f6117e2f1893b125e45c7f5d9f2735596c30361ea737dd
+size 5048
tmp-checkpoint-6434/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04bd7c83bfa10fa1094349afa3a643b8aa1f8c5e29cc2f25cc54e8e78e657e13
+oid sha256:d19fad054636ef9accce9a4758d63dcdf2c7f666b2f98bcc3bb45720359ede5d
 size 5048