josecannete committed
Commit 04d8633
1 Parent(s): d0e8cb9

adding model finetuned on MLDoc

all_results.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.9589999914169312,
+ "eval_loss": 0.20911003649234772,
+ "eval_runtime": 24.6119,
+ "eval_samples": 1000,
+ "eval_samples_per_second": 40.631,
+ "eval_steps_per_second": 5.079,
+ "train_loss": 0.1292072490857808,
+ "train_runtime": 3156.6951,
+ "train_samples": 9458,
+ "train_samples_per_second": 8.989,
+ "train_steps_per_second": 0.562
+ }
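The throughput figures above can be re-derived from the sample counts and runtimes, which is a quick way to sanity-check the file. A minimal sketch, assuming all_results.json has been downloaded to the working directory (the path is an assumption, not part of this commit):

```python
import json

# Load the aggregated metrics committed above (adjust the path to wherever
# all_results.json was downloaded).
with open("all_results.json") as f:
    results = json.load(f)

# Eval throughput: 1000 samples / 24.6119 s ~= 40.63 samples/s,
# matching the reported eval_samples_per_second of 40.631.
eval_sps = results["eval_samples"] / results["eval_runtime"]

# Train throughput covers all 3 epochs: 9458 samples * 3.0 epochs /
# 3156.6951 s ~= 8.99 samples/s, matching train_samples_per_second of 8.989.
train_sps = results["train_samples"] * results["epoch"] / results["train_runtime"]

print(f"eval samples/s:  {eval_sps:.3f}")
print(f"train samples/s: {train_sps:.3f}")
```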
config.json ADDED
@@ -0,0 +1,46 @@
+ {
+ "_name_or_path": "CenIA/albert_xlarge_spanish",
+ "architectures": [
+ "AlbertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0,
+ "bos_token_id": 2,
+ "classifier_dropout_prob": 0.1,
+ "down_scale_factor": 1,
+ "embedding_size": 128,
+ "eos_token_id": 3,
+ "gap_size": 0,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0,
+ "hidden_size": 2048,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2",
+ "3": "LABEL_3"
+ },
+ "initializer_range": 0.01,
+ "inner_group_num": 1,
+ "intermediate_size": 8192,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2,
+ "LABEL_3": 3
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "albert",
+ "net_structure_type": 0,
+ "num_attention_heads": 32,
+ "num_hidden_groups": 1,
+ "num_hidden_layers": 24,
+ "num_memory_blocks": 0,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.15.0",
+ "type_vocab_size": 2,
+ "vocab_size": 31000
+ }
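The config describes an ALBERT-xlarge Spanish encoder (24 layers in one shared parameter group, hidden size 2048, vocabulary 31000) with a 4-way sequence-classification head, matching MLDoc's four categories; id2label still carries the generic LABEL_0–LABEL_3 names. A minimal usage sketch, assuming the files in this commit are in a local directory — the Hub repo id of the fine-tuned model is not stated in the diff, so the checkpoint path below is a placeholder:

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Placeholder path: point this at a local clone of this repository
# (or at its Hub repo id once known).
checkpoint = "./albeto_xlarge_mldoc"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# Example Spanish document. MLDoc's categories are CCAT/ECAT/GCAT/MCAT,
# but this config only exposes the generic LABEL_0..LABEL_3 names, so any
# mapping back to the original categories is left to the user.
inputs = tokenizer("El banco central subió las tasas de interés.",
                   return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(dim=-1).item()])
```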
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.9589999914169312,
+ "eval_loss": 0.20911003649234772,
+ "eval_runtime": 24.6119,
+ "eval_samples": 1000,
+ "eval_samples_per_second": 40.631,
+ "eval_steps_per_second": 5.079
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8e735a49adc1ab5e49253adf89a595d5b764fa8a5797d5f424662730a4fa74a9
+ size 235459281
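pytorch_model.bin is tracked through Git LFS, so the diff only shows the pointer: a sha256 oid and the size in bytes (roughly 225 MB). A small sketch of how one could verify a downloaded weight file against this pointer, assuming it sits in the current directory:

```python
import hashlib
import os

expected_oid = "8e735a49adc1ab5e49253adf89a595d5b764fa8a5797d5f424662730a4fa74a9"
expected_size = 235459281  # bytes, as recorded in the LFS pointer

path = "pytorch_model.bin"  # assumed local download location
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("pytorch_model.bin matches the LFS pointer")
```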
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "CenIA/albert_xlarge_spanish", "tokenizer_class": "AlbertTokenizer"}
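The tokenizer config selects an ALBERT tokenizer that lowercases and strips extra whitespace but keeps accents (keep_accents: true), with a 512-token model_max_length. A minimal check of that behavior, again assuming the tokenizer files from this commit are available under a placeholder local path:

```python
from transformers import AutoTokenizer

# Placeholder path: a local directory containing the tokenizer files
# added in this commit (tokenizer.json, tokenizer_config.json, ...).
tokenizer = AutoTokenizer.from_pretrained("./albeto_xlarge_mldoc")

# do_lower_case=True lowercases the input; keep_accents=True preserves
# accented characters such as "í" and "ó" instead of stripping them.
print(tokenizer.tokenize("Economía y Política"))
print(tokenizer.model_max_length)  # 512
```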
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 0.1292072490857808,
+ "train_runtime": 3156.6951,
+ "train_samples": 9458,
+ "train_samples_per_second": 8.989,
+ "train_steps_per_second": 0.562
+ }
trainer_state.json ADDED
@@ -0,0 +1,196 @@
+ {
+ "best_metric": 0.20911003649234772,
+ "best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_xlarge/epochs_3_bs_16_lr_5e-6/checkpoint-1500",
+ "epoch": 2.9991546914623837,
+ "global_step": 1773,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.17,
+ "eval_accuracy": 0.2770000100135803,
+ "eval_loss": 1.3528416156768799,
+ "eval_runtime": 24.2204,
+ "eval_samples_per_second": 41.287,
+ "eval_steps_per_second": 5.161,
+ "step": 100
+ },
+ {
+ "epoch": 0.34,
+ "eval_accuracy": 0.47999998927116394,
+ "eval_loss": 1.0430142879486084,
+ "eval_runtime": 23.7736,
+ "eval_samples_per_second": 42.063,
+ "eval_steps_per_second": 5.258,
+ "step": 200
+ },
+ {
+ "epoch": 0.51,
+ "eval_accuracy": 0.5640000104904175,
+ "eval_loss": 1.0979323387145996,
+ "eval_runtime": 23.5496,
+ "eval_samples_per_second": 42.464,
+ "eval_steps_per_second": 5.308,
+ "step": 300
+ },
+ {
+ "epoch": 0.68,
+ "eval_accuracy": 0.5720000267028809,
+ "eval_loss": 0.9388349652290344,
+ "eval_runtime": 25.4729,
+ "eval_samples_per_second": 39.257,
+ "eval_steps_per_second": 4.907,
+ "step": 400
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 3.601240834743373e-06,
+ "loss": 0.2676,
+ "step": 500
+ },
+ {
+ "epoch": 0.85,
+ "eval_accuracy": 0.7160000205039978,
+ "eval_loss": 0.6925698518753052,
+ "eval_runtime": 24.8675,
+ "eval_samples_per_second": 40.213,
+ "eval_steps_per_second": 5.027,
+ "step": 500
+ },
+ {
+ "epoch": 1.02,
+ "eval_accuracy": 0.8560000061988831,
+ "eval_loss": 0.673073947429657,
+ "eval_runtime": 25.8013,
+ "eval_samples_per_second": 38.758,
+ "eval_steps_per_second": 4.845,
+ "step": 600
+ },
+ {
+ "epoch": 1.18,
+ "eval_accuracy": 0.9369999766349792,
+ "eval_loss": 0.3746216893196106,
+ "eval_runtime": 25.6232,
+ "eval_samples_per_second": 39.027,
+ "eval_steps_per_second": 4.878,
+ "step": 700
+ },
+ {
+ "epoch": 1.35,
+ "eval_accuracy": 0.9350000023841858,
+ "eval_loss": 0.35705798864364624,
+ "eval_runtime": 25.7852,
+ "eval_samples_per_second": 38.782,
+ "eval_steps_per_second": 4.848,
+ "step": 800
+ },
+ {
+ "epoch": 1.52,
+ "eval_accuracy": 0.9409999847412109,
+ "eval_loss": 0.27689608931541443,
+ "eval_runtime": 24.8438,
+ "eval_samples_per_second": 40.252,
+ "eval_steps_per_second": 5.031,
+ "step": 900
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 2.1912013536379022e-06,
+ "loss": 0.1019,
+ "step": 1000
+ },
+ {
+ "epoch": 1.69,
+ "eval_accuracy": 0.9350000023841858,
+ "eval_loss": 0.291538268327713,
+ "eval_runtime": 26.3153,
+ "eval_samples_per_second": 38.001,
+ "eval_steps_per_second": 4.75,
+ "step": 1000
+ },
+ {
+ "epoch": 1.86,
+ "eval_accuracy": 0.9509999752044678,
+ "eval_loss": 0.21956732869148254,
+ "eval_runtime": 24.9343,
+ "eval_samples_per_second": 40.105,
+ "eval_steps_per_second": 5.013,
+ "step": 1100
+ },
+ {
+ "epoch": 2.03,
+ "eval_accuracy": 0.9520000219345093,
+ "eval_loss": 0.2259828746318817,
+ "eval_runtime": 25.7253,
+ "eval_samples_per_second": 38.872,
+ "eval_steps_per_second": 4.859,
+ "step": 1200
+ },
+ {
+ "epoch": 2.2,
+ "eval_accuracy": 0.9509999752044678,
+ "eval_loss": 0.256782591342926,
+ "eval_runtime": 25.7431,
+ "eval_samples_per_second": 38.845,
+ "eval_steps_per_second": 4.856,
+ "step": 1300
+ },
+ {
+ "epoch": 2.37,
+ "eval_accuracy": 0.9549999833106995,
+ "eval_loss": 0.2290709912776947,
+ "eval_runtime": 24.8624,
+ "eval_samples_per_second": 40.221,
+ "eval_steps_per_second": 5.028,
+ "step": 1400
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 7.81161872532431e-07,
+ "loss": 0.0593,
+ "step": 1500
+ },
+ {
+ "epoch": 2.54,
+ "eval_accuracy": 0.9589999914169312,
+ "eval_loss": 0.20911003649234772,
+ "eval_runtime": 25.438,
+ "eval_samples_per_second": 39.311,
+ "eval_steps_per_second": 4.914,
+ "step": 1500
+ },
+ {
+ "epoch": 2.71,
+ "eval_accuracy": 0.9490000009536743,
+ "eval_loss": 0.2575547695159912,
+ "eval_runtime": 25.2822,
+ "eval_samples_per_second": 39.554,
+ "eval_steps_per_second": 4.944,
+ "step": 1600
+ },
+ {
+ "epoch": 2.88,
+ "eval_accuracy": 0.9509999752044678,
+ "eval_loss": 0.23298123478889465,
+ "eval_runtime": 25.3064,
+ "eval_samples_per_second": 39.516,
+ "eval_steps_per_second": 4.939,
+ "step": 1700
+ },
+ {
+ "epoch": 3.0,
+ "step": 1773,
+ "total_flos": 3375408735295488.0,
+ "train_loss": 0.1292072490857808,
+ "train_runtime": 3156.6951,
+ "train_samples_per_second": 8.989,
+ "train_steps_per_second": 0.562
+ }
+ ],
+ "max_steps": 1773,
+ "num_train_epochs": 3,
+ "total_flos": 3375408735295488.0,
+ "trial_name": null,
+ "trial_params": null
+ }
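trainer_state.json records the Trainer's evaluation trace every 100 steps; the best checkpoint (eval_loss 0.2091, accuracy 0.959) was found at step 1500 of 1773. A short sketch of how one might pull the accuracy curve out of log_history, assuming the file has been downloaded locally:

```python
import json

with open("trainer_state.json") as f:  # assumed local path
    state = json.load(f)

# Keep only the evaluation entries (training-loss entries have no eval_accuracy).
curve = [(entry["step"], entry["eval_accuracy"])
         for entry in state["log_history"] if "eval_accuracy" in entry]

best_step, best_acc = max(curve, key=lambda p: p[1])
print(f"best eval_accuracy {best_acc:.4f} at step {best_step}")
# -> best eval_accuracy 0.9590 at step 1500, consistent with best_model_checkpoint
```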
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:953c7a5d3ce4d8d3ecb032712f0f30c74b466a219b2e6abc755f878a789a3a67
+ size 2991
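training_args.bin is a small (2991-byte) pickled TrainingArguments object, also stored through LFS. The checkpoint directory name above suggests the run used 3 epochs, batch size 16, and learning rate 5e-6; to inspect the exact hyperparameters one can usually unpickle the file with torch, assuming a compatible transformers version is installed:

```python
import torch

# TrainingArguments is a pickled Python object, so transformers must be
# importable when loading; on recent torch versions weights_only=False is
# required to allow unpickling arbitrary classes.
args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)
```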