pszemraj committed on
Commit
64f3204
1 Parent(s): f8e147e

Model save

README.md ADDED
@@ -0,0 +1,66 @@
+ ---
+ license: apache-2.0
+ base_model: albert-xxlarge-v2
+ tags:
+ - generated_from_trainer
+ metrics:
+ - f1
+ model-index:
+ - name: albert-xxlarge-v2-goodreads-bookgenres-Description_cls-5e
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # albert-xxlarge-v2-goodreads-bookgenres-Description_cls-5e
+
+ This model is a fine-tuned version of [albert-xxlarge-v2](https://huggingface.co/albert-xxlarge-v2) on an unknown dataset (per the model name, most likely a Goodreads book-genres description-classification set).
+ It achieves the following results on the evaluation set:
+ - Loss: 0.1905
+ - F1: 0.7058
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
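+ A minimal inference sketch (assuming the checkpoint is published as `pszemraj/albert-xxlarge-v2-goodreads-bookgenres-Description_cls-5e`; the repo id is inferred from the model name). Since the config sets `problem_type: multi_label_classification`, logits are decoded with a per-class sigmoid rather than a softmax:
+
+ ```python
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+ repo = "pszemraj/albert-xxlarge-v2-goodreads-bookgenres-Description_cls-5e"  # assumed repo id
+ tokenizer = AutoTokenizer.from_pretrained(repo)
+ model = AutoModelForSequenceClassification.from_pretrained(repo)
+
+ text = "A sweeping tale of love and betrayal set against the fall of an empire."
+ inputs = tokenizer(text, truncation=True, max_length=512, return_tensors="pt")
+ with torch.no_grad():
+     logits = model(**inputs).logits
+
+ # one independent probability per genre; 0.5 is a common default threshold,
+ # not a value taken from this card
+ probs = torch.sigmoid(logits)[0]
+ predicted = [model.config.id2label[i] for i, p in enumerate(probs.tolist()) if p > 0.5]
+ print(predicted)
+ ```
+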
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (a `TrainingArguments` sketch follows the list):
+ - learning_rate: 2e-05
+ - train_batch_size: 16
+ - eval_batch_size: 16
+ - seed: 42
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 5.0
+
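+ A sketch of the equivalent `transformers.TrainingArguments` (the output directory and evaluation strategy are assumptions; the listed Adam betas and epsilon are the library defaults, so they need no explicit arguments):
+
+ ```python
+ from transformers import TrainingArguments
+
+ args = TrainingArguments(
+     output_dir="albert-xxlarge-v2-goodreads-bookgenres-Description_cls-5e",  # placeholder
+     learning_rate=2e-5,
+     per_device_train_batch_size=16,
+     per_device_eval_batch_size=16,
+     gradient_accumulation_steps=4,  # 16 x 4 = total train batch size of 64
+     num_train_epochs=5.0,
+     lr_scheduler_type="linear",
+     seed=42,
+     evaluation_strategy="epoch",  # assumed: the results table shows one eval per epoch
+ )
+ ```
+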
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | F1     |
+ |:-------------:|:-----:|:----:|:---------------:|:------:|
+ | 0.2903        | 0.99  | 123  | 0.2686          | 0.4011 |
+ | 0.2171        | 2.0   | 247  | 0.2168          | 0.6493 |
+ | 0.1879        | 3.0   | 371  | 0.1990          | 0.6612 |
+ | 0.1476        | 4.0   | 495  | 0.1879          | 0.7060 |
+ | 0.1279        | 4.97  | 615  | 0.1905          | 0.7058 |
+
+
+ ### Framework versions
+
+ - Transformers 4.33.3
+ - Pytorch 2.2.0.dev20231001+cu121
+ - Datasets 2.14.5
+ - Tokenizers 0.13.3
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[MASK]": 128000
+ }
all_results.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "epoch": 5.0,
+   "eval_f1": 0.46636259977194994,
+   "eval_loss": 0.25748351216316223,
+   "eval_runtime": 1.8285,
+   "eval_samples": 989,
+   "eval_samples_per_second": 540.873,
+   "eval_steps_per_second": 8.75,
+   "train_loss": 0.3167446336438579,
+   "train_runtime": 225.8975,
+   "train_samples": 7914,
+   "train_samples_per_second": 175.168,
+   "train_steps_per_second": 1.372
+ }
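The throughput fields are the sample and step counts divided by the runtimes, with training counting all five epochs. A quick arithmetic check, a sketch using the (rounded) values stored in this file:

```python
# eval: 989 samples in ~1.83 s; train: 7914 samples x 5 epochs in ~226 s
eval_samples, eval_runtime = 989, 1.8285
train_samples, epochs, train_runtime = 7914, 5.0, 225.8975

print(eval_samples / eval_runtime)             # ~540.88 (file says 540.873; runtimes here are rounded)
print(train_samples * epochs / train_runtime)  # ~175.168, matching train_samples_per_second
print(310 / train_runtime)                     # ~1.372 steps/s over the 310 optimizer steps
```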
config.json ADDED
@@ -0,0 +1,76 @@
+ {
+   "_name_or_path": "albert-xxlarge-v2",
+   "architectures": [
+     "AlbertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "finetuning_task": "text-classification",
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 4096,
+   "id2label": {
+     "0": "History & Politics",
+     "1": "Health & Medicine",
+     "2": "Mystery & Thriller",
+     "3": "Arts & Design",
+     "4": "Self-Help & Wellness",
+     "5": "Sports & Recreation",
+     "6": "Non-Fiction",
+     "7": "Science Fiction & Fantasy",
+     "8": "Countries & Geography",
+     "9": "Other",
+     "10": "Nature & Environment",
+     "11": "Business & Finance",
+     "12": "Romance",
+     "13": "Philosophy & Religion",
+     "14": "Literature & Fiction",
+     "15": "Science & Technology",
+     "16": "Children & Young Adult",
+     "17": "Food & Cooking"
+   },
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 16384,
+   "label2id": {
+     "Arts & Design": 3,
+     "Business & Finance": 11,
+     "Children & Young Adult": 16,
+     "Countries & Geography": 8,
+     "Food & Cooking": 17,
+     "Health & Medicine": 1,
+     "History & Politics": 0,
+     "Literature & Fiction": 14,
+     "Mystery & Thriller": 2,
+     "Nature & Environment": 10,
+     "Non-Fiction": 6,
+     "Other": 9,
+     "Philosophy & Religion": 13,
+     "Romance": 12,
+     "Science & Technology": 15,
+     "Science Fiction & Fantasy": 7,
+     "Self-Help & Wellness": 4,
+     "Sports & Recreation": 5
+   },
+   "layer_norm_eps": 1e-12,
+   "layers_to_keep": [],
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 64,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "multi_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.33.3",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
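With `problem_type` set to `multi_label_classification`, `AlbertForSequenceClassification` trains with `BCEWithLogitsLoss`, which expects float multi-hot targets over these 18 genres. A minimal sketch of encoding a genre list against the `label2id` map above (the example genres are illustrative):

```python
import torch

# excerpt of the 18-entry label2id map from config.json
label2id = {"Romance": 12, "Literature & Fiction": 14, "History & Politics": 0}
num_labels = 18

def encode_genres(genres):
    """Multi-hot float vector, the target format BCEWithLogitsLoss expects."""
    target = torch.zeros(num_labels, dtype=torch.float)
    for genre in genres:
        target[label2id[genre]] = 1.0
    return target

print(encode_genres(["Romance", "Literature & Fiction"]))  # 1.0 at indices 12 and 14
```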
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 5.0,
+   "eval_f1": 0.46636259977194994,
+   "eval_loss": 0.25748351216316223,
+   "eval_runtime": 1.8285,
+   "eval_samples": 989,
+   "eval_samples_per_second": 540.873,
+   "eval_steps_per_second": 8.75
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee4872acdc1be4953bdedd514e63fb9e8cc84c9a52f0ab86850986bfffa62f3d
+ size 890687039
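The three lines above are a Git LFS pointer (spec v1), not the weights themselves; `oid` is the SHA-256 of the actual ~890 MB file. A sketch for verifying a downloaded copy against the pointer (the local path is a placeholder):

```python
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    """Stream the file so ~890 MB never sits in memory at once."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

expected = "ee4872acdc1be4953bdedd514e63fb9e8cc84c9a52f0ab86850986bfffa62f3d"
assert sha256_of("pytorch_model.bin") == expected  # placeholder path to the downloaded file
```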
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fefb02b667a6c5c2fe27602d28e5fb3428f66ab89c7d6f388e7c8d44a02d0336
+ size 760289
spm.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "eos_token": "[SEP]",
+   "keep_accents": false,
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "remove_space": true,
+   "sep_token": "[SEP]",
+   "tokenizer_class": "AlbertTokenizer",
+   "unk_token": "<unk>"
+ }
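These settings (lowercasing, a `model_max_length` of 512, the SentencePiece-based `AlbertTokenizer`) govern how book descriptions are encoded. A small sketch, again assuming the repo id inferred earlier:

```python
from transformers import AutoTokenizer

repo = "pszemraj/albert-xxlarge-v2-goodreads-bookgenres-Description_cls-5e"  # assumed repo id
tok = AutoTokenizer.from_pretrained(repo)

enc = tok("An Epic Fantasy Quest", truncation=True, max_length=tok.model_max_length)
print(tok.convert_ids_to_tokens(enc["input_ids"]))  # lowercased pieces wrapped in [CLS] ... [SEP]
```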
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 5.0,
+   "train_loss": 0.3167446336438579,
+   "train_runtime": 225.8975,
+   "train_samples": 7914,
+   "train_samples_per_second": 175.168,
+   "train_steps_per_second": 1.372
+ }
trainer_state.json ADDED
@@ -0,0 +1,259 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 5.0,
+   "eval_steps": 500,
+   "global_step": 310,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.16,
+       "learning_rate": 1.935483870967742e-05,
+       "loss": 0.6525,
+       "step": 10
+     },
+     {
+       "epoch": 0.32,
+       "learning_rate": 1.870967741935484e-05,
+       "loss": 0.5089,
+       "step": 20
+     },
+     {
+       "epoch": 0.48,
+       "learning_rate": 1.806451612903226e-05,
+       "loss": 0.422,
+       "step": 30
+     },
+     {
+       "epoch": 0.65,
+       "learning_rate": 1.741935483870968e-05,
+       "loss": 0.3803,
+       "step": 40
+     },
+     {
+       "epoch": 0.81,
+       "learning_rate": 1.6774193548387098e-05,
+       "loss": 0.3615,
+       "step": 50
+     },
+     {
+       "epoch": 0.97,
+       "learning_rate": 1.6129032258064517e-05,
+       "loss": 0.3453,
+       "step": 60
+     },
+     {
+       "epoch": 1.0,
+       "eval_f1": 0.2621971457019582,
+       "eval_loss": 0.3179258704185486,
+       "eval_runtime": 1.9933,
+       "eval_samples_per_second": 496.167,
+       "eval_steps_per_second": 8.027,
+       "step": 62
+     },
+     {
+       "epoch": 1.13,
+       "learning_rate": 1.5483870967741936e-05,
+       "loss": 0.3319,
+       "step": 70
+     },
+     {
+       "epoch": 1.29,
+       "learning_rate": 1.4838709677419357e-05,
+       "loss": 0.3196,
+       "step": 80
+     },
+     {
+       "epoch": 1.45,
+       "learning_rate": 1.4193548387096776e-05,
+       "loss": 0.3168,
+       "step": 90
+     },
+     {
+       "epoch": 1.61,
+       "learning_rate": 1.3548387096774194e-05,
+       "loss": 0.3119,
+       "step": 100
+     },
+     {
+       "epoch": 1.77,
+       "learning_rate": 1.2903225806451613e-05,
+       "loss": 0.3049,
+       "step": 110
+     },
+     {
+       "epoch": 1.94,
+       "learning_rate": 1.2258064516129034e-05,
+       "loss": 0.3017,
+       "step": 120
+     },
+     {
+       "epoch": 2.0,
+       "eval_f1": 0.37271619975639464,
+       "eval_loss": 0.2839711308479309,
+       "eval_runtime": 1.8261,
+       "eval_samples_per_second": 541.593,
+       "eval_steps_per_second": 8.762,
+       "step": 124
+     },
+     {
+       "epoch": 2.1,
+       "learning_rate": 1.1612903225806453e-05,
+       "loss": 0.2931,
+       "step": 130
+     },
+     {
+       "epoch": 2.26,
+       "learning_rate": 1.096774193548387e-05,
+       "loss": 0.2924,
+       "step": 140
+     },
+     {
+       "epoch": 2.42,
+       "learning_rate": 1.0322580645161291e-05,
+       "loss": 0.2911,
+       "step": 150
+     },
+     {
+       "epoch": 2.58,
+       "learning_rate": 9.67741935483871e-06,
+       "loss": 0.2857,
+       "step": 160
+     },
+     {
+       "epoch": 2.74,
+       "learning_rate": 9.03225806451613e-06,
+       "loss": 0.2855,
+       "step": 170
+     },
+     {
+       "epoch": 2.9,
+       "learning_rate": 8.387096774193549e-06,
+       "loss": 0.2828,
+       "step": 180
+     },
+     {
+       "epoch": 3.0,
+       "eval_f1": 0.4075178997613365,
+       "eval_loss": 0.27112114429473877,
+       "eval_runtime": 1.8327,
+       "eval_samples_per_second": 539.642,
+       "eval_steps_per_second": 8.73,
+       "step": 186
+     },
+     {
+       "epoch": 3.06,
+       "learning_rate": 7.741935483870968e-06,
+       "loss": 0.2874,
+       "step": 190
+     },
+     {
+       "epoch": 3.23,
+       "learning_rate": 7.096774193548388e-06,
+       "loss": 0.2774,
+       "step": 200
+     },
+     {
+       "epoch": 3.39,
+       "learning_rate": 6.451612903225806e-06,
+       "loss": 0.2729,
+       "step": 210
+     },
+     {
+       "epoch": 3.55,
+       "learning_rate": 5.806451612903226e-06,
+       "loss": 0.2784,
+       "step": 220
+     },
+     {
+       "epoch": 3.71,
+       "learning_rate": 5.161290322580646e-06,
+       "loss": 0.2726,
+       "step": 230
+     },
+     {
+       "epoch": 3.87,
+       "learning_rate": 4.516129032258065e-06,
+       "loss": 0.2723,
+       "step": 240
+     },
+     {
+       "epoch": 4.0,
+       "eval_f1": 0.4506226469736461,
+       "eval_loss": 0.26035064458847046,
+       "eval_runtime": 1.8326,
+       "eval_samples_per_second": 539.658,
+       "eval_steps_per_second": 8.731,
+       "step": 248
+     },
+     {
+       "epoch": 4.03,
+       "learning_rate": 3.870967741935484e-06,
+       "loss": 0.2688,
+       "step": 250
+     },
+     {
+       "epoch": 4.19,
+       "learning_rate": 3.225806451612903e-06,
+       "loss": 0.268,
+       "step": 260
+     },
+     {
+       "epoch": 4.35,
+       "learning_rate": 2.580645161290323e-06,
+       "loss": 0.266,
+       "step": 270
+     },
+     {
+       "epoch": 4.52,
+       "learning_rate": 1.935483870967742e-06,
+       "loss": 0.2657,
+       "step": 280
+     },
+     {
+       "epoch": 4.68,
+       "learning_rate": 1.2903225806451614e-06,
+       "loss": 0.2687,
+       "step": 290
+     },
+     {
+       "epoch": 4.84,
+       "learning_rate": 6.451612903225807e-07,
+       "loss": 0.2679,
+       "step": 300
+     },
+     {
+       "epoch": 5.0,
+       "learning_rate": 0.0,
+       "loss": 0.2653,
+       "step": 310
+     },
+     {
+       "epoch": 5.0,
+       "eval_f1": 0.46636259977194994,
+       "eval_loss": 0.25748351216316223,
+       "eval_runtime": 1.8325,
+       "eval_samples_per_second": 539.71,
+       "eval_steps_per_second": 8.731,
+       "step": 310
+     },
+     {
+       "epoch": 5.0,
+       "step": 310,
+       "total_flos": 5243230627246080.0,
+       "train_loss": 0.3167446336438579,
+       "train_runtime": 225.8975,
+       "train_samples_per_second": 175.168,
+       "train_steps_per_second": 1.372
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 310,
+   "num_train_epochs": 5,
+   "save_steps": 500,
+   "total_flos": 5243230627246080.0,
+   "trial_name": null,
+   "trial_params": null
+ }
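The `learning_rate` values in `log_history` trace the linear scheduler declared in the README: lr(step) = 2e-5 * (1 - step / 310). A quick check, a sketch assuming zero warmup steps (which the logged values are consistent with):

```python
# linear decay to zero over max_steps, assuming zero warmup
base_lr, max_steps = 2e-5, 310

def lr_at(step):
    return base_lr * (1 - step / max_steps)

print(lr_at(10))   # ~1.9355e-05, matching the first logged learning_rate
print(lr_at(300))  # ~6.4516e-07, the last nonzero logged value
print(lr_at(310))  # 0.0
```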
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6799f73d7789bea8ccf5a66eb0bed01d8621989e3edb84ce6232726b939c4fcb
+ size 4536
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff