NicolaiSivesind commited on
Commit
38dc20a
1 Parent(s): 4b86bd4

Upload 11 files

Browse files
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "NbAiLab/nb-bert-large",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 4096,
23
+ "label2id": {
24
+ "LABEL_0": 0,
25
+ "LABEL_1": 1,
26
+ "LABEL_2": 2,
27
+ "LABEL_3": 3,
28
+ "LABEL_4": 4,
29
+ "LABEL_5": 5,
30
+ "LABEL_6": 6
31
+ },
32
+ "layer_norm_eps": 1e-12,
33
+ "max_position_embeddings": 512,
34
+ "model_type": "bert",
35
+ "num_attention_heads": 16,
36
+ "num_hidden_layers": 24,
37
+ "pad_token_id": 0,
38
+ "position_embedding_type": "absolute",
39
+ "problem_type": "single_label_classification",
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.40.0",
42
+ "type_vocab_size": 2,
43
+ "use_cache": true,
44
+ "vocab_size": 50000
45
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2364077fd4ca53d4a689c7f010523950e4f8b184caebf77203dd5991fb2ebdce
3
+ size 1420425204
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:084f35c82b56101e12303ac699d49a30789ad039323b9f6c5abf4f06f4f2f8e4
3
+ size 2841074157
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f78c04db25254fcb28b4b8f75b8844280da67041351e4b57fa36d0916fb95a3
3
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fae0b86254a3f75f212ad9defa917a452476f23140d38f4aae369e916295c09
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "501": {
4
+ "content": "[CLS]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "502": {
12
+ "content": "[MASK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "503": {
20
+ "content": "[PAD]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "504": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "505": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": false,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
trainer_state.json ADDED
@@ -0,0 +1,841 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.390835579514825,
5
+ "eval_steps": 30,
6
+ "global_step": 2000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08086253369272237,
13
+ "eval_accuracy": 0.34545454545454546,
14
+ "eval_f1": 0.08213880790411401,
15
+ "eval_loss": 1.7152690887451172,
16
+ "eval_precision": 0.12064459930313588,
17
+ "eval_recall": 0.14761904761904762,
18
+ "eval_runtime": 13.6508,
19
+ "eval_samples_per_second": 24.174,
20
+ "eval_steps_per_second": 3.077,
21
+ "step": 30
22
+ },
23
+ {
24
+ "epoch": 0.16172506738544473,
25
+ "eval_accuracy": 0.4393939393939394,
26
+ "eval_f1": 0.21690145794050794,
27
+ "eval_loss": 1.5937703847885132,
28
+ "eval_precision": 0.32583090861779385,
29
+ "eval_recall": 0.25099255283137484,
30
+ "eval_runtime": 13.1278,
31
+ "eval_samples_per_second": 25.137,
32
+ "eval_steps_per_second": 3.199,
33
+ "step": 60
34
+ },
35
+ {
36
+ "epoch": 0.24258760107816713,
37
+ "eval_accuracy": 0.38181818181818183,
38
+ "eval_f1": 0.13698374256214413,
39
+ "eval_loss": 1.5304551124572754,
40
+ "eval_precision": 0.23018433179723502,
41
+ "eval_recall": 0.17921977437427894,
42
+ "eval_runtime": 13.0828,
43
+ "eval_samples_per_second": 25.224,
44
+ "eval_steps_per_second": 3.21,
45
+ "step": 90
46
+ },
47
+ {
48
+ "epoch": 0.32345013477088946,
49
+ "eval_accuracy": 0.37575757575757573,
50
+ "eval_f1": 0.13802749957562382,
51
+ "eval_loss": 1.6096415519714355,
52
+ "eval_precision": 0.1890046674322801,
53
+ "eval_recall": 0.18045360027080612,
54
+ "eval_runtime": 13.3288,
55
+ "eval_samples_per_second": 24.758,
56
+ "eval_steps_per_second": 3.151,
57
+ "step": 120
58
+ },
59
+ {
60
+ "epoch": 0.40431266846361186,
61
+ "eval_accuracy": 0.45151515151515154,
62
+ "eval_f1": 0.23452986300450943,
63
+ "eval_loss": 1.4274822473526,
64
+ "eval_precision": 0.38950040524911433,
65
+ "eval_recall": 0.27641592488584005,
66
+ "eval_runtime": 13.4187,
67
+ "eval_samples_per_second": 24.593,
68
+ "eval_steps_per_second": 3.13,
69
+ "step": 150
70
+ },
71
+ {
72
+ "epoch": 0.48517520215633425,
73
+ "eval_accuracy": 0.5424242424242425,
74
+ "eval_f1": 0.327924405270561,
75
+ "eval_loss": 1.2386934757232666,
76
+ "eval_precision": 0.35548957287833166,
77
+ "eval_recall": 0.35121955898274004,
78
+ "eval_runtime": 14.6075,
79
+ "eval_samples_per_second": 22.591,
80
+ "eval_steps_per_second": 2.875,
81
+ "step": 180
82
+ },
83
+ {
84
+ "epoch": 0.5660377358490566,
85
+ "eval_accuracy": 0.41818181818181815,
86
+ "eval_f1": 0.2642022704662224,
87
+ "eval_loss": 1.7232695817947388,
88
+ "eval_precision": 0.440507406905074,
89
+ "eval_recall": 0.29508974215683664,
90
+ "eval_runtime": 14.2568,
91
+ "eval_samples_per_second": 23.147,
92
+ "eval_steps_per_second": 2.946,
93
+ "step": 210
94
+ },
95
+ {
96
+ "epoch": 0.6469002695417789,
97
+ "eval_accuracy": 0.5575757575757576,
98
+ "eval_f1": 0.36732006348386254,
99
+ "eval_loss": 1.1753942966461182,
100
+ "eval_precision": 0.43188775510204075,
101
+ "eval_recall": 0.36647921681366585,
102
+ "eval_runtime": 13.3212,
103
+ "eval_samples_per_second": 24.772,
104
+ "eval_steps_per_second": 3.153,
105
+ "step": 240
106
+ },
107
+ {
108
+ "epoch": 0.7277628032345014,
109
+ "eval_accuracy": 0.5393939393939394,
110
+ "eval_f1": 0.32937378728076405,
111
+ "eval_loss": 1.2770304679870605,
112
+ "eval_precision": 0.39731428747888,
113
+ "eval_recall": 0.32973768555006505,
114
+ "eval_runtime": 13.2685,
115
+ "eval_samples_per_second": 24.871,
116
+ "eval_steps_per_second": 3.165,
117
+ "step": 270
118
+ },
119
+ {
120
+ "epoch": 0.8086253369272237,
121
+ "eval_accuracy": 0.5606060606060606,
122
+ "eval_f1": 0.3439473243062972,
123
+ "eval_loss": 1.2049232721328735,
124
+ "eval_precision": 0.4835807771246264,
125
+ "eval_recall": 0.33995508230465793,
126
+ "eval_runtime": 15.2882,
127
+ "eval_samples_per_second": 21.585,
128
+ "eval_steps_per_second": 2.747,
129
+ "step": 300
130
+ },
131
+ {
132
+ "epoch": 0.889487870619946,
133
+ "eval_accuracy": 0.5666666666666667,
134
+ "eval_f1": 0.394947488354592,
135
+ "eval_loss": 1.108344554901123,
136
+ "eval_precision": 0.5166598073288214,
137
+ "eval_recall": 0.3978135018724203,
138
+ "eval_runtime": 14.5889,
139
+ "eval_samples_per_second": 22.62,
140
+ "eval_steps_per_second": 2.879,
141
+ "step": 330
142
+ },
143
+ {
144
+ "epoch": 0.9703504043126685,
145
+ "eval_accuracy": 0.6121212121212121,
146
+ "eval_f1": 0.428135738050604,
147
+ "eval_loss": 1.141391634941101,
148
+ "eval_precision": 0.6312669683257919,
149
+ "eval_recall": 0.40262976955895236,
150
+ "eval_runtime": 12.9679,
151
+ "eval_samples_per_second": 25.447,
152
+ "eval_steps_per_second": 3.239,
153
+ "step": 360
154
+ },
155
+ {
156
+ "epoch": 1.0512129380053907,
157
+ "eval_accuracy": 0.603030303030303,
158
+ "eval_f1": 0.4038251382972519,
159
+ "eval_loss": 1.116855263710022,
160
+ "eval_precision": 0.4923091038697975,
161
+ "eval_recall": 0.41548484912495504,
162
+ "eval_runtime": 13.6567,
163
+ "eval_samples_per_second": 24.164,
164
+ "eval_steps_per_second": 3.075,
165
+ "step": 390
166
+ },
167
+ {
168
+ "epoch": 1.1320754716981132,
169
+ "eval_accuracy": 0.5757575757575758,
170
+ "eval_f1": 0.41737552463332295,
171
+ "eval_loss": 1.13466477394104,
172
+ "eval_precision": 0.6076030412164866,
173
+ "eval_recall": 0.4208784549569585,
174
+ "eval_runtime": 12.847,
175
+ "eval_samples_per_second": 25.687,
176
+ "eval_steps_per_second": 3.269,
177
+ "step": 420
178
+ },
179
+ {
180
+ "epoch": 1.2129380053908356,
181
+ "eval_accuracy": 0.603030303030303,
182
+ "eval_f1": 0.43365687052323004,
183
+ "eval_loss": 1.1254369020462036,
184
+ "eval_precision": 0.5694631128061721,
185
+ "eval_recall": 0.41092089239472196,
186
+ "eval_runtime": 12.8187,
187
+ "eval_samples_per_second": 25.744,
188
+ "eval_steps_per_second": 3.276,
189
+ "step": 450
190
+ },
191
+ {
192
+ "epoch": 1.2938005390835579,
193
+ "eval_accuracy": 0.5696969696969697,
194
+ "eval_f1": 0.3965291918438572,
195
+ "eval_loss": 1.2160786390304565,
196
+ "eval_precision": 0.4828996768066082,
197
+ "eval_recall": 0.38449281792865353,
198
+ "eval_runtime": 14.7332,
199
+ "eval_samples_per_second": 22.398,
200
+ "eval_steps_per_second": 2.851,
201
+ "step": 480
202
+ },
203
+ {
204
+ "epoch": 1.3477088948787062,
205
+ "grad_norm": 18.155202865600586,
206
+ "learning_rate": 3.876909254267745e-05,
207
+ "loss": 1.3924,
208
+ "step": 500
209
+ },
210
+ {
211
+ "epoch": 1.3746630727762803,
212
+ "eval_accuracy": 0.5424242424242425,
213
+ "eval_f1": 0.4319260406224048,
214
+ "eval_loss": 1.1475260257720947,
215
+ "eval_precision": 0.45103525254505755,
216
+ "eval_recall": 0.4340482151274285,
217
+ "eval_runtime": 32.076,
218
+ "eval_samples_per_second": 10.288,
219
+ "eval_steps_per_second": 1.309,
220
+ "step": 510
221
+ },
222
+ {
223
+ "epoch": 1.4555256064690028,
224
+ "eval_accuracy": 0.5878787878787879,
225
+ "eval_f1": 0.3939948328709428,
226
+ "eval_loss": 1.1194807291030884,
227
+ "eval_precision": 0.608180480837931,
228
+ "eval_recall": 0.392683155449795,
229
+ "eval_runtime": 110.8563,
230
+ "eval_samples_per_second": 2.977,
231
+ "eval_steps_per_second": 0.379,
232
+ "step": 540
233
+ },
234
+ {
235
+ "epoch": 1.536388140161725,
236
+ "eval_accuracy": 0.5818181818181818,
237
+ "eval_f1": 0.4490235853265899,
238
+ "eval_loss": 1.1963447332382202,
239
+ "eval_precision": 0.5116256986573146,
240
+ "eval_recall": 0.4377877348363712,
241
+ "eval_runtime": 64.5493,
242
+ "eval_samples_per_second": 5.112,
243
+ "eval_steps_per_second": 0.651,
244
+ "step": 570
245
+ },
246
+ {
247
+ "epoch": 1.6172506738544474,
248
+ "eval_accuracy": 0.6060606060606061,
249
+ "eval_f1": 0.44688978798681084,
250
+ "eval_loss": 1.1052302122116089,
251
+ "eval_precision": 0.5367127769479213,
252
+ "eval_recall": 0.4296217856765177,
253
+ "eval_runtime": 71.5496,
254
+ "eval_samples_per_second": 4.612,
255
+ "eval_steps_per_second": 0.587,
256
+ "step": 600
257
+ },
258
+ {
259
+ "epoch": 1.6981132075471699,
260
+ "eval_accuracy": 0.5848484848484848,
261
+ "eval_f1": 0.46957050415643353,
262
+ "eval_loss": 1.108028769493103,
263
+ "eval_precision": 0.572241929381981,
264
+ "eval_recall": 0.4537304361700488,
265
+ "eval_runtime": 12.7244,
266
+ "eval_samples_per_second": 25.935,
267
+ "eval_steps_per_second": 3.301,
268
+ "step": 630
269
+ },
270
+ {
271
+ "epoch": 1.778975741239892,
272
+ "eval_accuracy": 0.6212121212121212,
273
+ "eval_f1": 0.47748925036798145,
274
+ "eval_loss": 1.033095359802246,
275
+ "eval_precision": 0.5650004027150538,
276
+ "eval_recall": 0.4613483358977623,
277
+ "eval_runtime": 13.0381,
278
+ "eval_samples_per_second": 25.31,
279
+ "eval_steps_per_second": 3.221,
280
+ "step": 660
281
+ },
282
+ {
283
+ "epoch": 1.8598382749326146,
284
+ "eval_accuracy": 0.6242424242424243,
285
+ "eval_f1": 0.4612218625033871,
286
+ "eval_loss": 1.0287344455718994,
287
+ "eval_precision": 0.5787994888332474,
288
+ "eval_recall": 0.4401355279945231,
289
+ "eval_runtime": 14.0033,
290
+ "eval_samples_per_second": 23.566,
291
+ "eval_steps_per_second": 2.999,
292
+ "step": 690
293
+ },
294
+ {
295
+ "epoch": 1.940700808625337,
296
+ "eval_accuracy": 0.6,
297
+ "eval_f1": 0.43355058251543566,
298
+ "eval_loss": 1.0855339765548706,
299
+ "eval_precision": 0.5482738467345629,
300
+ "eval_recall": 0.407901742703353,
301
+ "eval_runtime": 14.8086,
302
+ "eval_samples_per_second": 22.284,
303
+ "eval_steps_per_second": 2.836,
304
+ "step": 720
305
+ },
306
+ {
307
+ "epoch": 2.0215633423180592,
308
+ "eval_accuracy": 0.6060606060606061,
309
+ "eval_f1": 0.4278903186584704,
310
+ "eval_loss": 1.0721008777618408,
311
+ "eval_precision": 0.6261473906210748,
312
+ "eval_recall": 0.400766872593428,
313
+ "eval_runtime": 13.33,
314
+ "eval_samples_per_second": 24.756,
315
+ "eval_steps_per_second": 3.151,
316
+ "step": 750
317
+ },
318
+ {
319
+ "epoch": 2.1024258760107815,
320
+ "eval_accuracy": 0.5818181818181818,
321
+ "eval_f1": 0.46279487414189074,
322
+ "eval_loss": 1.0543911457061768,
323
+ "eval_precision": 0.4927064392103887,
324
+ "eval_recall": 0.4551381887414431,
325
+ "eval_runtime": 13.3061,
326
+ "eval_samples_per_second": 24.801,
327
+ "eval_steps_per_second": 3.156,
328
+ "step": 780
329
+ },
330
+ {
331
+ "epoch": 2.183288409703504,
332
+ "eval_accuracy": 0.6,
333
+ "eval_f1": 0.44787598241263715,
334
+ "eval_loss": 1.1306463479995728,
335
+ "eval_precision": 0.5029518040278441,
336
+ "eval_recall": 0.42696476597535643,
337
+ "eval_runtime": 14.7754,
338
+ "eval_samples_per_second": 22.334,
339
+ "eval_steps_per_second": 2.843,
340
+ "step": 810
341
+ },
342
+ {
343
+ "epoch": 2.2641509433962264,
344
+ "eval_accuracy": 0.6151515151515151,
345
+ "eval_f1": 0.47831287112215076,
346
+ "eval_loss": 1.1025209426879883,
347
+ "eval_precision": 0.5683183097103888,
348
+ "eval_recall": 0.45416730293429747,
349
+ "eval_runtime": 14.712,
350
+ "eval_samples_per_second": 22.431,
351
+ "eval_steps_per_second": 2.855,
352
+ "step": 840
353
+ },
354
+ {
355
+ "epoch": 2.3450134770889486,
356
+ "eval_accuracy": 0.5848484848484848,
357
+ "eval_f1": 0.4612687851809403,
358
+ "eval_loss": 1.1565966606140137,
359
+ "eval_precision": 0.5073562417803803,
360
+ "eval_recall": 0.4593010520365005,
361
+ "eval_runtime": 13.517,
362
+ "eval_samples_per_second": 24.414,
363
+ "eval_steps_per_second": 3.107,
364
+ "step": 870
365
+ },
366
+ {
367
+ "epoch": 2.4258760107816713,
368
+ "eval_accuracy": 0.5636363636363636,
369
+ "eval_f1": 0.45972084178362305,
370
+ "eval_loss": 1.2057921886444092,
371
+ "eval_precision": 0.5695289983467323,
372
+ "eval_recall": 0.461413466501444,
373
+ "eval_runtime": 13.0432,
374
+ "eval_samples_per_second": 25.301,
375
+ "eval_steps_per_second": 3.22,
376
+ "step": 900
377
+ },
378
+ {
379
+ "epoch": 2.5067385444743935,
380
+ "eval_accuracy": 0.6090909090909091,
381
+ "eval_f1": 0.4628685133833875,
382
+ "eval_loss": 1.1381313800811768,
383
+ "eval_precision": 0.5632985257985258,
384
+ "eval_recall": 0.4583315282026837,
385
+ "eval_runtime": 14.1243,
386
+ "eval_samples_per_second": 23.364,
387
+ "eval_steps_per_second": 2.974,
388
+ "step": 930
389
+ },
390
+ {
391
+ "epoch": 2.5876010781671157,
392
+ "eval_accuracy": 0.6242424242424243,
393
+ "eval_f1": 0.4615624143566702,
394
+ "eval_loss": 1.0724554061889648,
395
+ "eval_precision": 0.5741660836493663,
396
+ "eval_recall": 0.4438527652883962,
397
+ "eval_runtime": 14.1621,
398
+ "eval_samples_per_second": 23.302,
399
+ "eval_steps_per_second": 2.966,
400
+ "step": 960
401
+ },
402
+ {
403
+ "epoch": 2.6684636118598384,
404
+ "eval_accuracy": 0.6060606060606061,
405
+ "eval_f1": 0.47087755462465813,
406
+ "eval_loss": 1.135140299797058,
407
+ "eval_precision": 0.5209651599354117,
408
+ "eval_recall": 0.45427047745523014,
409
+ "eval_runtime": 14.0682,
410
+ "eval_samples_per_second": 23.457,
411
+ "eval_steps_per_second": 2.985,
412
+ "step": 990
413
+ },
414
+ {
415
+ "epoch": 2.6954177897574123,
416
+ "grad_norm": 7.412640571594238,
417
+ "learning_rate": 2.75381850853549e-05,
418
+ "loss": 0.9741,
419
+ "step": 1000
420
+ },
421
+ {
422
+ "epoch": 2.7493261455525606,
423
+ "eval_accuracy": 0.6242424242424243,
424
+ "eval_f1": 0.4549752385442883,
425
+ "eval_loss": 0.9898973703384399,
426
+ "eval_precision": 0.5431213833871716,
427
+ "eval_recall": 0.44127615017259786,
428
+ "eval_runtime": 113.2331,
429
+ "eval_samples_per_second": 2.914,
430
+ "eval_steps_per_second": 0.371,
431
+ "step": 1020
432
+ },
433
+ {
434
+ "epoch": 2.830188679245283,
435
+ "eval_accuracy": 0.6212121212121212,
436
+ "eval_f1": 0.477029569791836,
437
+ "eval_loss": 1.1227794885635376,
438
+ "eval_precision": 0.5355122892068706,
439
+ "eval_recall": 0.4702582406289971,
440
+ "eval_runtime": 22.5487,
441
+ "eval_samples_per_second": 14.635,
442
+ "eval_steps_per_second": 1.863,
443
+ "step": 1050
444
+ },
445
+ {
446
+ "epoch": 2.9110512129380055,
447
+ "eval_accuracy": 0.6272727272727273,
448
+ "eval_f1": 0.4661588293248819,
449
+ "eval_loss": 1.0796787738800049,
450
+ "eval_precision": 0.5698680297271846,
451
+ "eval_recall": 0.4402755664195842,
452
+ "eval_runtime": 142.0116,
453
+ "eval_samples_per_second": 2.324,
454
+ "eval_steps_per_second": 0.296,
455
+ "step": 1080
456
+ },
457
+ {
458
+ "epoch": 2.9919137466307277,
459
+ "eval_accuracy": 0.6424242424242425,
460
+ "eval_f1": 0.49676860579479715,
461
+ "eval_loss": 1.0576601028442383,
462
+ "eval_precision": 0.5889560125125319,
463
+ "eval_recall": 0.48000300606800994,
464
+ "eval_runtime": 25.0835,
465
+ "eval_samples_per_second": 13.156,
466
+ "eval_steps_per_second": 1.674,
467
+ "step": 1110
468
+ },
469
+ {
470
+ "epoch": 3.07277628032345,
471
+ "eval_accuracy": 0.6333333333333333,
472
+ "eval_f1": 0.5076311071541079,
473
+ "eval_loss": 1.1107242107391357,
474
+ "eval_precision": 0.5412034459392385,
475
+ "eval_recall": 0.4934487547962528,
476
+ "eval_runtime": 13.978,
477
+ "eval_samples_per_second": 23.608,
478
+ "eval_steps_per_second": 3.005,
479
+ "step": 1140
480
+ },
481
+ {
482
+ "epoch": 3.1536388140161726,
483
+ "eval_accuracy": 0.6515151515151515,
484
+ "eval_f1": 0.5186595057636214,
485
+ "eval_loss": 1.1433833837509155,
486
+ "eval_precision": 0.5322955893403752,
487
+ "eval_recall": 0.5121777726730862,
488
+ "eval_runtime": 14.8138,
489
+ "eval_samples_per_second": 22.277,
490
+ "eval_steps_per_second": 2.835,
491
+ "step": 1170
492
+ },
493
+ {
494
+ "epoch": 3.234501347708895,
495
+ "eval_accuracy": 0.6303030303030303,
496
+ "eval_f1": 0.4818021563155342,
497
+ "eval_loss": 1.2040390968322754,
498
+ "eval_precision": 0.5245503034019318,
499
+ "eval_recall": 0.4790353988658817,
500
+ "eval_runtime": 14.5347,
501
+ "eval_samples_per_second": 22.704,
502
+ "eval_steps_per_second": 2.89,
503
+ "step": 1200
504
+ },
505
+ {
506
+ "epoch": 3.315363881401617,
507
+ "eval_accuracy": 0.6303030303030303,
508
+ "eval_f1": 0.5005894502956657,
509
+ "eval_loss": 1.106711983680725,
510
+ "eval_precision": 0.5347839031818471,
511
+ "eval_recall": 0.49606350872380334,
512
+ "eval_runtime": 14.5181,
513
+ "eval_samples_per_second": 22.73,
514
+ "eval_steps_per_second": 2.893,
515
+ "step": 1230
516
+ },
517
+ {
518
+ "epoch": 3.3962264150943398,
519
+ "eval_accuracy": 0.6363636363636364,
520
+ "eval_f1": 0.49230222929692463,
521
+ "eval_loss": 1.100297451019287,
522
+ "eval_precision": 0.5613737992397406,
523
+ "eval_recall": 0.48222645824949406,
524
+ "eval_runtime": 14.6402,
525
+ "eval_samples_per_second": 22.541,
526
+ "eval_steps_per_second": 2.869,
527
+ "step": 1260
528
+ },
529
+ {
530
+ "epoch": 3.477088948787062,
531
+ "eval_accuracy": 0.603030303030303,
532
+ "eval_f1": 0.48380416653626124,
533
+ "eval_loss": 1.1756219863891602,
534
+ "eval_precision": 0.5033237551313431,
535
+ "eval_recall": 0.4908017943866942,
536
+ "eval_runtime": 13.454,
537
+ "eval_samples_per_second": 24.528,
538
+ "eval_steps_per_second": 3.122,
539
+ "step": 1290
540
+ },
541
+ {
542
+ "epoch": 3.557951482479784,
543
+ "eval_accuracy": 0.6060606060606061,
544
+ "eval_f1": 0.4764702057860691,
545
+ "eval_loss": 1.1003729104995728,
546
+ "eval_precision": 0.5288839860268432,
547
+ "eval_recall": 0.4590755558938861,
548
+ "eval_runtime": 12.5627,
549
+ "eval_samples_per_second": 26.268,
550
+ "eval_steps_per_second": 3.343,
551
+ "step": 1320
552
+ },
553
+ {
554
+ "epoch": 3.638814016172507,
555
+ "eval_accuracy": 0.593939393939394,
556
+ "eval_f1": 0.487375493221817,
557
+ "eval_loss": 1.086908221244812,
558
+ "eval_precision": 0.48992444445192695,
559
+ "eval_recall": 0.5073454405336776,
560
+ "eval_runtime": 12.6831,
561
+ "eval_samples_per_second": 26.019,
562
+ "eval_steps_per_second": 3.311,
563
+ "step": 1350
564
+ },
565
+ {
566
+ "epoch": 3.719676549865229,
567
+ "eval_accuracy": 0.5818181818181818,
568
+ "eval_f1": 0.4671306646522057,
569
+ "eval_loss": 1.1851921081542969,
570
+ "eval_precision": 0.5193033472658716,
571
+ "eval_recall": 0.46147135451955557,
572
+ "eval_runtime": 13.4674,
573
+ "eval_samples_per_second": 24.504,
574
+ "eval_steps_per_second": 3.119,
575
+ "step": 1380
576
+ },
577
+ {
578
+ "epoch": 3.8005390835579513,
579
+ "eval_accuracy": 0.6272727272727273,
580
+ "eval_f1": 0.4738897906354585,
581
+ "eval_loss": 1.1233046054840088,
582
+ "eval_precision": 0.5308286308286309,
583
+ "eval_recall": 0.45352857445191025,
584
+ "eval_runtime": 14.4652,
585
+ "eval_samples_per_second": 22.813,
586
+ "eval_steps_per_second": 2.904,
587
+ "step": 1410
588
+ },
589
+ {
590
+ "epoch": 3.881401617250674,
591
+ "eval_accuracy": 0.6333333333333333,
592
+ "eval_f1": 0.49797095491412685,
593
+ "eval_loss": 1.0570012331008911,
594
+ "eval_precision": 0.5228267459278104,
595
+ "eval_recall": 0.48949827535886625,
596
+ "eval_runtime": 13.7881,
597
+ "eval_samples_per_second": 23.934,
598
+ "eval_steps_per_second": 3.046,
599
+ "step": 1440
600
+ },
601
+ {
602
+ "epoch": 3.9622641509433962,
603
+ "eval_accuracy": 0.5696969696969697,
604
+ "eval_f1": 0.436851400958813,
605
+ "eval_loss": 1.196516513824463,
606
+ "eval_precision": 0.4607919084994466,
607
+ "eval_recall": 0.44320607800419104,
608
+ "eval_runtime": 13.9502,
609
+ "eval_samples_per_second": 23.656,
610
+ "eval_steps_per_second": 3.011,
611
+ "step": 1470
612
+ },
613
+ {
614
+ "epoch": 4.0431266846361185,
615
+ "grad_norm": 4.396773815155029,
616
+ "learning_rate": 1.6307277628032348e-05,
617
+ "loss": 0.6945,
618
+ "step": 1500
619
+ },
620
+ {
621
+ "epoch": 4.0431266846361185,
622
+ "eval_accuracy": 0.6242424242424243,
623
+ "eval_f1": 0.48252518710343445,
624
+ "eval_loss": 1.0697778463363647,
625
+ "eval_precision": 0.529925883396837,
626
+ "eval_recall": 0.4736448771051681,
627
+ "eval_runtime": 14.005,
628
+ "eval_samples_per_second": 23.563,
629
+ "eval_steps_per_second": 2.999,
630
+ "step": 1500
631
+ },
632
+ {
633
+ "epoch": 4.123989218328841,
634
+ "eval_accuracy": 0.6242424242424243,
635
+ "eval_f1": 0.49720491179937104,
636
+ "eval_loss": 1.2521579265594482,
637
+ "eval_precision": 0.5386641378880501,
638
+ "eval_recall": 0.49817853705778303,
639
+ "eval_runtime": 115.3902,
640
+ "eval_samples_per_second": 2.86,
641
+ "eval_steps_per_second": 0.364,
642
+ "step": 1530
643
+ },
644
+ {
645
+ "epoch": 4.204851752021563,
646
+ "eval_accuracy": 0.6181818181818182,
647
+ "eval_f1": 0.47697868890738926,
648
+ "eval_loss": 1.4063215255737305,
649
+ "eval_precision": 0.4929803350172045,
650
+ "eval_recall": 0.4728290050243222,
651
+ "eval_runtime": 101.5928,
652
+ "eval_samples_per_second": 3.248,
653
+ "eval_steps_per_second": 0.413,
654
+ "step": 1560
655
+ },
656
+ {
657
+ "epoch": 4.285714285714286,
658
+ "eval_accuracy": 0.6212121212121212,
659
+ "eval_f1": 0.4993387370778744,
660
+ "eval_loss": 1.349918007850647,
661
+ "eval_precision": 0.49880570409982167,
662
+ "eval_recall": 0.5059772713055806,
663
+ "eval_runtime": 115.6893,
664
+ "eval_samples_per_second": 2.852,
665
+ "eval_steps_per_second": 0.363,
666
+ "step": 1590
667
+ },
668
+ {
669
+ "epoch": 4.366576819407008,
670
+ "eval_accuracy": 0.6,
671
+ "eval_f1": 0.46437393231058216,
672
+ "eval_loss": 1.340740442276001,
673
+ "eval_precision": 0.47078319079488345,
674
+ "eval_recall": 0.46173072420332834,
675
+ "eval_runtime": 13.0095,
676
+ "eval_samples_per_second": 25.366,
677
+ "eval_steps_per_second": 3.228,
678
+ "step": 1620
679
+ },
680
+ {
681
+ "epoch": 4.44743935309973,
682
+ "eval_accuracy": 0.6393939393939394,
683
+ "eval_f1": 0.4956975851278201,
684
+ "eval_loss": 1.4181333780288696,
685
+ "eval_precision": 0.5323631437686738,
686
+ "eval_recall": 0.48370483391830105,
687
+ "eval_runtime": 15.6022,
688
+ "eval_samples_per_second": 21.151,
689
+ "eval_steps_per_second": 2.692,
690
+ "step": 1650
691
+ },
692
+ {
693
+ "epoch": 4.528301886792453,
694
+ "eval_accuracy": 0.6212121212121212,
695
+ "eval_f1": 0.48744288293160476,
696
+ "eval_loss": 1.4648175239562988,
697
+ "eval_precision": 0.5288562135045842,
698
+ "eval_recall": 0.484239463132841,
699
+ "eval_runtime": 14.8896,
700
+ "eval_samples_per_second": 22.163,
701
+ "eval_steps_per_second": 2.821,
702
+ "step": 1680
703
+ },
704
+ {
705
+ "epoch": 4.609164420485175,
706
+ "eval_accuracy": 0.6363636363636364,
707
+ "eval_f1": 0.49874561310412674,
708
+ "eval_loss": 1.3795430660247803,
709
+ "eval_precision": 0.515957965289801,
710
+ "eval_recall": 0.49555231275739653,
711
+ "eval_runtime": 15.2135,
712
+ "eval_samples_per_second": 21.691,
713
+ "eval_steps_per_second": 2.761,
714
+ "step": 1710
715
+ },
716
+ {
717
+ "epoch": 4.690026954177897,
718
+ "eval_accuracy": 0.6545454545454545,
719
+ "eval_f1": 0.5072945393065025,
720
+ "eval_loss": 1.255421757698059,
721
+ "eval_precision": 0.527637085699966,
722
+ "eval_recall": 0.5017196060314337,
723
+ "eval_runtime": 13.3432,
724
+ "eval_samples_per_second": 24.732,
725
+ "eval_steps_per_second": 3.148,
726
+ "step": 1740
727
+ },
728
+ {
729
+ "epoch": 4.77088948787062,
730
+ "eval_accuracy": 0.6181818181818182,
731
+ "eval_f1": 0.47364322391678987,
732
+ "eval_loss": 1.2744801044464111,
733
+ "eval_precision": 0.5261603756546867,
734
+ "eval_recall": 0.4762025977768268,
735
+ "eval_runtime": 13.3766,
736
+ "eval_samples_per_second": 24.67,
737
+ "eval_steps_per_second": 3.14,
738
+ "step": 1770
739
+ },
740
+ {
741
+ "epoch": 4.8517520215633425,
742
+ "eval_accuracy": 0.6272727272727273,
743
+ "eval_f1": 0.5006190896494088,
744
+ "eval_loss": 1.2532024383544922,
745
+ "eval_precision": 0.5347134093222614,
746
+ "eval_recall": 0.4860070789020448,
747
+ "eval_runtime": 13.4318,
748
+ "eval_samples_per_second": 24.569,
749
+ "eval_steps_per_second": 3.127,
750
+ "step": 1800
751
+ },
752
+ {
753
+ "epoch": 4.932614555256064,
754
+ "eval_accuracy": 0.6060606060606061,
755
+ "eval_f1": 0.47951387194587414,
756
+ "eval_loss": 1.2477487325668335,
757
+ "eval_precision": 0.4841348163818714,
758
+ "eval_recall": 0.4813308565204252,
759
+ "eval_runtime": 13.4442,
760
+ "eval_samples_per_second": 24.546,
761
+ "eval_steps_per_second": 3.124,
762
+ "step": 1830
763
+ },
764
+ {
765
+ "epoch": 5.013477088948787,
766
+ "eval_accuracy": 0.6393939393939394,
767
+ "eval_f1": 0.4989217275259948,
768
+ "eval_loss": 1.2193477153778076,
769
+ "eval_precision": 0.5258493064306811,
770
+ "eval_recall": 0.4869117013024005,
771
+ "eval_runtime": 13.4387,
772
+ "eval_samples_per_second": 24.556,
773
+ "eval_steps_per_second": 3.125,
774
+ "step": 1860
775
+ },
776
+ {
777
+ "epoch": 5.09433962264151,
778
+ "eval_accuracy": 0.6424242424242425,
779
+ "eval_f1": 0.5115200267534169,
780
+ "eval_loss": 1.381670355796814,
781
+ "eval_precision": 0.5336989009157052,
782
+ "eval_recall": 0.5001792056559704,
783
+ "eval_runtime": 14.4622,
784
+ "eval_samples_per_second": 22.818,
785
+ "eval_steps_per_second": 2.904,
786
+ "step": 1890
787
+ },
788
+ {
789
+ "epoch": 5.175202156334231,
790
+ "eval_accuracy": 0.6272727272727273,
791
+ "eval_f1": 0.49956294294308473,
792
+ "eval_loss": 1.4055507183074951,
793
+ "eval_precision": 0.510424264019138,
794
+ "eval_recall": 0.49196977915696183,
795
+ "eval_runtime": 13.4719,
796
+ "eval_samples_per_second": 24.496,
797
+ "eval_steps_per_second": 3.118,
798
+ "step": 1920
799
+ },
800
+ {
801
+ "epoch": 5.256064690026954,
802
+ "eval_accuracy": 0.6303030303030303,
803
+ "eval_f1": 0.49657096716171206,
804
+ "eval_loss": 1.4433475732803345,
805
+ "eval_precision": 0.5023020702533887,
806
+ "eval_recall": 0.49618528944682805,
807
+ "eval_runtime": 12.5628,
808
+ "eval_samples_per_second": 26.268,
809
+ "eval_steps_per_second": 3.343,
810
+ "step": 1950
811
+ },
812
+ {
813
+ "epoch": 5.336927223719677,
814
+ "eval_accuracy": 0.6212121212121212,
815
+ "eval_f1": 0.4846771824827281,
816
+ "eval_loss": 1.4632567167282104,
817
+ "eval_precision": 0.49600215736518566,
818
+ "eval_recall": 0.4812536923450428,
819
+ "eval_runtime": 12.7703,
820
+ "eval_samples_per_second": 25.841,
821
+ "eval_steps_per_second": 3.289,
822
+ "step": 1980
823
+ },
824
+ {
825
+ "epoch": 5.390835579514825,
826
+ "grad_norm": 10.319731712341309,
827
+ "learning_rate": 5.0763701707097935e-06,
828
+ "loss": 0.4025,
829
+ "step": 2000
830
+ }
831
+ ],
832
+ "logging_steps": 500,
833
+ "max_steps": 2226,
834
+ "num_input_tokens_seen": 0,
835
+ "num_train_epochs": 6,
836
+ "save_steps": 500,
837
+ "total_flos": 9271286495496432.0,
838
+ "train_batch_size": 8,
839
+ "trial_name": null,
840
+ "trial_params": null
841
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802688dde848bb24ec73ba5b6be61944f4eb3d665acc269a57afb8710abb6b87
3
+ size 4984
vocab.txt ADDED
The diff for this file is too large to render. See raw diff