iceman2434 committed on
Commit
4b38d89
1 Parent(s): 82de32c

Upload 13 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.05,
3
+ "eval_accuracy": 0.9518293307243376,
4
+ "eval_f1": 0.8323994177665034,
5
+ "eval_loss": 0.14232856035232544,
6
+ "eval_precision": 0.856675409701988,
7
+ "eval_recall": 0.8188730649190578,
8
+ "eval_runtime": 836.2664,
9
+ "eval_samples_per_second": 24.016,
10
+ "eval_steps_per_second": 3.003,
11
+ "train_loss": 0.26377957344055175,
12
+ "train_runtime": 2810.1261,
13
+ "train_samples_per_second": 2.847,
14
+ "train_steps_per_second": 0.356
15
+ }
config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "xlm-roberta-base",
3
+ "architectures": [
4
+ "XLMRobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "ADJ",
15
+ "1": "ADP",
16
+ "2": "ADV",
17
+ "3": "AUX",
18
+ "4": "CCONJ",
19
+ "5": "DET",
20
+ "6": "INTJ",
21
+ "7": "NOUN",
22
+ "8": "NUM",
23
+ "9": "PART",
24
+ "10": "PRON",
25
+ "11": "PROPN",
26
+ "12": "PUNCT",
27
+ "13": "SCONJ",
28
+ "14": "SYM",
29
+ "15": "VERB",
30
+ "16": "X"
31
+ },
32
+ "initializer_range": 0.02,
33
+ "intermediate_size": 3072,
34
+ "label2id": {
35
+ "ADJ": 0,
36
+ "ADP": 1,
37
+ "ADV": 2,
38
+ "AUX": 3,
39
+ "CCONJ": 4,
40
+ "DET": 5,
41
+ "INTJ": 6,
42
+ "NOUN": 7,
43
+ "NUM": 8,
44
+ "PART": 9,
45
+ "PRON": 10,
46
+ "PROPN": 11,
47
+ "PUNCT": 12,
48
+ "SCONJ": 13,
49
+ "SYM": 14,
50
+ "VERB": 15,
51
+ "X": 16
52
+ },
53
+ "layer_norm_eps": 1e-05,
54
+ "max_position_embeddings": 514,
55
+ "model_type": "xlm-roberta",
56
+ "num_attention_heads": 12,
57
+ "num_hidden_layers": 12,
58
+ "output_past": true,
59
+ "pad_token_id": 1,
60
+ "position_embedding_type": "absolute",
61
+ "torch_dtype": "float32",
62
+ "transformers_version": "4.38.2",
63
+ "type_vocab_size": 1,
64
+ "use_cache": true,
65
+ "vocab_size": 250002
66
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.05,
3
+ "eval_accuracy": 0.9518293307243376,
4
+ "eval_f1": 0.8323994177665034,
5
+ "eval_loss": 0.14232856035232544,
6
+ "eval_precision": 0.856675409701988,
7
+ "eval_recall": 0.8188730649190578,
8
+ "eval_runtime": 836.2664,
9
+ "eval_samples_per_second": 24.016,
10
+ "eval_steps_per_second": 3.003
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d210e279f3a4d5fd9b31638741c1365168714982fec8ce2b92953b441ea2309e
3
+ size 1109888564
predictions/test_ft_udpos213-tl.results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"f1": 77.71939484058456}
predictions/test_ft_udpos213-tl.tsv ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PUNCT PUNCT
2
+ PROPN NOUN
3
+ ADV ADV
4
+ NOUN ADV
5
+ PART AUX
6
+ PUNCT PUNCT
7
+ PUNCT PUNCT
8
+ VERB VERB
9
+ PRON SCONJ
10
+ ADP ADP
11
+ NOUN NOUN
12
+ ADP PART
13
+ PROPN PROPN
14
+ CCONJ CCONJ
15
+ VERB VERB
16
+ ADP ADP
17
+ NOUN NOUN
18
+ PRON PRON
19
+ PUNCT PUNCT
20
+ NOUN VERB
21
+ SCONJ ADV
22
+ ADP ADP
23
+ NOUN NOUN
24
+ ADP DET
25
+ NOUN NOUN
26
+ PRON PRON
27
+ PUNCT PUNCT
28
+ SCONJ NOUN
29
+ VERB VERB
30
+ PRON PRON
31
+ ADP ADP
32
+ ADJ NOUN
33
+ PUNCT PUNCT
34
+ PRON PRON
35
+ ADP DET
36
+ ADJ NOUN
37
+ PART ADP
38
+ NOUN NOUN
39
+ ADP ADP
40
+ NOUN NOUN
41
+ PUNCT PUNCT
42
+ ADP PRON
43
+ NOUN VERB
44
+ PRON PRON
45
+ PART DET
46
+ ADJ NOUN
47
+ PART ADP
48
+ NOUN NOUN
49
+ CCONJ CCONJ
50
+ NOUN NOUN
51
+ PART ADP
52
+ ADV ADJ
53
+ ADJ NOUN
54
+ ADP ADP
55
+ PRON PRON
56
+ PUNCT PUNCT
57
+ ADJ VERB
58
+ ADV ADV
59
+ ADP ADP
60
+ NOUN NOUN
61
+ PRON PRON
62
+ PUNCT PUNCT
63
+ VERB VERB
64
+ ADV ADV
65
+ PRON PRON
66
+ ADP ADP
67
+ DET NOUN
68
+ NOUN NOUN
69
+ PRON PRON
70
+ PUNCT PUNCT
71
+ ADV ADV
72
+ PUNCT PUNCT
73
+ VERB VERB
74
+ PRON PRON
75
+ ADP ADP
76
+ NOUN NOUN
77
+ PUNCT PUNCT
78
+ VERB VERB
79
+ ADV ADV
80
+ ADP ADP
81
+ DET NOUN
82
+ NOUN NOUN
83
+ ADP ADP
84
+ NOUN PROPN
85
+ PUNCT PUNCT
86
+ VERB VERB
87
+ PRON PRON
88
+ PRON PRON
89
+ PUNCT PUNCT
90
+ ADV ADV
91
+ ADV PRON
92
+ ADJ NOUN
93
+ ADP ADP
94
+ NOUN NOUN
95
+ SCONJ SCONJ
96
+ VERB VERB
97
+ PRON PRON
98
+ CCONJ CCONJ
99
+ VERB VERB
100
+ PUNCT PUNCT
101
+ ADV ADV
102
+ ADV PART
103
+ ADV ADV
104
+ VERB AUX
105
+ VERB VERB
106
+ PART ADP
107
+ NOUN NOUN
108
+ ADP ADP
109
+ NOUN NOUN
110
+ PUNCT PUNCT
111
+ VERB VERB
112
+ ADP ADP
113
+ NOUN NOUN
114
+ PART ADP
115
+ NOUN PRON
116
+ PUNCT PUNCT
117
+ CCONJ CCONJ
118
+ PART PART
119
+ PRON PRON
120
+ VERB VERB
121
+ PUNCT PUNCT
122
+ VERB VERB
123
+ ADV PART
124
+ PRON PRON
125
+ PUNCT PUNCT
126
+ VERB VERB
127
+ PRON SCONJ
128
+ ADP ADP
129
+ NOUN PROPN
130
+ PART AUX
131
+ VERB VERB
132
+ ADV PRON
133
+ PUNCT PUNCT
134
+ VERB VERB
135
+ ADV ADP
136
+ ADP PRON
137
+ PROPN PROPN
138
+ ADP ADP
139
+ NOUN NOUN
140
+ PUNCT PUNCT
141
+ ADJ VERB
142
+ ADV ADV
143
+ ADP ADP
144
+ NOUN NOUN
145
+ PART ADP
146
+ PRON PRON
147
+ PUNCT PUNCT
148
+ VERB VERB
149
+ PRON PRON
150
+ ADP ADP
151
+ DET DET
152
+ NOUN NOUN
153
+ PUNCT PUNCT
154
+ NOUN NOUN
155
+ ADP ADP
156
+ PROPN PROPN
157
+ PUNCT PUNCT
158
+ VERB VERB
159
+ PRON PRON
160
+ ADV PART
161
+ PART PART
162
+ ADP ADP
163
+ NOUN NOUN
164
+ PART PART
165
+ VERB AUX
166
+ NOUN VERB
167
+ ADP ADP
168
+ NOUN NOUN
169
+ PUNCT PUNCT
170
+ ADV VERB
171
+ ADP ADP
172
+ NOUN NOUN
173
+ ADP ADP
174
+ PROPN PROPN
175
+ ADP ADP
176
+ NOUN NOUN
177
+ ADP ADP
178
+ PROPN PROPN
179
+ PUNCT PUNCT
180
+ ADP ADP
181
+ NOUN NOUN
182
+ ADP ADP
183
+ NOUN NOUN
184
+ PRON PRON
185
+ PART AUX
186
+ ADP ADP
187
+ NOUN PROPN
188
+ PROPN PROPN
189
+ PUNCT PUNCT
190
+ ADJ PROPN
191
+ ADV ADV
192
+ PUNCT PUNCT
193
+ VERB VERB
194
+ PRON PRON
195
+ ADP ADP
196
+ NOUN NOUN
197
+ ADP ADP
198
+ ADP ADP
199
+ NOUN NOUN
200
+ PUNCT PUNCT
201
+ PART SCONJ
202
+ PRON PRON
203
+ VERB VERB
204
+ PUNCT PUNCT
205
+ NUM NUM
206
+ PART ADP
207
+ NOUN NOUN
208
+ ADP PRON
209
+ VERB VERB
210
+ PRON PRON
211
+ PUNCT PUNCT
212
+ PRON PRON
213
+ ADP PRON
214
+ PROPN ADJ
215
+ PROPN PROPN
216
+ PUNCT PUNCT
217
+ PROPN PROPN
218
+ PUNCT PUNCT
219
+ PROPN PROPN
220
+ PUNCT PUNCT
221
+ CCONJ CCONJ
222
+ PROPN PROPN
223
+ PUNCT PUNCT
224
+ VERB VERB
225
+ PRON PRON
226
+ ADV PART
227
+ PART PART
228
+ ADP ADP
229
+ NOUN NOUN
230
+ PUNCT PUNCT
231
+ ADP ADP
232
+ PROPN PROPN
233
+ PROPN PROPN
234
+ NOUN PROPN
235
+ PART AUX
236
+ VERB VERB
237
+ ADV ADV
238
+ ADP ADP
239
+ NOUN NOUN
240
+ PART ADP
241
+ NOUN NOUN
242
+ PUNCT PUNCT
243
+ ADJ VERB
244
+ ADP ADP
245
+ PROPN PROPN
246
+ PROPN PROPN
247
+ NOUN PROPN
248
+ ADP ADP
249
+ PROPN PROPN
250
+ PUNCT PUNCT
251
+ VERB VERB
252
+ PRON PRON
253
+ PART PART
254
+ SCONJ SCONJ
255
+ PRON ADV
256
+ ADP DET
257
+ NOUN NOUN
258
+ PUNCT PUNCT
259
+ PRON PRON
260
+ ADP ADP
261
+ ADJ ADJ
262
+ PART ADP
263
+ NOUN NOUN
264
+ PUNCT PUNCT
265
+ NOUN NOUN
266
+ PUNCT PUNCT
267
+ ADP ADP
268
+ ADP ADP
269
+ NOUN NOUN
270
+ ADP ADP
271
+ NOUN NOUN
272
+ ADP DET
273
+ NOUN NOUN
274
+ ADP ADP
275
+ NOUN NOUN
276
+ PUNCT PUNCT
277
+ VERB VERB
278
+ DET DET
279
+ NOUN NOUN
280
+ PART ADP
281
+ ADJ ADJ
282
+ ADP ADP
283
+ NOUN NOUN
284
+ PUNCT PUNCT
285
+ ADJ VERB
286
+ PRON PRON
287
+ ADP ADP
288
+ DET DET
289
+ NOUN NOUN
290
+ PART ADP
291
+ ADJ ADJ
292
+ ADP ADP
293
+ ADJ ADJ
294
+ PART ADP
295
+ NOUN NOUN
296
+ ADP ADP
297
+ NOUN NOUN
298
+ ADP ADP
299
+ NOUN NOUN
300
+ PUNCT PUNCT
301
+ VERB VERB
302
+ PRON PRON
303
+ ADJ ADJ
304
+ ADP ADP
305
+ NOUN PROPN
306
+ PROPN PROPN
307
+ PUNCT PUNCT
308
+ ADP ADP
309
+ NOUN NOUN
310
+ ADP ADP
311
+ ADJ ADJ
312
+ PART ADP
313
+ NOUN NOUN
314
+ PART ADP
315
+ PRON PRON
316
+ ADP ADP
317
+ ADP ADP
318
+ NOUN NOUN
319
+ PART AUX
320
+ VERB VERB
321
+ ADV ADV
322
+ ADP ADP
323
+ NOUN NOUN
324
+ PUNCT PUNCT
325
+ ADJ VERB
326
+ ADV ADV
327
+ ADP ADP
328
+ DET DET
329
+ NOUN NOUN
330
+ PUNCT PUNCT
331
+ NOUN NOUN
332
+ ADP ADP
333
+ PROPN PROPN
334
+ PUNCT PUNCT
335
+ ADJ ADJ
336
+ ADV ADV
337
+ ADP ADP
338
+ DET DET
339
+ NUM NOUN
340
+ PUNCT PUNCT
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4661b2cb6b8a1007906509fe18cbfbc03062a086102bf7b80cfedb80f16c37
3
+ size 17082854
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "250001": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
train.args ADDED
@@ -0,0 +1 @@
 
 
1
+ udpos --learning_rate=5e-5 --eval_steps=1000 --per_device_batch_size=10 --max_steps=1000 --multi
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.05,
3
+ "train_loss": 0.26377957344055175,
4
+ "train_runtime": 2810.1261,
5
+ "train_samples_per_second": 2.847,
6
+ "train_steps_per_second": 0.356
7
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:042ec2b9b725d0e89ce5c81a57ec781e6aa54fcca6ac3ae85a5ae3496f1dc3cb
3
+ size 4984