KoichiYasuoka committed on
Commit
2bd531f
1 Parent(s): 92784da

initial release

Browse files
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - "ja"
4
+ tags:
5
+ - "japanese"
6
+ - "wikipedia"
7
+ - "token-classification"
8
+ - "pos"
9
+ - "dependency-parsing"
10
+ datasets:
11
+ - "universal_dependencies"
12
+ license: "cc-by-sa-4.0"
13
+ pipeline_tag: "token-classification"
14
+ widget:
15
+ - text: "国境の長いトンネルを抜けると雪国であった。"
16
+ ---
17
+
18
+ # deberta-large-japanese-wikipedia-luw-upos
19
+
20
+ ## Model Description
21
+
22
+ This is a DeBERTa(V2) model pre-trained on Japanese Wikipedia and 青空文庫 (Aozora Bunko) texts for POS-tagging and dependency-parsing, derived from [deberta-large-japanese-wikipedia](https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia). Every long-unit-word is tagged with [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech) and [FEATS](https://universaldependencies.org/u/feat/).
23
+
24
+ ## How to Use
25
+
26
+ ```py
27
+ import torch
28
+ from transformers import AutoTokenizer,AutoModelForTokenClassification
29
+ tokenizer=AutoTokenizer.from_pretrained("KoichiYasuoka/deberta-large-japanese-wikipedia-luw-upos")
30
+ model=AutoModelForTokenClassification.from_pretrained("KoichiYasuoka/deberta-large-japanese-wikipedia-luw-upos")
31
+ s="国境の長いトンネルを抜けると雪国であった。"
32
+ t=tokenizer.tokenize(s)
33
+ p=[model.config.id2label[q] for q in torch.argmax(model(tokenizer.encode(s,return_tensors="pt"))["logits"],dim=2)[0].tolist()[1:-1]]
34
+ print(list(zip(t,p)))
35
+ ```
36
+
37
+ or
38
+
39
+ ```py
40
+ import esupar
41
+ nlp=esupar.load("KoichiYasuoka/deberta-large-japanese-wikipedia-luw-upos")
42
+ print(nlp("国境の長いトンネルを抜けると雪国であった。"))
43
+ ```
44
+
45
+ ## See Also
46
+
47
+ [esupar](https://github.com/KoichiYasuoka/esupar): Tokenizer, POS-tagger, and dependency-parser with BERT/RoBERTa/DeBERTa models
48
+
config.json ADDED
@@ -0,0 +1,739 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "ADJ",
13
+ "1": "ADJ+AUX",
14
+ "2": "ADP",
15
+ "3": "ADV",
16
+ "4": "AUX",
17
+ "5": "AUX|Polarity=Neg",
18
+ "6": "AUX|Polarity=Neg+VERB",
19
+ "7": "B-ADJ",
20
+ "8": "B-ADJ+ADJ",
21
+ "9": "B-ADJ+AUX+AUX",
22
+ "10": "B-ADJ+VERB",
23
+ "11": "B-ADP",
24
+ "12": "B-ADP+NOUN",
25
+ "13": "B-ADV",
26
+ "14": "B-ADV+ADP",
27
+ "15": "B-ADV+ADP+VERB",
28
+ "16": "B-AUX",
29
+ "17": "B-AUX+AUX+AUX",
30
+ "18": "B-AUX+AUX+PART",
31
+ "19": "B-AUX+SCONJ",
32
+ "20": "B-AUX|Polarity=Neg",
33
+ "21": "B-AUX|Polarity=Neg+VERB",
34
+ "22": "B-CCONJ",
35
+ "23": "B-DET",
36
+ "24": "B-INTJ",
37
+ "25": "B-NOUN",
38
+ "26": "B-NOUN+ADJ",
39
+ "27": "B-NOUN+ADP",
40
+ "28": "B-NOUN|Polarity=Neg",
41
+ "29": "B-NUM",
42
+ "30": "B-PART",
43
+ "31": "B-PRON",
44
+ "32": "B-PROPN",
45
+ "33": "B-PUNCT",
46
+ "34": "B-SCONJ",
47
+ "35": "B-SYM",
48
+ "36": "B-VERB",
49
+ "37": "B-VERB+AUX",
50
+ "38": "B-VERB+AUX+AUX",
51
+ "39": "B-VERB+AUX+NOUN",
52
+ "40": "B-VERB+AUX+PART",
53
+ "41": "B-VERB+AUX+SCONJ",
54
+ "42": "B-VERB+AUX|Polarity=Neg",
55
+ "43": "B-X",
56
+ "44": "CCONJ",
57
+ "45": "DET",
58
+ "46": "I-ADJ",
59
+ "47": "I-ADJ+ADJ",
60
+ "48": "I-ADJ+AUX+AUX",
61
+ "49": "I-ADJ+VERB",
62
+ "50": "I-ADP",
63
+ "51": "I-ADP+NOUN",
64
+ "52": "I-ADV",
65
+ "53": "I-ADV+ADP",
66
+ "54": "I-ADV+ADP+VERB",
67
+ "55": "I-AUX",
68
+ "56": "I-AUX+AUX+AUX",
69
+ "57": "I-AUX+AUX+PART",
70
+ "58": "I-AUX+SCONJ",
71
+ "59": "I-AUX|Polarity=Neg",
72
+ "60": "I-AUX|Polarity=Neg+VERB",
73
+ "61": "I-CCONJ",
74
+ "62": "I-DET",
75
+ "63": "I-INTJ",
76
+ "64": "I-NOUN",
77
+ "65": "I-NOUN+ADJ",
78
+ "66": "I-NOUN+ADP",
79
+ "67": "I-NOUN|Polarity=Neg",
80
+ "68": "I-NUM",
81
+ "69": "I-PART",
82
+ "70": "I-PRON",
83
+ "71": "I-PROPN",
84
+ "72": "I-PUNCT",
85
+ "73": "I-SCONJ",
86
+ "74": "I-SYM",
87
+ "75": "I-VERB",
88
+ "76": "I-VERB+AUX",
89
+ "77": "I-VERB+AUX+AUX",
90
+ "78": "I-VERB+AUX+NOUN",
91
+ "79": "I-VERB+AUX+PART",
92
+ "80": "I-VERB+AUX+SCONJ",
93
+ "81": "I-VERB+AUX|Polarity=Neg",
94
+ "82": "I-X",
95
+ "83": "INTJ",
96
+ "84": "NOUN",
97
+ "85": "NOUN+ADP",
98
+ "86": "NOUN+AUX",
99
+ "87": "NUM",
100
+ "88": "PART",
101
+ "89": "PRON",
102
+ "90": "PRON+ADP",
103
+ "91": "PROPN",
104
+ "92": "PUNCT",
105
+ "93": "SCONJ",
106
+ "94": "SYM",
107
+ "95": "VERB",
108
+ "96": "VERB+AUX",
109
+ "97": "VERB+AUX|Polarity=Neg",
110
+ "98": "X"
111
+ },
112
+ "initializer_range": 0.02,
113
+ "intermediate_size": 4096,
114
+ "label2id": {
115
+ "ADJ": 0,
116
+ "ADJ+AUX": 1,
117
+ "ADP": 2,
118
+ "ADV": 3,
119
+ "AUX": 4,
120
+ "AUX|Polarity=Neg": 5,
121
+ "AUX|Polarity=Neg+VERB": 6,
122
+ "B-ADJ": 7,
123
+ "B-ADJ+ADJ": 8,
124
+ "B-ADJ+AUX+AUX": 9,
125
+ "B-ADJ+VERB": 10,
126
+ "B-ADP": 11,
127
+ "B-ADP+NOUN": 12,
128
+ "B-ADV": 13,
129
+ "B-ADV+ADP": 14,
130
+ "B-ADV+ADP+VERB": 15,
131
+ "B-AUX": 16,
132
+ "B-AUX+AUX+AUX": 17,
133
+ "B-AUX+AUX+PART": 18,
134
+ "B-AUX+SCONJ": 19,
135
+ "B-AUX|Polarity=Neg": 20,
136
+ "B-AUX|Polarity=Neg+VERB": 21,
137
+ "B-CCONJ": 22,
138
+ "B-DET": 23,
139
+ "B-INTJ": 24,
140
+ "B-NOUN": 25,
141
+ "B-NOUN+ADJ": 26,
142
+ "B-NOUN+ADP": 27,
143
+ "B-NOUN|Polarity=Neg": 28,
144
+ "B-NUM": 29,
145
+ "B-PART": 30,
146
+ "B-PRON": 31,
147
+ "B-PROPN": 32,
148
+ "B-PUNCT": 33,
149
+ "B-SCONJ": 34,
150
+ "B-SYM": 35,
151
+ "B-VERB": 36,
152
+ "B-VERB+AUX": 37,
153
+ "B-VERB+AUX+AUX": 38,
154
+ "B-VERB+AUX+NOUN": 39,
155
+ "B-VERB+AUX+PART": 40,
156
+ "B-VERB+AUX+SCONJ": 41,
157
+ "B-VERB+AUX|Polarity=Neg": 42,
158
+ "B-X": 43,
159
+ "CCONJ": 44,
160
+ "DET": 45,
161
+ "I-ADJ": 46,
162
+ "I-ADJ+ADJ": 47,
163
+ "I-ADJ+AUX+AUX": 48,
164
+ "I-ADJ+VERB": 49,
165
+ "I-ADP": 50,
166
+ "I-ADP+NOUN": 51,
167
+ "I-ADV": 52,
168
+ "I-ADV+ADP": 53,
169
+ "I-ADV+ADP+VERB": 54,
170
+ "I-AUX": 55,
171
+ "I-AUX+AUX+AUX": 56,
172
+ "I-AUX+AUX+PART": 57,
173
+ "I-AUX+SCONJ": 58,
174
+ "I-AUX|Polarity=Neg": 59,
175
+ "I-AUX|Polarity=Neg+VERB": 60,
176
+ "I-CCONJ": 61,
177
+ "I-DET": 62,
178
+ "I-INTJ": 63,
179
+ "I-NOUN": 64,
180
+ "I-NOUN+ADJ": 65,
181
+ "I-NOUN+ADP": 66,
182
+ "I-NOUN|Polarity=Neg": 67,
183
+ "I-NUM": 68,
184
+ "I-PART": 69,
185
+ "I-PRON": 70,
186
+ "I-PROPN": 71,
187
+ "I-PUNCT": 72,
188
+ "I-SCONJ": 73,
189
+ "I-SYM": 74,
190
+ "I-VERB": 75,
191
+ "I-VERB+AUX": 76,
192
+ "I-VERB+AUX+AUX": 77,
193
+ "I-VERB+AUX+NOUN": 78,
194
+ "I-VERB+AUX+PART": 79,
195
+ "I-VERB+AUX+SCONJ": 80,
196
+ "I-VERB+AUX|Polarity=Neg": 81,
197
+ "I-X": 82,
198
+ "INTJ": 83,
199
+ "NOUN": 84,
200
+ "NOUN+ADP": 85,
201
+ "NOUN+AUX": 86,
202
+ "NUM": 87,
203
+ "PART": 88,
204
+ "PRON": 89,
205
+ "PRON+ADP": 90,
206
+ "PROPN": 91,
207
+ "PUNCT": 92,
208
+ "SCONJ": 93,
209
+ "SYM": 94,
210
+ "VERB": 95,
211
+ "VERB+AUX": 96,
212
+ "VERB+AUX|Polarity=Neg": 97,
213
+ "X": 98
214
+ },
215
+ "layer_norm_eps": 1e-07,
216
+ "max_position_embeddings": 512,
217
+ "max_relative_positions": -1,
218
+ "model_type": "deberta-v2",
219
+ "num_attention_heads": 16,
220
+ "num_hidden_layers": 24,
221
+ "pad_token_id": 1,
222
+ "pooler_dropout": 0,
223
+ "pooler_hidden_act": "gelu",
224
+ "pooler_hidden_size": 1024,
225
+ "pos_att_type": null,
226
+ "position_biased_input": true,
227
+ "relative_attention": false,
228
+ "task_specific_params": {
229
+ "upos_multiword": {
230
+ "ADJ+ADJ": {
231
+ "\u5c11\u306a\u304f\u306a\u3044": [
232
+ "\u5c11\u306a\u304f",
233
+ "\u306a\u3044"
234
+ ]
235
+ },
236
+ "ADJ+AUX": {
237
+ "\u679c\u6562\u306a": [
238
+ "\u679c\u6562",
239
+ "\u306a"
240
+ ]
241
+ },
242
+ "ADJ+AUX+AUX": {
243
+ "\u591a\u304b\u3063\u305f\u3089\u3057\u304f": [
244
+ "\u591a\u304b\u3063",
245
+ "\u305f",
246
+ "\u3089\u3057\u304f"
247
+ ]
248
+ },
249
+ "ADJ+VERB": {
250
+ "\u306a\u304f\u306a\u308b": [
251
+ "\u306a\u304f",
252
+ "\u306a\u308b"
253
+ ],
254
+ "\u5c11\u306a\u304f\u306a\u3063": [
255
+ "\u5c11\u306a\u304f",
256
+ "\u306a\u3063"
257
+ ]
258
+ },
259
+ "ADP+NOUN": {
260
+ "\u306f\u3057\u3083\u304d\u3057\u3083\u304d\u30ec\u30bf\u30b9": [
261
+ "\u306f",
262
+ "\u3057\u3083\u304d\u3057\u3083\u304d\u30ec\u30bf\u30b9"
263
+ ]
264
+ },
265
+ "ADV+ADP": {
266
+ "\u3082\u3061\u3063\u3068": [
267
+ "\u3082\u3061\u3063",
268
+ "\u3068"
269
+ ]
270
+ },
271
+ "ADV+ADP+VERB": {
272
+ "\u3082\u3057\u304b\u3057": [
273
+ "\u3082\u3057",
274
+ "\u304b",
275
+ "\u3057"
276
+ ]
277
+ },
278
+ "AUX+AUX+AUX": {
279
+ "\u3060\u3063\u305f\u3089\u3057\u3044": [
280
+ "\u3060\u3063",
281
+ "\u305f",
282
+ "\u3089\u3057\u3044"
283
+ ],
284
+ "\u3060\u3063\u305f\u3089\u3057\u304f": [
285
+ "\u3060\u3063",
286
+ "\u305f",
287
+ "\u3089\u3057\u304f"
288
+ ]
289
+ },
290
+ "AUX+AUX+PART": {
291
+ "\u3066\u3044\u305f\u3060\u3051": [
292
+ "\u3066\u3044",
293
+ "\u305f",
294
+ "\u3060\u3051"
295
+ ]
296
+ },
297
+ "AUX+SCONJ": {
298
+ "\u3060\u3051\u3069": [
299
+ "\u3060",
300
+ "\u3051\u3069"
301
+ ],
302
+ "\u306e\u3060\u3051\u3069": [
303
+ "\u306e\u3060",
304
+ "\u3051\u3069"
305
+ ],
306
+ "\u3093\u3060\u3051\u3069": [
307
+ "\u3093\u3060",
308
+ "\u3051\u3069"
309
+ ]
310
+ },
311
+ "AUX|Polarity=Neg+VERB": {
312
+ "\u306a\u304f\u306a\u3063": [
313
+ "\u306a\u304f",
314
+ "\u306a\u3063"
315
+ ],
316
+ "\u306a\u304f\u306a\u308a": [
317
+ "\u306a\u304f",
318
+ "\u306a\u308a"
319
+ ],
320
+ "\u306a\u304f\u306a\u308b": [
321
+ "\u306a\u304f",
322
+ "\u306a\u308b"
323
+ ]
324
+ },
325
+ "NOUN+ADJ": {
326
+ "\u60aa\u540d\u9ad8\u3044": [
327
+ "\u60aa\u540d",
328
+ "\u9ad8\u3044"
329
+ ]
330
+ },
331
+ "NOUN+ADP": {
332
+ "\u3072\u3068\u308a\u3067": [
333
+ "\u3072\u3068\u308a",
334
+ "\u3067"
335
+ ],
336
+ "\u4ee3\u8d70\u904b\u8ee2\u304c": [
337
+ "\u4ee3\u8d70\u904b\u8ee2",
338
+ "\u304c"
339
+ ],
340
+ "\u4f7f\u7528\u958b\u59cb\u307e\u3067": [
341
+ "\u4f7f\u7528\u958b\u59cb",
342
+ "\u307e\u3067"
343
+ ],
344
+ "\u516c\u8ee2\u304c": [
345
+ "\u516c\u8ee2",
346
+ "\u304c"
347
+ ],
348
+ "\u5171\u98df\u3044\u3082": [
349
+ "\u5171\u98df\u3044",
350
+ "\u3082"
351
+ ],
352
+ "\u9152\u6c17\u5e2f\u3073\u904b\u8ee2\u304c": [
353
+ "\u9152\u6c17\u5e2f\u3073\u904b\u8ee2",
354
+ "\u304c"
355
+ ]
356
+ },
357
+ "NOUN+AUX": {
358
+ "\u3072\u3068\u308a\u3067": [
359
+ "\u3072\u3068\u308a",
360
+ "\u3067"
361
+ ],
362
+ "\u601d\u3044\u3060": [
363
+ "\u601d\u3044",
364
+ "\u3060"
365
+ ]
366
+ },
367
+ "PRON+ADP": {
368
+ "\u306a\u3093\u304b": [
369
+ "\u306a\u3093",
370
+ "\u304b"
371
+ ]
372
+ },
373
+ "VERB+AUX": {
374
+ "\u3044\u308f\u308c": [
375
+ "\u3044\u308f",
376
+ "\u308c"
377
+ ],
378
+ "\u3044\u308f\u308c\u308b": [
379
+ "\u3044\u308f",
380
+ "\u308c\u308b"
381
+ ],
382
+ "\u304a\u3053\u306a\u308f\u308c": [
383
+ "\u304a\u3053\u306a\u308f",
384
+ "\u308c"
385
+ ],
386
+ "\u3055\u3089\u308f\u308c": [
387
+ "\u3055\u3089\u308f",
388
+ "\u308c"
389
+ ],
390
+ "\u3057\u3053\u307e\u305b": [
391
+ "\u3057\u3053\u307e",
392
+ "\u305b"
393
+ ],
394
+ "\u3059\u3079\u304d": [
395
+ "\u3059",
396
+ "\u3079\u304d"
397
+ ],
398
+ "\u306a\u3055\u308c": [
399
+ "\u306a\u3055",
400
+ "\u308c"
401
+ ],
402
+ "\u306a\u3055\u308c\u308b": [
403
+ "\u306a\u3055",
404
+ "\u308c\u308b"
405
+ ],
406
+ "\u306a\u3058\u307e\u305b": [
407
+ "\u306a\u3058\u307e",
408
+ "\u305b"
409
+ ],
410
+ "\u307f\u306a\u3055\u308c": [
411
+ "\u307f\u306a\u3055",
412
+ "\u308c"
413
+ ],
414
+ "\u30c1\u30a7\u30c3\u30af\u3059\u3079\u304f": [
415
+ "\u30c1\u30a7\u30c3\u30af\u3059",
416
+ "\u3079\u304f"
417
+ ],
418
+ "\u30e4\u30e9\u30ec": [
419
+ "\u30e4\u30e9",
420
+ "\u30ec"
421
+ ],
422
+ "\u4e0b\u3055\u308c\u308b": [
423
+ "\u4e0b\u3055",
424
+ "\u308c\u308b"
425
+ ],
426
+ "\u4ecb\u5165\u3059\u3079\u3057": [
427
+ "\u4ecb\u5165\u3059",
428
+ "\u3079\u3057"
429
+ ],
430
+ "\u4ed5\u821e\u308f\u308c": [
431
+ "\u4ed5\u821e\u308f",
432
+ "\u308c"
433
+ ],
434
+ "\u4f11\u307e\u305b": [
435
+ "\u4f11\u307e",
436
+ "\u305b"
437
+ ],
438
+ "\u4f34\u308f\u308c": [
439
+ "\u4f34\u308f",
440
+ "\u308c"
441
+ ],
442
+ "\u5145\u5f53\u3059\u3079\u304f": [
443
+ "\u5145\u5f53\u3059",
444
+ "\u3079\u304f"
445
+ ],
446
+ "\u5165\u3063\u3061\u3083\u3044": [
447
+ "\u5165\u3063",
448
+ "\u3061\u3083\u3044"
449
+ ],
450
+ "\u52d5\u304b\u305b": [
451
+ "\u52d5\u304b",
452
+ "\u305b"
453
+ ],
454
+ "\u533f\u308f\u308c": [
455
+ "\u533f\u308f",
456
+ "\u308c"
457
+ ],
458
+ "\u542b\u307e\u305b": [
459
+ "\u542b\u307e",
460
+ "\u305b"
461
+ ],
462
+ "\u548c\u307e\u305b": [
463
+ "\u548c\u307e",
464
+ "\u305b"
465
+ ],
466
+ "\u554f\u308f\u308c": [
467
+ "\u554f\u308f",
468
+ "\u308c"
469
+ ],
470
+ "\u554f\u308f\u308c\u308b": [
471
+ "\u554f\u308f",
472
+ "\u308c\u308b"
473
+ ],
474
+ "\u596a\u308f\u308c": [
475
+ "\u596a\u308f",
476
+ "\u308c"
477
+ ],
478
+ "\u596a\u308f\u308c\u308b": [
479
+ "\u596a\u308f",
480
+ "\u308c\u308b"
481
+ ],
482
+ "\u5acc\u308f\u308c": [
483
+ "\u5acc\u308f",
484
+ "\u308c"
485
+ ],
486
+ "\u601d\u3063\u3061\u3083\u3044": [
487
+ "\u601d\u3063",
488
+ "\u3061\u3083\u3044"
489
+ ],
490
+ "\u601d\u3063\u3061\u3083\u3046": [
491
+ "\u601d\u3063",
492
+ "\u3061\u3083\u3046"
493
+ ],
494
+ "\u60a9\u307e\u305b": [
495
+ "\u60a9\u307e",
496
+ "\u305b"
497
+ ],
498
+ "\u60a9\u307e\u305b\u308b": [
499
+ "\u60a9\u307e",
500
+ "\u305b\u308b"
501
+ ],
502
+ "\u6271\u308f\u308c": [
503
+ "\u6271\u308f",
504
+ "\u308c"
505
+ ],
506
+ "\u6271\u308f\u308c\u308b": [
507
+ "\u6271\u308f",
508
+ "\u308c\u308b"
509
+ ],
510
+ "\u6279\u5224\u3059\u3079\u304d": [
511
+ "\u6279\u5224\u3059",
512
+ "\u3079\u304d"
513
+ ],
514
+ "\u6392\u9664\u3059\u3079\u304d": [
515
+ "\u6392\u9664\u3059",
516
+ "\u3079\u304d"
517
+ ],
518
+ "\u6458\u767a\u3059\u3079\u304d": [
519
+ "\u6458\u767a\u3059",
520
+ "\u3079\u304d"
521
+ ],
522
+ "\u6551\u308f\u308c\u308b": [
523
+ "\u6551\u308f",
524
+ "\u308c\u308b"
525
+ ],
526
+ "\u66ae\u3089\u3059\u3079\u304d": [
527
+ "\u66ae\u3089\u3059",
528
+ "\u3079\u304d"
529
+ ],
530
+ "\u679c\u305f\u3059\u3079\u304f": [
531
+ "\u679c\u305f\u3059",
532
+ "\u3079\u304f"
533
+ ],
534
+ "\u6b4c\u308f\u308c": [
535
+ "\u6b4c\u308f",
536
+ "\u308c"
537
+ ],
538
+ "\u6b6a\u307e\u305b\u308b": [
539
+ "\u6b6a\u307e",
540
+ "\u305b\u308b"
541
+ ],
542
+ "\u6e08\u307e\u305b": [
543
+ "\u6e08\u307e",
544
+ "\u305b"
545
+ ],
546
+ "\u72d9\u308f\u308c": [
547
+ "\u72d9\u308f",
548
+ "\u308c"
549
+ ],
550
+ "\u751f\u304b\u3059\u3079\u304f": [
551
+ "\u751f\u304b\u3059",
552
+ "\u3079\u304f"
553
+ ],
554
+ "\u77e5\u3089\u305b": [
555
+ "\u77e5\u3089",
556
+ "\u305b"
557
+ ],
558
+ "\u77e5\u3089\u305b\u308b": [
559
+ "\u77e5\u3089",
560
+ "\u305b\u308b"
561
+ ],
562
+ "\u7d42\u308f\u3063\u3061\u3083\u3046": [
563
+ "\u7d42\u308f\u3063",
564
+ "\u3061\u3083\u3046"
565
+ ],
566
+ "\u7d71\u4e00\u3059\u3079\u304f": [
567
+ "\u7d71\u4e00\u3059",
568
+ "\u3079\u304f"
569
+ ],
570
+ "\u884c\u3063\u3061\u3083\u3044": [
571
+ "\u884c\u3063",
572
+ "\u3061\u3083\u3044"
573
+ ],
574
+ "\u884c\u308f\u308c": [
575
+ "\u884c\u308f",
576
+ "\u308c"
577
+ ],
578
+ "\u884c\u308f\u308c\u308b": [
579
+ "\u884c\u308f",
580
+ "\u308c\u308b"
581
+ ],
582
+ "\u88ab\u308f\u308c\u308b": [
583
+ "\u88ab\u308f",
584
+ "\u308c\u308b"
585
+ ],
586
+ "\u8972\u308f\u308c": [
587
+ "\u8972\u308f",
588
+ "\u308c"
589
+ ],
590
+ "\u8986\u308f\u308c": [
591
+ "\u8986\u308f",
592
+ "\u308c"
593
+ ],
594
+ "\u898b\u306a\u3055\u308c": [
595
+ "\u898b\u306a\u3055",
596
+ "\u308c"
597
+ ],
598
+ "\u898b\u821e\u308f\u308c": [
599
+ "\u898b\u821e\u308f",
600
+ "\u308c"
601
+ ],
602
+ "\u89e3\u304d\u660e\u304b\u3059\u3079\u304d": [
603
+ "\u89e3\u304d\u660e\u304b\u3059",
604
+ "\u3079\u304d"
605
+ ],
606
+ "\u8a60\u308f\u308c": [
607
+ "\u8a60\u308f",
608
+ "\u308c"
609
+ ],
610
+ "\u8a98\u308f\u308c": [
611
+ "\u8a98\u308f",
612
+ "\u308c"
613
+ ],
614
+ "\u8aac\u660e\u3059\u3079\u304d": [
615
+ "\u8aac\u660e\u3059",
616
+ "\u3079\u304d"
617
+ ],
618
+ "\u8cb7\u3063\u3061\u3083\u3044": [
619
+ "\u8cb7\u3063",
620
+ "\u3061\u3083\u3044"
621
+ ],
622
+ "\u8e0f\u307e\u305b": [
623
+ "\u8e0f\u307e",
624
+ "\u305b"
625
+ ],
626
+ "\u8f9e\u8077\u3059\u3079\u304d": [
627
+ "\u8f9e\u8077\u3059",
628
+ "\u3079\u304d"
629
+ ],
630
+ "\u990a\u308f\u308c\u308b": [
631
+ "\u990a\u308f",
632
+ "\u308c\u308b"
633
+ ],
634
+ "\u9cf4\u3089\u3059\u3079\u304f": [
635
+ "\u9cf4\u3089\u3059",
636
+ "\u3079\u304f"
637
+ ]
638
+ },
639
+ "VERB+AUX+AUX": {
640
+ "\u306a\u3063\u305f\u3089\u3057\u3044": [
641
+ "\u306a\u3063",
642
+ "\u305f",
643
+ "\u3089\u3057\u3044"
644
+ ],
645
+ "\u601d\u3063\u305f\u3089\u3057\u304f": [
646
+ "\u601d\u3063",
647
+ "\u305f",
648
+ "\u3089\u3057\u304f"
649
+ ],
650
+ "\u8cb7\u3063\u305f\u3089\u3057\u304f": [
651
+ "\u8cb7\u3063",
652
+ "\u305f",
653
+ "\u3089\u3057\u304f"
654
+ ]
655
+ },
656
+ "VERB+AUX+NOUN": {
657
+ "\u5909\u66f4\u3057\u305f\u305f\u3081": [
658
+ "\u5909\u66f4\u3057",
659
+ "\u305f",
660
+ "\u305f\u3081"
661
+ ],
662
+ "\u5931\u6557\u3057\u305f\u305f\u3081": [
663
+ "\u5931\u6557\u3057",
664
+ "\u305f",
665
+ "\u305f\u3081"
666
+ ],
667
+ "\u5bfe\u7acb\u3057\u305f\u305f\u3081": [
668
+ "\u5bfe\u7acb\u3057",
669
+ "\u305f",
670
+ "\u305f\u3081"
671
+ ],
672
+ "\u6f5c\u5165\u3057\u305f\u305f\u3081": [
673
+ "\u6f5c\u5165\u3057",
674
+ "\u305f",
675
+ "\u305f\u3081"
676
+ ],
677
+ "\u8131\u8d70\u3057\u305f\u305f\u3081": [
678
+ "\u8131\u8d70\u3057",
679
+ "\u305f",
680
+ "\u305f\u3081"
681
+ ],
682
+ "\u8868\u660e\u3057\u305f\u305f\u3081": [
683
+ "\u8868\u660e\u3057",
684
+ "\u305f",
685
+ "\u305f\u3081"
686
+ ],
687
+ "\u8981\u8acb\u3057\u305f\u305f\u3081": [
688
+ "\u8981\u8acb\u3057",
689
+ "\u305f",
690
+ "\u305f\u3081"
691
+ ]
692
+ },
693
+ "VERB+AUX+SCONJ": {
694
+ "\u592d\u6298\u3057\u305f\u305f\u3081\u306b": [
695
+ "\u592d\u6298\u3057",
696
+ "\u305f",
697
+ "\u305f\u3081\u306b"
698
+ ],
699
+ "\u5fb4\u767a\u3057\u305f\u305f\u3081\u306b": [
700
+ "\u5fb4\u767a\u3057",
701
+ "\u305f",
702
+ "\u305f\u3081\u306b"
703
+ ],
704
+ "\u62d2\u5426\u3057\u305f\u305f\u3081\u306b": [
705
+ "\u62d2\u5426\u3057",
706
+ "\u305f",
707
+ "\u305f\u3081\u306b"
708
+ ]
709
+ },
710
+ "VERB+AUX|Polarity=Neg": {
711
+ "\u304b\u307e\u308f\u305a": [
712
+ "\u304b\u307e\u308f",
713
+ "\u305a"
714
+ ],
715
+ "\u306a\u3089\u305a": [
716
+ "\u306a\u3089",
717
+ "\u305a"
718
+ ],
719
+ "\u554f\u308f\u305a": [
720
+ "\u554f\u308f",
721
+ "\u305a"
722
+ ],
723
+ "\u69cb\u308f\u305a": [
724
+ "\u69cb\u308f",
725
+ "\u305a"
726
+ ],
727
+ "\u884c\u308f\u305a": [
728
+ "\u884c\u308f",
729
+ "\u305a"
730
+ ]
731
+ }
732
+ }
733
+ },
734
+ "tokenizer_class": "DebertaV2TokenizerFast",
735
+ "torch_dtype": "float32",
736
+ "transformers_version": "4.19.4",
737
+ "type_vocab_size": 0,
738
+ "vocab_size": 32000
739
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cbb774e70b3109292ff5bfefcf2a8f16dd605cd91cc417f94e434c804a73179
3
+ size 1342957619
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
3
+ size 1
supar.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9fa658d2d7e5526e8f3e9cd1f34178f6c3db61ec9c133c059d69c9c576b0993
3
+ size 1391432747
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": true, "keep_accents": true, "model_max_length": 512, "tokenizer_class": "DebertaV2TokenizerFast"}