KoichiYasuoka committed on
Commit
70ac635
1 Parent(s): 0ffe559

initial release

Browse files
README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - "ja"
4
+ tags:
5
+ - "japanese"
6
+ - "token-classification"
7
+ - "pos"
8
+ - "dependency-parsing"
9
+ datasets:
10
+ - "universal_dependencies"
11
+ license: "cc-by-sa-4.0"
12
+ pipeline_tag: "token-classification"
13
+ widget:
14
+ - text: "国境の長いトンネルを抜けると雪国であった。"
15
+ ---
16
+
17
+ # deberta-large-japanese-luw-upos
18
+
19
+ ## Model Description
20
+
21
+ This is a DeBERTa(V2) model pre-trained on 青空文庫 (Aozora Bunko) texts for POS-tagging and dependency-parsing, derived from [deberta-large-japanese-aozora](https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora). Every long-unit-word is tagged with its [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech) label.
22
+
23
+ ## How to Use
24
+
25
+ ```py
26
+ import torch
27
+ from transformers import AutoTokenizer,AutoModelForTokenClassification
28
+ tokenizer=AutoTokenizer.from_pretrained("KoichiYasuoka/deberta-large-japanese-luw-upos")
29
+ model=AutoModelForTokenClassification.from_pretrained("KoichiYasuoka/deberta-large-japanese-luw-upos")
30
+ s="国境の長いトンネルを抜けると雪国であった。"
31
+ t=tokenizer.tokenize(s)
32
+ p=[model.config.id2label[q] for q in torch.argmax(model(tokenizer.encode(s,return_tensors="pt"))["logits"],dim=2)[0].tolist()[1:-1]]
33
+ print(list(zip(t,p)))
34
+ ```
35
+
36
+ or
37
+
38
+ ```py
39
+ import esupar
40
+ nlp=esupar.load("KoichiYasuoka/deberta-large-japanese-luw-upos")
41
+ print(nlp("国境の長いトンネルを抜けると雪国であった。"))
42
+ ```
43
+
44
+ ## See Also
45
+
46
+ [esupar](https://github.com/KoichiYasuoka/esupar): Tokenizer, POS-tagger, and dependency-parser with BERT/RoBERTa/DeBERTa models
47
+
config.json ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "ADJ",
13
+ "1": "ADJ+AUX",
14
+ "2": "ADP",
15
+ "3": "ADV",
16
+ "4": "AUX",
17
+ "5": "AUX+VERB",
18
+ "6": "B-ADJ",
19
+ "7": "B-ADJ+ADJ",
20
+ "8": "B-ADJ+AUX+AUX",
21
+ "9": "B-ADJ+VERB",
22
+ "10": "B-ADP",
23
+ "11": "B-ADP+NOUN",
24
+ "12": "B-ADV",
25
+ "13": "B-ADV+ADP",
26
+ "14": "B-ADV+ADP+VERB",
27
+ "15": "B-AUX",
28
+ "16": "B-AUX+AUX",
29
+ "17": "B-AUX+AUX+AUX",
30
+ "18": "B-AUX+AUX+PART",
31
+ "19": "B-AUX+SCONJ",
32
+ "20": "B-AUX+VERB",
33
+ "21": "B-CCONJ",
34
+ "22": "B-DET",
35
+ "23": "B-INTJ",
36
+ "24": "B-NOUN",
37
+ "25": "B-NOUN+ADJ",
38
+ "26": "B-NOUN+ADP",
39
+ "27": "B-NUM",
40
+ "28": "B-PART",
41
+ "29": "B-PRON",
42
+ "30": "B-PROPN",
43
+ "31": "B-PUNCT",
44
+ "32": "B-SCONJ",
45
+ "33": "B-SYM",
46
+ "34": "B-VERB",
47
+ "35": "B-VERB+AUX",
48
+ "36": "B-VERB+AUX+AUX",
49
+ "37": "B-VERB+AUX+NOUN",
50
+ "38": "B-VERB+AUX+PART",
51
+ "39": "B-VERB+AUX+SCONJ",
52
+ "40": "B-X",
53
+ "41": "CCONJ",
54
+ "42": "DET",
55
+ "43": "I-ADJ",
56
+ "44": "I-ADJ+ADJ",
57
+ "45": "I-ADJ+AUX+AUX",
58
+ "46": "I-ADJ+VERB",
59
+ "47": "I-ADP",
60
+ "48": "I-ADP+NOUN",
61
+ "49": "I-ADV",
62
+ "50": "I-ADV+ADP",
63
+ "51": "I-ADV+ADP+VERB",
64
+ "52": "I-AUX",
65
+ "53": "I-AUX+AUX",
66
+ "54": "I-AUX+AUX+AUX",
67
+ "55": "I-AUX+AUX+PART",
68
+ "56": "I-AUX+SCONJ",
69
+ "57": "I-AUX+VERB",
70
+ "58": "I-CCONJ",
71
+ "59": "I-DET",
72
+ "60": "I-INTJ",
73
+ "61": "I-NOUN",
74
+ "62": "I-NOUN+ADJ",
75
+ "63": "I-NOUN+ADP",
76
+ "64": "I-NUM",
77
+ "65": "I-PART",
78
+ "66": "I-PRON",
79
+ "67": "I-PROPN",
80
+ "68": "I-PUNCT",
81
+ "69": "I-SCONJ",
82
+ "70": "I-SYM",
83
+ "71": "I-VERB",
84
+ "72": "I-VERB+AUX",
85
+ "73": "I-VERB+AUX+AUX",
86
+ "74": "I-VERB+AUX+NOUN",
87
+ "75": "I-VERB+AUX+PART",
88
+ "76": "I-VERB+AUX+SCONJ",
89
+ "77": "I-X",
90
+ "78": "NOUN",
91
+ "79": "NOUN+ADP",
92
+ "80": "NOUN+AUX",
93
+ "81": "NUM",
94
+ "82": "PART",
95
+ "83": "PRON",
96
+ "84": "PRON+ADP",
97
+ "85": "PROPN",
98
+ "86": "PUNCT",
99
+ "87": "SCONJ",
100
+ "88": "SYM",
101
+ "89": "VERB",
102
+ "90": "VERB+AUX",
103
+ "91": "VERB+AUX+PART",
104
+ "92": "X"
105
+ },
106
+ "initializer_range": 0.02,
107
+ "intermediate_size": 4096,
108
+ "label2id": {
109
+ "ADJ": 0,
110
+ "ADJ+AUX": 1,
111
+ "ADP": 2,
112
+ "ADV": 3,
113
+ "AUX": 4,
114
+ "AUX+VERB": 5,
115
+ "B-ADJ": 6,
116
+ "B-ADJ+ADJ": 7,
117
+ "B-ADJ+AUX+AUX": 8,
118
+ "B-ADJ+VERB": 9,
119
+ "B-ADP": 10,
120
+ "B-ADP+NOUN": 11,
121
+ "B-ADV": 12,
122
+ "B-ADV+ADP": 13,
123
+ "B-ADV+ADP+VERB": 14,
124
+ "B-AUX": 15,
125
+ "B-AUX+AUX": 16,
126
+ "B-AUX+AUX+AUX": 17,
127
+ "B-AUX+AUX+PART": 18,
128
+ "B-AUX+SCONJ": 19,
129
+ "B-AUX+VERB": 20,
130
+ "B-CCONJ": 21,
131
+ "B-DET": 22,
132
+ "B-INTJ": 23,
133
+ "B-NOUN": 24,
134
+ "B-NOUN+ADJ": 25,
135
+ "B-NOUN+ADP": 26,
136
+ "B-NUM": 27,
137
+ "B-PART": 28,
138
+ "B-PRON": 29,
139
+ "B-PROPN": 30,
140
+ "B-PUNCT": 31,
141
+ "B-SCONJ": 32,
142
+ "B-SYM": 33,
143
+ "B-VERB": 34,
144
+ "B-VERB+AUX": 35,
145
+ "B-VERB+AUX+AUX": 36,
146
+ "B-VERB+AUX+NOUN": 37,
147
+ "B-VERB+AUX+PART": 38,
148
+ "B-VERB+AUX+SCONJ": 39,
149
+ "B-X": 40,
150
+ "CCONJ": 41,
151
+ "DET": 42,
152
+ "I-ADJ": 43,
153
+ "I-ADJ+ADJ": 44,
154
+ "I-ADJ+AUX+AUX": 45,
155
+ "I-ADJ+VERB": 46,
156
+ "I-ADP": 47,
157
+ "I-ADP+NOUN": 48,
158
+ "I-ADV": 49,
159
+ "I-ADV+ADP": 50,
160
+ "I-ADV+ADP+VERB": 51,
161
+ "I-AUX": 52,
162
+ "I-AUX+AUX": 53,
163
+ "I-AUX+AUX+AUX": 54,
164
+ "I-AUX+AUX+PART": 55,
165
+ "I-AUX+SCONJ": 56,
166
+ "I-AUX+VERB": 57,
167
+ "I-CCONJ": 58,
168
+ "I-DET": 59,
169
+ "I-INTJ": 60,
170
+ "I-NOUN": 61,
171
+ "I-NOUN+ADJ": 62,
172
+ "I-NOUN+ADP": 63,
173
+ "I-NUM": 64,
174
+ "I-PART": 65,
175
+ "I-PRON": 66,
176
+ "I-PROPN": 67,
177
+ "I-PUNCT": 68,
178
+ "I-SCONJ": 69,
179
+ "I-SYM": 70,
180
+ "I-VERB": 71,
181
+ "I-VERB+AUX": 72,
182
+ "I-VERB+AUX+AUX": 73,
183
+ "I-VERB+AUX+NOUN": 74,
184
+ "I-VERB+AUX+PART": 75,
185
+ "I-VERB+AUX+SCONJ": 76,
186
+ "I-X": 77,
187
+ "NOUN": 78,
188
+ "NOUN+ADP": 79,
189
+ "NOUN+AUX": 80,
190
+ "NUM": 81,
191
+ "PART": 82,
192
+ "PRON": 83,
193
+ "PRON+ADP": 84,
194
+ "PROPN": 85,
195
+ "PUNCT": 86,
196
+ "SCONJ": 87,
197
+ "SYM": 88,
198
+ "VERB": 89,
199
+ "VERB+AUX": 90,
200
+ "VERB+AUX+PART": 91,
201
+ "X": 92
202
+ },
203
+ "layer_norm_eps": 1e-07,
204
+ "max_position_embeddings": 512,
205
+ "max_relative_positions": -1,
206
+ "model_type": "deberta-v2",
207
+ "num_attention_heads": 16,
208
+ "num_hidden_layers": 24,
209
+ "pad_token_id": 1,
210
+ "pooler_dropout": 0,
211
+ "pooler_hidden_act": "gelu",
212
+ "pooler_hidden_size": 1024,
213
+ "pos_att_type": null,
214
+ "position_biased_input": true,
215
+ "relative_attention": false,
216
+ "task_specific_params": {
217
+ "upos_multiword": {
218
+ "ADJ+ADJ": {
219
+ "\u5c11\u306a\u304f\u306a\u3044": [
220
+ "\u5c11\u306a\u304f",
221
+ "\u306a\u3044"
222
+ ]
223
+ },
224
+ "ADJ+AUX": {
225
+ "\u679c\u6562\u306a": [
226
+ "\u679c\u6562",
227
+ "\u306a"
228
+ ]
229
+ },
230
+ "ADJ+AUX+AUX": {
231
+ "\u591a\u304b\u3063\u305f\u3089\u3057\u304f": [
232
+ "\u591a\u304b\u3063",
233
+ "\u305f",
234
+ "\u3089\u3057\u304f"
235
+ ]
236
+ },
237
+ "ADJ+VERB": {
238
+ "\u306a\u304f\u306a\u308b": [
239
+ "\u306a\u304f",
240
+ "\u306a\u308b"
241
+ ],
242
+ "\u5c11\u306a\u304f\u306a\u3063": [
243
+ "\u5c11\u306a\u304f",
244
+ "\u306a\u3063"
245
+ ]
246
+ },
247
+ "ADP+NOUN": {
248
+ "\u306f\u3057\u3083\u304d\u3057\u3083\u304d\u30ec\u30bf\u30b9": [
249
+ "\u306f",
250
+ "\u3057\u3083\u304d\u3057\u3083\u304d\u30ec\u30bf\u30b9"
251
+ ]
252
+ },
253
+ "ADV+ADP": {
254
+ "\u3082\u3061\u3063\u3068": [
255
+ "\u3082\u3061\u3063",
256
+ "\u3068"
257
+ ]
258
+ },
259
+ "ADV+ADP+VERB": {
260
+ "\u3082\u3057\u304b\u3057": [
261
+ "\u3082\u3057",
262
+ "\u304b",
263
+ "\u3057"
264
+ ]
265
+ },
266
+ "AUX+AUX": {
267
+ "\u30c1\u30e3\u30c3\u30bf": [
268
+ "\u30c1\u30e3\u30c3",
269
+ "\u30bf"
270
+ ]
271
+ },
272
+ "AUX+AUX+AUX": {
273
+ "\u3060\u3063\u305f\u3089\u3057\u3044": [
274
+ "\u3060\u3063",
275
+ "\u305f",
276
+ "\u3089\u3057\u3044"
277
+ ],
278
+ "\u3060\u3063\u305f\u3089\u3057\u304f": [
279
+ "\u3060\u3063",
280
+ "\u305f",
281
+ "\u3089\u3057\u304f"
282
+ ]
283
+ },
284
+ "AUX+AUX+PART": {
285
+ "\u3066\u3044\u305f\u3060\u3051": [
286
+ "\u3066\u3044",
287
+ "\u305f",
288
+ "\u3060\u3051"
289
+ ]
290
+ },
291
+ "AUX+SCONJ": {
292
+ "\u3060\u3051\u3069": [
293
+ "\u3060",
294
+ "\u3051\u3069"
295
+ ],
296
+ "\u306e\u3060\u3051\u3069": [
297
+ "\u306e\u3060",
298
+ "\u3051\u3069"
299
+ ],
300
+ "\u3093\u3060\u3051\u3069": [
301
+ "\u3093\u3060",
302
+ "\u3051\u3069"
303
+ ]
304
+ },
305
+ "AUX+VERB": {
306
+ "\u306a\u304f\u306a\u3063": [
307
+ "\u306a\u304f",
308
+ "\u306a\u3063"
309
+ ],
310
+ "\u306a\u304f\u306a\u308a": [
311
+ "\u306a\u304f",
312
+ "\u306a\u308a"
313
+ ],
314
+ "\u306a\u304f\u306a\u308b": [
315
+ "\u306a\u304f",
316
+ "\u306a\u308b"
317
+ ]
318
+ },
319
+ "NOUN+ADJ": {
320
+ "\u60aa\u540d\u9ad8\u3044": [
321
+ "\u60aa\u540d",
322
+ "\u9ad8\u3044"
323
+ ]
324
+ },
325
+ "NOUN+ADP": {
326
+ "\u3072\u3068\u308a\u3067": [
327
+ "\u3072\u3068\u308a",
328
+ "\u3067"
329
+ ],
330
+ "\u4ee3\u8d70\u904b\u8ee2\u304c": [
331
+ "\u4ee3\u8d70\u904b\u8ee2",
332
+ "\u304c"
333
+ ],
334
+ "\u4f7f\u7528\u958b\u59cb\u307e\u3067": [
335
+ "\u4f7f\u7528\u958b\u59cb",
336
+ "\u307e\u3067"
337
+ ],
338
+ "\u516c\u8ee2\u304c": [
339
+ "\u516c\u8ee2",
340
+ "\u304c"
341
+ ],
342
+ "\u5171\u98df\u3044\u3082": [
343
+ "\u5171\u98df\u3044",
344
+ "\u3082"
345
+ ],
346
+ "\u9152\u6c17\u5e2f\u3073\u904b\u8ee2\u304c": [
347
+ "\u9152\u6c17\u5e2f\u3073\u904b\u8ee2",
348
+ "\u304c"
349
+ ]
350
+ },
351
+ "NOUN+AUX": {
352
+ "\u3072\u3068\u308a\u3067": [
353
+ "\u3072\u3068\u308a",
354
+ "\u3067"
355
+ ],
356
+ "\u601d\u3044\u3060": [
357
+ "\u601d\u3044",
358
+ "\u3060"
359
+ ]
360
+ },
361
+ "PRON+ADP": {
362
+ "\u306a\u3093\u304b": [
363
+ "\u306a\u3093",
364
+ "\u304b"
365
+ ]
366
+ },
367
+ "VERB+AUX": {
368
+ "\u3044\u308f\u308c": [
369
+ "\u3044\u308f",
370
+ "\u308c"
371
+ ],
372
+ "\u3044\u308f\u308c\u308b": [
373
+ "\u3044\u308f",
374
+ "\u308c\u308b"
375
+ ],
376
+ "\u304a\u3053\u306a\u308f\u308c": [
377
+ "\u304a\u3053\u306a\u308f",
378
+ "\u308c"
379
+ ],
380
+ "\u304b\u307e\u308f\u305a": [
381
+ "\u304b\u307e\u308f",
382
+ "\u305a"
383
+ ],
384
+ "\u3055\u3089\u308f\u308c": [
385
+ "\u3055\u3089\u308f",
386
+ "\u308c"
387
+ ],
388
+ "\u3057\u3053\u307e\u305b": [
389
+ "\u3057\u3053\u307e",
390
+ "\u305b"
391
+ ],
392
+ "\u3059\u3079\u304d": [
393
+ "\u3059",
394
+ "\u3079\u304d"
395
+ ],
396
+ "\u306a\u3055\u308c": [
397
+ "\u306a\u3055",
398
+ "\u308c"
399
+ ],
400
+ "\u306a\u3055\u308c\u308b": [
401
+ "\u306a\u3055",
402
+ "\u308c\u308b"
403
+ ],
404
+ "\u306a\u3058\u307e\u305b": [
405
+ "\u306a\u3058\u307e",
406
+ "\u305b"
407
+ ],
408
+ "\u306a\u3089\u305a": [
409
+ "\u306a\u3089",
410
+ "\u305a"
411
+ ],
412
+ "\u307f\u306a\u3055\u308c": [
413
+ "\u307f\u306a\u3055",
414
+ "\u308c"
415
+ ],
416
+ "\u30c1\u30a7\u30c3\u30af\u3059\u3079\u304f": [
417
+ "\u30c1\u30a7\u30c3\u30af\u3059",
418
+ "\u3079\u304f"
419
+ ],
420
+ "\u4e0b\u3055\u308c\u308b": [
421
+ "\u4e0b\u3055",
422
+ "\u308c\u308b"
423
+ ],
424
+ "\u4ecb\u5165\u3059\u3079\u3057": [
425
+ "\u4ecb\u5165\u3059",
426
+ "\u3079\u3057"
427
+ ],
428
+ "\u4ed5\u821e\u308f\u308c": [
429
+ "\u4ed5\u821e\u308f",
430
+ "\u308c"
431
+ ],
432
+ "\u4f11\u307e\u305b": [
433
+ "\u4f11\u307e",
434
+ "\u305b"
435
+ ],
436
+ "\u4f34\u308f\u308c": [
437
+ "\u4f34\u308f",
438
+ "\u308c"
439
+ ],
440
+ "\u5145\u5f53\u3059\u3079\u304f": [
441
+ "\u5145\u5f53\u3059",
442
+ "\u3079\u304f"
443
+ ],
444
+ "\u5165\u3063\u3061\u3083\u3044": [
445
+ "\u5165\u3063",
446
+ "\u3061\u3083\u3044"
447
+ ],
448
+ "\u52d5\u304b\u305b": [
449
+ "\u52d5\u304b",
450
+ "\u305b"
451
+ ],
452
+ "\u533f\u308f\u308c": [
453
+ "\u533f\u308f",
454
+ "\u308c"
455
+ ],
456
+ "\u542b\u307e\u305b": [
457
+ "\u542b\u307e",
458
+ "\u305b"
459
+ ],
460
+ "\u548c\u307e\u305b": [
461
+ "\u548c\u307e",
462
+ "\u305b"
463
+ ],
464
+ "\u554f\u308f\u305a": [
465
+ "\u554f\u308f",
466
+ "\u305a"
467
+ ],
468
+ "\u554f\u308f\u308c": [
469
+ "\u554f\u308f",
470
+ "\u308c"
471
+ ],
472
+ "\u554f\u308f\u308c\u308b": [
473
+ "\u554f\u308f",
474
+ "\u308c\u308b"
475
+ ],
476
+ "\u596a\u308f\u308c": [
477
+ "\u596a\u308f",
478
+ "\u308c"
479
+ ],
480
+ "\u596a\u308f\u308c\u308b": [
481
+ "\u596a\u308f",
482
+ "\u308c\u308b"
483
+ ],
484
+ "\u5acc\u308f\u308c": [
485
+ "\u5acc\u308f",
486
+ "\u308c"
487
+ ],
488
+ "\u601d\u3063\u3061\u3083\u3044": [
489
+ "\u601d\u3063",
490
+ "\u3061\u3083\u3044"
491
+ ],
492
+ "\u601d\u3063\u3061\u3083\u3046": [
493
+ "\u601d\u3063",
494
+ "\u3061\u3083\u3046"
495
+ ],
496
+ "\u60a9\u307e\u305b": [
497
+ "\u60a9\u307e",
498
+ "\u305b"
499
+ ],
500
+ "\u60a9\u307e\u305b\u308b": [
501
+ "\u60a9\u307e",
502
+ "\u305b\u308b"
503
+ ],
504
+ "\u6271\u308f\u308c": [
505
+ "\u6271\u308f",
506
+ "\u308c"
507
+ ],
508
+ "\u6271\u308f\u308c\u308b": [
509
+ "\u6271\u308f",
510
+ "\u308c\u308b"
511
+ ],
512
+ "\u6279\u5224\u3059\u3079\u304d": [
513
+ "\u6279\u5224\u3059",
514
+ "\u3079\u304d"
515
+ ],
516
+ "\u6392\u9664\u3059\u3079\u304d": [
517
+ "\u6392\u9664\u3059",
518
+ "\u3079\u304d"
519
+ ],
520
+ "\u6458\u767a\u3059\u3079\u304d": [
521
+ "\u6458\u767a\u3059",
522
+ "\u3079\u304d"
523
+ ],
524
+ "\u6551\u308f\u308c\u308b": [
525
+ "\u6551\u308f",
526
+ "\u308c\u308b"
527
+ ],
528
+ "\u66ae\u3089\u3059\u3079\u304d": [
529
+ "\u66ae\u3089\u3059",
530
+ "\u3079\u304d"
531
+ ],
532
+ "\u679c\u305f\u3059\u3079\u304f": [
533
+ "\u679c\u305f\u3059",
534
+ "\u3079\u304f"
535
+ ],
536
+ "\u69cb\u308f\u305a": [
537
+ "\u69cb\u308f",
538
+ "\u305a"
539
+ ],
540
+ "\u6b4c\u308f\u308c": [
541
+ "\u6b4c\u308f",
542
+ "\u308c"
543
+ ],
544
+ "\u6b6a\u307e\u305b\u308b": [
545
+ "\u6b6a\u307e",
546
+ "\u305b\u308b"
547
+ ],
548
+ "\u6e08\u307e\u305b": [
549
+ "\u6e08\u307e",
550
+ "\u305b"
551
+ ],
552
+ "\u72d9\u308f\u308c": [
553
+ "\u72d9\u308f",
554
+ "\u308c"
555
+ ],
556
+ "\u751f\u304b\u3059\u3079\u304f": [
557
+ "\u751f\u304b\u3059",
558
+ "\u3079\u304f"
559
+ ],
560
+ "\u77e5\u3089\u305b": [
561
+ "\u77e5\u3089",
562
+ "\u305b"
563
+ ],
564
+ "\u77e5\u3089\u305b\u308b": [
565
+ "\u77e5\u3089",
566
+ "\u305b\u308b"
567
+ ],
568
+ "\u7d42\u308f\u3063\u3061\u3083\u3046": [
569
+ "\u7d42\u308f\u3063",
570
+ "\u3061\u3083\u3046"
571
+ ],
572
+ "\u7d71\u4e00\u3059\u3079\u304f": [
573
+ "\u7d71\u4e00\u3059",
574
+ "\u3079\u304f"
575
+ ],
576
+ "\u884c\u3063\u3061\u3083\u3044": [
577
+ "\u884c\u3063",
578
+ "\u3061\u3083\u3044"
579
+ ],
580
+ "\u884c\u308f\u305a": [
581
+ "\u884c\u308f",
582
+ "\u305a"
583
+ ],
584
+ "\u884c\u308f\u308c": [
585
+ "\u884c\u308f",
586
+ "\u308c"
587
+ ],
588
+ "\u884c\u308f\u308c\u308b": [
589
+ "\u884c\u308f",
590
+ "\u308c\u308b"
591
+ ],
592
+ "\u88ab\u308f\u308c\u308b": [
593
+ "\u88ab\u308f",
594
+ "\u308c\u308b"
595
+ ],
596
+ "\u8972\u308f\u308c": [
597
+ "\u8972\u308f",
598
+ "\u308c"
599
+ ],
600
+ "\u8986\u308f\u308c": [
601
+ "\u8986\u308f",
602
+ "\u308c"
603
+ ],
604
+ "\u898b\u306a\u3055\u308c": [
605
+ "\u898b\u306a\u3055",
606
+ "\u308c"
607
+ ],
608
+ "\u898b\u821e\u308f\u308c": [
609
+ "\u898b\u821e\u308f",
610
+ "\u308c"
611
+ ],
612
+ "\u89e3\u304d\u660e\u304b\u3059\u3079\u304d": [
613
+ "\u89e3\u304d\u660e\u304b\u3059",
614
+ "\u3079\u304d"
615
+ ],
616
+ "\u8a60\u308f\u308c": [
617
+ "\u8a60\u308f",
618
+ "\u308c"
619
+ ],
620
+ "\u8a98\u308f\u308c": [
621
+ "\u8a98\u308f",
622
+ "\u308c"
623
+ ],
624
+ "\u8aac\u660e\u3059\u3079\u304d": [
625
+ "\u8aac\u660e\u3059",
626
+ "\u3079\u304d"
627
+ ],
628
+ "\u8cb7\u3063\u3061\u3083\u3044": [
629
+ "\u8cb7\u3063",
630
+ "\u3061\u3083\u3044"
631
+ ],
632
+ "\u8e0f\u307e\u305b": [
633
+ "\u8e0f\u307e",
634
+ "\u305b"
635
+ ],
636
+ "\u8f9e\u8077\u3059\u3079\u304d": [
637
+ "\u8f9e\u8077\u3059",
638
+ "\u3079\u304d"
639
+ ],
640
+ "\u990a\u308f\u308c\u308b": [
641
+ "\u990a\u308f",
642
+ "\u308c\u308b"
643
+ ],
644
+ "\u9cf4\u3089\u3059\u3079\u304f": [
645
+ "\u9cf4\u3089\u3059",
646
+ "\u3079\u304f"
647
+ ]
648
+ },
649
+ "VERB+AUX+AUX": {
650
+ "\u306a\u3063\u305f\u3089\u3057\u3044": [
651
+ "\u306a\u3063",
652
+ "\u305f",
653
+ "\u3089\u3057\u3044"
654
+ ],
655
+ "\u601d\u3063\u305f\u3089\u3057\u304f": [
656
+ "\u601d\u3063",
657
+ "\u305f",
658
+ "\u3089\u3057\u304f"
659
+ ],
660
+ "\u8cb7\u3063\u305f\u3089\u3057\u304f": [
661
+ "\u8cb7\u3063",
662
+ "\u305f",
663
+ "\u3089\u3057\u304f"
664
+ ]
665
+ },
666
+ "VERB+AUX+NOUN": {
667
+ "\u5909\u66f4\u3057\u305f\u305f\u3081": [
668
+ "\u5909\u66f4\u3057",
669
+ "\u305f",
670
+ "\u305f\u3081"
671
+ ],
672
+ "\u5931\u6557\u3057\u305f\u305f\u3081": [
673
+ "\u5931\u6557\u3057",
674
+ "\u305f",
675
+ "\u305f\u3081"
676
+ ],
677
+ "\u5bfe\u7acb\u3057\u305f\u305f\u3081": [
678
+ "\u5bfe\u7acb\u3057",
679
+ "\u305f",
680
+ "\u305f\u3081"
681
+ ],
682
+ "\u6f5c\u5165\u3057\u305f\u305f\u3081": [
683
+ "\u6f5c\u5165\u3057",
684
+ "\u305f",
685
+ "\u305f\u3081"
686
+ ],
687
+ "\u8131\u8d70\u3057\u305f\u305f\u3081": [
688
+ "\u8131\u8d70\u3057",
689
+ "\u305f",
690
+ "\u305f\u3081"
691
+ ],
692
+ "\u8868\u660e\u3057\u305f\u305f\u3081": [
693
+ "\u8868\u660e\u3057",
694
+ "\u305f",
695
+ "\u305f\u3081"
696
+ ],
697
+ "\u8981\u8acb\u3057\u305f\u305f\u3081": [
698
+ "\u8981\u8acb\u3057",
699
+ "\u305f",
700
+ "\u305f\u3081"
701
+ ]
702
+ },
703
+ "VERB+AUX+PART": {
704
+ "\u3042\u3063\u305f\u304b": [
705
+ "\u3042\u3063",
706
+ "\u305f",
707
+ "\u304b"
708
+ ]
709
+ },
710
+ "VERB+AUX+SCONJ": {
711
+ "\u592d\u6298\u3057\u305f\u305f\u3081\u306b": [
712
+ "\u592d\u6298\u3057",
713
+ "\u305f",
714
+ "\u305f\u3081\u306b"
715
+ ],
716
+ "\u5fb4\u767a\u3057\u305f\u305f\u3081\u306b": [
717
+ "\u5fb4\u767a\u3057",
718
+ "\u305f",
719
+ "\u305f\u3081\u306b"
720
+ ],
721
+ "\u62d2\u5426\u3057\u305f\u305f\u3081\u306b": [
722
+ "\u62d2\u5426\u3057",
723
+ "\u305f",
724
+ "\u305f\u3081\u306b"
725
+ ]
726
+ }
727
+ }
728
+ },
729
+ "tokenizer_class": "DebertaV2TokenizerFast",
730
+ "torch_dtype": "float32",
731
+ "transformers_version": "4.19.2",
732
+ "type_vocab_size": 0,
733
+ "vocab_size": 32000
734
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:967ca0bfeb703b0f683e69f84c37ad318b3fe0b80b19e5f8af34e667a8e6acec
3
+ size 1342932979
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
3
+ size 1
supar.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b414f2e7b443f8e2b4c5e40e684fe1c2c9791e4a863cf60a840354c7cc47ea4
3
+ size 1391428843
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": true, "keep_accents": true, "model_max_length": 512, "tokenizer_class": "DebertaV2TokenizerFast"}