KoichiYasuoka committed on
Commit
e1dfa8e
1 Parent(s): 0f0b59f

initial release

Browse files
README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - "ja"
4
+ tags:
5
+ - "japanese"
6
+ - "token-classification"
7
+ - "pos"
8
+ - "dependency-parsing"
9
+ datasets:
10
+ - "universal_dependencies"
11
+ license: "cc-by-sa-4.0"
12
+ pipeline_tag: "token-classification"
13
+ widget:
14
+ - text: "国境の長いトンネルを抜けると雪国であった。"
15
+ ---
16
+
17
+ # deberta-base-japanese-wikipedia-luw-upos
18
+
19
+ ## Model Description
20
+
21
+ This is a DeBERTa(V2) model pre-trained on Japanese Wikipedia and 青空文庫 (Aozora Bunko) texts for POS-tagging and dependency-parsing, derived from [deberta-base-japanese-wikipedia](https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia). Every long-unit-word is tagged with its [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech) tag.
22
+
23
+ ## How to Use
24
+
25
+ ```py
26
+ import torch
27
+ from transformers import AutoTokenizer,AutoModelForTokenClassification
28
+ tokenizer=AutoTokenizer.from_pretrained("KoichiYasuoka/deberta-base-japanese-wikipedia-luw-upos")
29
+ model=AutoModelForTokenClassification.from_pretrained("KoichiYasuoka/deberta-base-japanese-wikipedia-luw-upos")
30
+ s="国境の長いトンネルを抜けると雪国であった。"
31
+ t=tokenizer.tokenize(s)
32
+ p=[model.config.id2label[q] for q in torch.argmax(model(tokenizer.encode(s,return_tensors="pt"))["logits"],dim=2)[0].tolist()[1:-1]]
33
+ print(list(zip(t,p)))
34
+ ```
35
+
36
+ or
37
+
38
+ ```py
39
+ import esupar
40
+ nlp=esupar.load("KoichiYasuoka/deberta-base-japanese-wikipedia-luw-upos")
41
+ print(nlp("国境の長いトンネルを抜けると雪国であった。"))
42
+ ```
43
+
44
+ ## See Also
45
+
46
+ [esupar](https://github.com/KoichiYasuoka/esupar): Tokenizer, POS-tagger and Dependency-parser with BERT/RoBERTa/DeBERTa models
47
+
config.json ADDED
@@ -0,0 +1,721 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "ADJ",
13
+ "1": "ADJ+AUX",
14
+ "2": "ADP",
15
+ "3": "ADV",
16
+ "4": "AUX",
17
+ "5": "AUX+VERB",
18
+ "6": "B-ADJ",
19
+ "7": "B-ADJ+ADJ",
20
+ "8": "B-ADJ+AUX+AUX",
21
+ "9": "B-ADJ+VERB",
22
+ "10": "B-ADP",
23
+ "11": "B-ADP+NOUN",
24
+ "12": "B-ADV",
25
+ "13": "B-ADV+ADP",
26
+ "14": "B-ADV+ADP+VERB",
27
+ "15": "B-AUX",
28
+ "16": "B-AUX+AUX+AUX",
29
+ "17": "B-AUX+AUX+PART",
30
+ "18": "B-AUX+SCONJ",
31
+ "19": "B-AUX+VERB",
32
+ "20": "B-CCONJ",
33
+ "21": "B-DET",
34
+ "22": "B-INTJ",
35
+ "23": "B-NOUN",
36
+ "24": "B-NOUN+ADJ",
37
+ "25": "B-NOUN+ADP",
38
+ "26": "B-NUM",
39
+ "27": "B-PART",
40
+ "28": "B-PRON",
41
+ "29": "B-PROPN",
42
+ "30": "B-PUNCT",
43
+ "31": "B-SCONJ",
44
+ "32": "B-SYM",
45
+ "33": "B-VERB",
46
+ "34": "B-VERB+AUX",
47
+ "35": "B-VERB+AUX+AUX",
48
+ "36": "B-VERB+AUX+NOUN",
49
+ "37": "B-VERB+AUX+PART",
50
+ "38": "B-VERB+AUX+SCONJ",
51
+ "39": "B-X",
52
+ "40": "CCONJ",
53
+ "41": "DET",
54
+ "42": "I-ADJ",
55
+ "43": "I-ADJ+ADJ",
56
+ "44": "I-ADJ+AUX+AUX",
57
+ "45": "I-ADJ+VERB",
58
+ "46": "I-ADP",
59
+ "47": "I-ADP+NOUN",
60
+ "48": "I-ADV",
61
+ "49": "I-ADV+ADP",
62
+ "50": "I-ADV+ADP+VERB",
63
+ "51": "I-AUX",
64
+ "52": "I-AUX+AUX+AUX",
65
+ "53": "I-AUX+AUX+PART",
66
+ "54": "I-AUX+SCONJ",
67
+ "55": "I-AUX+VERB",
68
+ "56": "I-CCONJ",
69
+ "57": "I-DET",
70
+ "58": "I-INTJ",
71
+ "59": "I-NOUN",
72
+ "60": "I-NOUN+ADJ",
73
+ "61": "I-NOUN+ADP",
74
+ "62": "I-NUM",
75
+ "63": "I-PART",
76
+ "64": "I-PRON",
77
+ "65": "I-PROPN",
78
+ "66": "I-PUNCT",
79
+ "67": "I-SCONJ",
80
+ "68": "I-SYM",
81
+ "69": "I-VERB",
82
+ "70": "I-VERB+AUX",
83
+ "71": "I-VERB+AUX+AUX",
84
+ "72": "I-VERB+AUX+NOUN",
85
+ "73": "I-VERB+AUX+PART",
86
+ "74": "I-VERB+AUX+SCONJ",
87
+ "75": "I-X",
88
+ "76": "INTJ",
89
+ "77": "NOUN",
90
+ "78": "NOUN+ADP",
91
+ "79": "NOUN+AUX",
92
+ "80": "NUM",
93
+ "81": "PART",
94
+ "82": "PRON",
95
+ "83": "PRON+ADP",
96
+ "84": "PROPN",
97
+ "85": "PUNCT",
98
+ "86": "SCONJ",
99
+ "87": "SYM",
100
+ "88": "VERB",
101
+ "89": "VERB+AUX",
102
+ "90": "X"
103
+ },
104
+ "initializer_range": 0.02,
105
+ "intermediate_size": 3072,
106
+ "label2id": {
107
+ "ADJ": 0,
108
+ "ADJ+AUX": 1,
109
+ "ADP": 2,
110
+ "ADV": 3,
111
+ "AUX": 4,
112
+ "AUX+VERB": 5,
113
+ "B-ADJ": 6,
114
+ "B-ADJ+ADJ": 7,
115
+ "B-ADJ+AUX+AUX": 8,
116
+ "B-ADJ+VERB": 9,
117
+ "B-ADP": 10,
118
+ "B-ADP+NOUN": 11,
119
+ "B-ADV": 12,
120
+ "B-ADV+ADP": 13,
121
+ "B-ADV+ADP+VERB": 14,
122
+ "B-AUX": 15,
123
+ "B-AUX+AUX+AUX": 16,
124
+ "B-AUX+AUX+PART": 17,
125
+ "B-AUX+SCONJ": 18,
126
+ "B-AUX+VERB": 19,
127
+ "B-CCONJ": 20,
128
+ "B-DET": 21,
129
+ "B-INTJ": 22,
130
+ "B-NOUN": 23,
131
+ "B-NOUN+ADJ": 24,
132
+ "B-NOUN+ADP": 25,
133
+ "B-NUM": 26,
134
+ "B-PART": 27,
135
+ "B-PRON": 28,
136
+ "B-PROPN": 29,
137
+ "B-PUNCT": 30,
138
+ "B-SCONJ": 31,
139
+ "B-SYM": 32,
140
+ "B-VERB": 33,
141
+ "B-VERB+AUX": 34,
142
+ "B-VERB+AUX+AUX": 35,
143
+ "B-VERB+AUX+NOUN": 36,
144
+ "B-VERB+AUX+PART": 37,
145
+ "B-VERB+AUX+SCONJ": 38,
146
+ "B-X": 39,
147
+ "CCONJ": 40,
148
+ "DET": 41,
149
+ "I-ADJ": 42,
150
+ "I-ADJ+ADJ": 43,
151
+ "I-ADJ+AUX+AUX": 44,
152
+ "I-ADJ+VERB": 45,
153
+ "I-ADP": 46,
154
+ "I-ADP+NOUN": 47,
155
+ "I-ADV": 48,
156
+ "I-ADV+ADP": 49,
157
+ "I-ADV+ADP+VERB": 50,
158
+ "I-AUX": 51,
159
+ "I-AUX+AUX+AUX": 52,
160
+ "I-AUX+AUX+PART": 53,
161
+ "I-AUX+SCONJ": 54,
162
+ "I-AUX+VERB": 55,
163
+ "I-CCONJ": 56,
164
+ "I-DET": 57,
165
+ "I-INTJ": 58,
166
+ "I-NOUN": 59,
167
+ "I-NOUN+ADJ": 60,
168
+ "I-NOUN+ADP": 61,
169
+ "I-NUM": 62,
170
+ "I-PART": 63,
171
+ "I-PRON": 64,
172
+ "I-PROPN": 65,
173
+ "I-PUNCT": 66,
174
+ "I-SCONJ": 67,
175
+ "I-SYM": 68,
176
+ "I-VERB": 69,
177
+ "I-VERB+AUX": 70,
178
+ "I-VERB+AUX+AUX": 71,
179
+ "I-VERB+AUX+NOUN": 72,
180
+ "I-VERB+AUX+PART": 73,
181
+ "I-VERB+AUX+SCONJ": 74,
182
+ "I-X": 75,
183
+ "INTJ": 76,
184
+ "NOUN": 77,
185
+ "NOUN+ADP": 78,
186
+ "NOUN+AUX": 79,
187
+ "NUM": 80,
188
+ "PART": 81,
189
+ "PRON": 82,
190
+ "PRON+ADP": 83,
191
+ "PROPN": 84,
192
+ "PUNCT": 85,
193
+ "SCONJ": 86,
194
+ "SYM": 87,
195
+ "VERB": 88,
196
+ "VERB+AUX": 89,
197
+ "X": 90
198
+ },
199
+ "layer_norm_eps": 1e-07,
200
+ "max_position_embeddings": 512,
201
+ "max_relative_positions": -1,
202
+ "model_type": "deberta-v2",
203
+ "num_attention_heads": 12,
204
+ "num_hidden_layers": 12,
205
+ "pad_token_id": 1,
206
+ "pooler_dropout": 0,
207
+ "pooler_hidden_act": "gelu",
208
+ "pooler_hidden_size": 768,
209
+ "pos_att_type": null,
210
+ "position_biased_input": true,
211
+ "relative_attention": false,
212
+ "task_specific_params": {
213
+ "upos_multiword": {
214
+ "ADJ+ADJ": {
215
+ "\u5c11\u306a\u304f\u306a\u3044": [
216
+ "\u5c11\u306a\u304f",
217
+ "\u306a\u3044"
218
+ ]
219
+ },
220
+ "ADJ+AUX": {
221
+ "\u679c\u6562\u306a": [
222
+ "\u679c\u6562",
223
+ "\u306a"
224
+ ]
225
+ },
226
+ "ADJ+AUX+AUX": {
227
+ "\u591a\u304b\u3063\u305f\u3089\u3057\u304f": [
228
+ "\u591a\u304b\u3063",
229
+ "\u305f",
230
+ "\u3089\u3057\u304f"
231
+ ]
232
+ },
233
+ "ADJ+VERB": {
234
+ "\u306a\u304f\u306a\u308b": [
235
+ "\u306a\u304f",
236
+ "\u306a\u308b"
237
+ ],
238
+ "\u5c11\u306a\u304f\u306a\u3063": [
239
+ "\u5c11\u306a\u304f",
240
+ "\u306a\u3063"
241
+ ]
242
+ },
243
+ "ADP+NOUN": {
244
+ "\u306f\u3057\u3083\u304d\u3057\u3083\u304d\u30ec\u30bf\u30b9": [
245
+ "\u306f",
246
+ "\u3057\u3083\u304d\u3057\u3083\u304d\u30ec\u30bf\u30b9"
247
+ ]
248
+ },
249
+ "ADV+ADP": {
250
+ "\u3082\u3061\u3063\u3068": [
251
+ "\u3082\u3061\u3063",
252
+ "\u3068"
253
+ ]
254
+ },
255
+ "ADV+ADP+VERB": {
256
+ "\u3082\u3057\u304b\u3057": [
257
+ "\u3082\u3057",
258
+ "\u304b",
259
+ "\u3057"
260
+ ]
261
+ },
262
+ "AUX+AUX+AUX": {
263
+ "\u3060\u3063\u305f\u3089\u3057\u3044": [
264
+ "\u3060\u3063",
265
+ "\u305f",
266
+ "\u3089\u3057\u3044"
267
+ ],
268
+ "\u3060\u3063\u305f\u3089\u3057\u304f": [
269
+ "\u3060\u3063",
270
+ "\u305f",
271
+ "\u3089\u3057\u304f"
272
+ ]
273
+ },
274
+ "AUX+AUX+PART": {
275
+ "\u3066\u3044\u305f\u3060\u3051": [
276
+ "\u3066\u3044",
277
+ "\u305f",
278
+ "\u3060\u3051"
279
+ ]
280
+ },
281
+ "AUX+SCONJ": {
282
+ "\u3060\u3051\u3069": [
283
+ "\u3060",
284
+ "\u3051\u3069"
285
+ ],
286
+ "\u306e\u3060\u3051\u3069": [
287
+ "\u306e\u3060",
288
+ "\u3051\u3069"
289
+ ],
290
+ "\u3093\u3060\u3051\u3069": [
291
+ "\u3093\u3060",
292
+ "\u3051\u3069"
293
+ ]
294
+ },
295
+ "AUX+VERB": {
296
+ "\u306a\u304f\u306a\u3063": [
297
+ "\u306a\u304f",
298
+ "\u306a\u3063"
299
+ ],
300
+ "\u306a\u304f\u306a\u308a": [
301
+ "\u306a\u304f",
302
+ "\u306a\u308a"
303
+ ],
304
+ "\u306a\u304f\u306a\u308b": [
305
+ "\u306a\u304f",
306
+ "\u306a\u308b"
307
+ ]
308
+ },
309
+ "NOUN+ADJ": {
310
+ "\u60aa\u540d\u9ad8\u3044": [
311
+ "\u60aa\u540d",
312
+ "\u9ad8\u3044"
313
+ ]
314
+ },
315
+ "NOUN+ADP": {
316
+ "\u3072\u3068\u308a\u3067": [
317
+ "\u3072\u3068\u308a",
318
+ "\u3067"
319
+ ],
320
+ "\u4ee3\u8d70\u904b\u8ee2\u304c": [
321
+ "\u4ee3\u8d70\u904b\u8ee2",
322
+ "\u304c"
323
+ ],
324
+ "\u4f7f\u7528\u958b\u59cb\u307e\u3067": [
325
+ "\u4f7f\u7528\u958b\u59cb",
326
+ "\u307e\u3067"
327
+ ],
328
+ "\u516c\u8ee2\u304c": [
329
+ "\u516c\u8ee2",
330
+ "\u304c"
331
+ ],
332
+ "\u5171\u98df\u3044\u3082": [
333
+ "\u5171\u98df\u3044",
334
+ "\u3082"
335
+ ],
336
+ "\u9152\u6c17\u5e2f\u3073\u904b\u8ee2\u304c": [
337
+ "\u9152\u6c17\u5e2f\u3073\u904b\u8ee2",
338
+ "\u304c"
339
+ ]
340
+ },
341
+ "NOUN+AUX": {
342
+ "\u3072\u3068\u308a\u3067": [
343
+ "\u3072\u3068\u308a",
344
+ "\u3067"
345
+ ],
346
+ "\u601d\u3044\u3060": [
347
+ "\u601d\u3044",
348
+ "\u3060"
349
+ ]
350
+ },
351
+ "PRON+ADP": {
352
+ "\u306a\u3093\u304b": [
353
+ "\u306a\u3093",
354
+ "\u304b"
355
+ ]
356
+ },
357
+ "VERB+AUX": {
358
+ "\u3044\u308f\u308c": [
359
+ "\u3044\u308f",
360
+ "\u308c"
361
+ ],
362
+ "\u3044\u308f\u308c\u308b": [
363
+ "\u3044\u308f",
364
+ "\u308c\u308b"
365
+ ],
366
+ "\u304a\u3053\u306a\u308f\u308c": [
367
+ "\u304a\u3053\u306a\u308f",
368
+ "\u308c"
369
+ ],
370
+ "\u304b\u307e\u308f\u305a": [
371
+ "\u304b\u307e\u308f",
372
+ "\u305a"
373
+ ],
374
+ "\u3055\u3089\u308f\u308c": [
375
+ "\u3055\u3089\u308f",
376
+ "\u308c"
377
+ ],
378
+ "\u3057\u3053\u307e\u305b": [
379
+ "\u3057\u3053\u307e",
380
+ "\u305b"
381
+ ],
382
+ "\u3059\u3079\u304d": [
383
+ "\u3059",
384
+ "\u3079\u304d"
385
+ ],
386
+ "\u306a\u3055\u308c": [
387
+ "\u306a\u3055",
388
+ "\u308c"
389
+ ],
390
+ "\u306a\u3055\u308c\u308b": [
391
+ "\u306a\u3055",
392
+ "\u308c\u308b"
393
+ ],
394
+ "\u306a\u3058\u307e\u305b": [
395
+ "\u306a\u3058\u307e",
396
+ "\u305b"
397
+ ],
398
+ "\u306a\u3089\u305a": [
399
+ "\u306a\u3089",
400
+ "\u305a"
401
+ ],
402
+ "\u307f\u306a\u3055\u308c": [
403
+ "\u307f\u306a\u3055",
404
+ "\u308c"
405
+ ],
406
+ "\u30c1\u30a7\u30c3\u30af\u3059\u3079\u304f": [
407
+ "\u30c1\u30a7\u30c3\u30af\u3059",
408
+ "\u3079\u304f"
409
+ ],
410
+ "\u30e4\u30e9\u30ec": [
411
+ "\u30e4\u30e9",
412
+ "\u30ec"
413
+ ],
414
+ "\u4e0b\u3055\u308c\u308b": [
415
+ "\u4e0b\u3055",
416
+ "\u308c\u308b"
417
+ ],
418
+ "\u4ecb\u5165\u3059\u3079\u3057": [
419
+ "\u4ecb\u5165\u3059",
420
+ "\u3079\u3057"
421
+ ],
422
+ "\u4ed5\u821e\u308f\u308c": [
423
+ "\u4ed5\u821e\u308f",
424
+ "\u308c"
425
+ ],
426
+ "\u4f11\u307e\u305b": [
427
+ "\u4f11\u307e",
428
+ "\u305b"
429
+ ],
430
+ "\u4f34\u308f\u308c": [
431
+ "\u4f34\u308f",
432
+ "\u308c"
433
+ ],
434
+ "\u5145\u5f53\u3059\u3079\u304f": [
435
+ "\u5145\u5f53\u3059",
436
+ "\u3079\u304f"
437
+ ],
438
+ "\u5165\u3063\u3061\u3083\u3044": [
439
+ "\u5165\u3063",
440
+ "\u3061\u3083\u3044"
441
+ ],
442
+ "\u52d5\u304b\u305b": [
443
+ "\u52d5\u304b",
444
+ "\u305b"
445
+ ],
446
+ "\u533f\u308f\u308c": [
447
+ "\u533f\u308f",
448
+ "\u308c"
449
+ ],
450
+ "\u542b\u307e\u305b": [
451
+ "\u542b\u307e",
452
+ "\u305b"
453
+ ],
454
+ "\u548c\u307e\u305b": [
455
+ "\u548c\u307e",
456
+ "\u305b"
457
+ ],
458
+ "\u554f\u308f\u305a": [
459
+ "\u554f\u308f",
460
+ "\u305a"
461
+ ],
462
+ "\u554f\u308f\u308c": [
463
+ "\u554f\u308f",
464
+ "\u308c"
465
+ ],
466
+ "\u554f\u308f\u308c\u308b": [
467
+ "\u554f\u308f",
468
+ "\u308c\u308b"
469
+ ],
470
+ "\u596a\u308f\u308c": [
471
+ "\u596a\u308f",
472
+ "\u308c"
473
+ ],
474
+ "\u596a\u308f\u308c\u308b": [
475
+ "\u596a\u308f",
476
+ "\u308c\u308b"
477
+ ],
478
+ "\u5acc\u308f\u308c": [
479
+ "\u5acc\u308f",
480
+ "\u308c"
481
+ ],
482
+ "\u601d\u3063\u3061\u3083\u3044": [
483
+ "\u601d\u3063",
484
+ "\u3061\u3083\u3044"
485
+ ],
486
+ "\u601d\u3063\u3061\u3083\u3046": [
487
+ "\u601d\u3063",
488
+ "\u3061\u3083\u3046"
489
+ ],
490
+ "\u60a9\u307e\u305b": [
491
+ "\u60a9\u307e",
492
+ "\u305b"
493
+ ],
494
+ "\u60a9\u307e\u305b\u308b": [
495
+ "\u60a9\u307e",
496
+ "\u305b\u308b"
497
+ ],
498
+ "\u6271\u308f\u308c": [
499
+ "\u6271\u308f",
500
+ "\u308c"
501
+ ],
502
+ "\u6271\u308f\u308c\u308b": [
503
+ "\u6271\u308f",
504
+ "\u308c\u308b"
505
+ ],
506
+ "\u6279\u5224\u3059\u3079\u304d": [
507
+ "\u6279\u5224\u3059",
508
+ "\u3079\u304d"
509
+ ],
510
+ "\u6392\u9664\u3059\u3079\u304d": [
511
+ "\u6392\u9664\u3059",
512
+ "\u3079\u304d"
513
+ ],
514
+ "\u6458\u767a\u3059\u3079\u304d": [
515
+ "\u6458\u767a\u3059",
516
+ "\u3079\u304d"
517
+ ],
518
+ "\u6551\u308f\u308c\u308b": [
519
+ "\u6551\u308f",
520
+ "\u308c\u308b"
521
+ ],
522
+ "\u66ae\u3089\u3059\u3079\u304d": [
523
+ "\u66ae\u3089\u3059",
524
+ "\u3079\u304d"
525
+ ],
526
+ "\u679c\u305f\u3059\u3079\u304f": [
527
+ "\u679c\u305f\u3059",
528
+ "\u3079\u304f"
529
+ ],
530
+ "\u69cb\u308f\u305a": [
531
+ "\u69cb\u308f",
532
+ "\u305a"
533
+ ],
534
+ "\u6b4c\u308f\u308c": [
535
+ "\u6b4c\u308f",
536
+ "\u308c"
537
+ ],
538
+ "\u6b6a\u307e\u305b\u308b": [
539
+ "\u6b6a\u307e",
540
+ "\u305b\u308b"
541
+ ],
542
+ "\u6e08\u307e\u305b": [
543
+ "\u6e08\u307e",
544
+ "\u305b"
545
+ ],
546
+ "\u72d9\u308f\u308c": [
547
+ "\u72d9\u308f",
548
+ "\u308c"
549
+ ],
550
+ "\u751f\u304b\u3059\u3079\u304f": [
551
+ "\u751f\u304b\u3059",
552
+ "\u3079\u304f"
553
+ ],
554
+ "\u77e5\u3089\u305b": [
555
+ "\u77e5\u3089",
556
+ "\u305b"
557
+ ],
558
+ "\u77e5\u3089\u305b\u308b": [
559
+ "\u77e5\u3089",
560
+ "\u305b\u308b"
561
+ ],
562
+ "\u7d42\u308f\u3063\u3061\u3083\u3046": [
563
+ "\u7d42\u308f\u3063",
564
+ "\u3061\u3083\u3046"
565
+ ],
566
+ "\u7d71\u4e00\u3059\u3079\u304f": [
567
+ "\u7d71\u4e00\u3059",
568
+ "\u3079\u304f"
569
+ ],
570
+ "\u884c\u3063\u3061\u3083\u3044": [
571
+ "\u884c\u3063",
572
+ "\u3061\u3083\u3044"
573
+ ],
574
+ "\u884c\u308f\u305a": [
575
+ "\u884c\u308f",
576
+ "\u305a"
577
+ ],
578
+ "\u884c\u308f\u308c": [
579
+ "\u884c\u308f",
580
+ "\u308c"
581
+ ],
582
+ "\u884c\u308f\u308c\u308b": [
583
+ "\u884c\u308f",
584
+ "\u308c\u308b"
585
+ ],
586
+ "\u88ab\u308f\u308c\u308b": [
587
+ "\u88ab\u308f",
588
+ "\u308c\u308b"
589
+ ],
590
+ "\u8972\u308f\u308c": [
591
+ "\u8972\u308f",
592
+ "\u308c"
593
+ ],
594
+ "\u8986\u308f\u308c": [
595
+ "\u8986\u308f",
596
+ "\u308c"
597
+ ],
598
+ "\u898b\u306a\u3055\u308c": [
599
+ "\u898b\u306a\u3055",
600
+ "\u308c"
601
+ ],
602
+ "\u898b\u821e\u308f\u308c": [
603
+ "\u898b\u821e\u308f",
604
+ "\u308c"
605
+ ],
606
+ "\u89e3\u304d\u660e\u304b\u3059\u3079\u304d": [
607
+ "\u89e3\u304d\u660e\u304b\u3059",
608
+ "\u3079\u304d"
609
+ ],
610
+ "\u8a60\u308f\u308c": [
611
+ "\u8a60\u308f",
612
+ "\u308c"
613
+ ],
614
+ "\u8a98\u308f\u308c": [
615
+ "\u8a98\u308f",
616
+ "\u308c"
617
+ ],
618
+ "\u8aac\u660e\u3059\u3079\u304d": [
619
+ "\u8aac\u660e\u3059",
620
+ "\u3079\u304d"
621
+ ],
622
+ "\u8cb7\u3063\u3061\u3083\u3044": [
623
+ "\u8cb7\u3063",
624
+ "\u3061\u3083\u3044"
625
+ ],
626
+ "\u8e0f\u307e\u305b": [
627
+ "\u8e0f\u307e",
628
+ "\u305b"
629
+ ],
630
+ "\u8f9e\u8077\u3059\u3079\u304d": [
631
+ "\u8f9e\u8077\u3059",
632
+ "\u3079\u304d"
633
+ ],
634
+ "\u990a\u308f\u308c\u308b": [
635
+ "\u990a\u308f",
636
+ "\u308c\u308b"
637
+ ],
638
+ "\u9cf4\u3089\u3059\u3079\u304f": [
639
+ "\u9cf4\u3089\u3059",
640
+ "\u3079\u304f"
641
+ ]
642
+ },
643
+ "VERB+AUX+AUX": {
644
+ "\u306a\u3063\u305f\u3089\u3057\u3044": [
645
+ "\u306a\u3063",
646
+ "\u305f",
647
+ "\u3089\u3057\u3044"
648
+ ],
649
+ "\u601d\u3063\u305f\u3089\u3057\u304f": [
650
+ "\u601d\u3063",
651
+ "\u305f",
652
+ "\u3089\u3057\u304f"
653
+ ],
654
+ "\u8cb7\u3063\u305f\u3089\u3057\u304f": [
655
+ "\u8cb7\u3063",
656
+ "\u305f",
657
+ "\u3089\u3057\u304f"
658
+ ]
659
+ },
660
+ "VERB+AUX+NOUN": {
661
+ "\u5909\u66f4\u3057\u305f\u305f\u3081": [
662
+ "\u5909\u66f4\u3057",
663
+ "\u305f",
664
+ "\u305f\u3081"
665
+ ],
666
+ "\u5931\u6557\u3057\u305f\u305f\u3081": [
667
+ "\u5931\u6557\u3057",
668
+ "\u305f",
669
+ "\u305f\u3081"
670
+ ],
671
+ "\u5bfe\u7acb\u3057\u305f\u305f\u3081": [
672
+ "\u5bfe\u7acb\u3057",
673
+ "\u305f",
674
+ "\u305f\u3081"
675
+ ],
676
+ "\u6f5c\u5165\u3057\u305f\u305f\u3081": [
677
+ "\u6f5c\u5165\u3057",
678
+ "\u305f",
679
+ "\u305f\u3081"
680
+ ],
681
+ "\u8131\u8d70\u3057\u305f\u305f\u3081": [
682
+ "\u8131\u8d70\u3057",
683
+ "\u305f",
684
+ "\u305f\u3081"
685
+ ],
686
+ "\u8868\u660e\u3057\u305f\u305f\u3081": [
687
+ "\u8868\u660e\u3057",
688
+ "\u305f",
689
+ "\u305f\u3081"
690
+ ],
691
+ "\u8981\u8acb\u3057\u305f\u305f\u3081": [
692
+ "\u8981\u8acb\u3057",
693
+ "\u305f",
694
+ "\u305f\u3081"
695
+ ]
696
+ },
697
+ "VERB+AUX+SCONJ": {
698
+ "\u592d\u6298\u3057\u305f\u305f\u3081\u306b": [
699
+ "\u592d\u6298\u3057",
700
+ "\u305f",
701
+ "\u305f\u3081\u306b"
702
+ ],
703
+ "\u5fb4\u767a\u3057\u305f\u305f\u3081\u306b": [
704
+ "\u5fb4\u767a\u3057",
705
+ "\u305f",
706
+ "\u305f\u3081\u306b"
707
+ ],
708
+ "\u62d2\u5426\u3057\u305f\u305f\u3081\u306b": [
709
+ "\u62d2\u5426\u3057",
710
+ "\u305f",
711
+ "\u305f\u3081\u306b"
712
+ ]
713
+ }
714
+ }
715
+ },
716
+ "tokenizer_class": "DebertaV2TokenizerFast",
717
+ "torch_dtype": "float32",
718
+ "transformers_version": "4.19.4",
719
+ "type_vocab_size": 0,
720
+ "vocab_size": 32000
721
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d9dc283f344a5962ffc3d73145d7d13aa93061dcffa80c204ca9cda53170536
3
+ size 440451827
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
3
+ size 1
supar.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1418fb8011870c9767cd696aae417a242fa43f12873d299f49a93550c091b56d
3
+ size 488951787
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": true, "keep_accents": true, "model_max_length": 512, "tokenizer_class": "DebertaV2TokenizerFast"}