KoichiYasuoka committed on
Commit
ded46fd
1 Parent(s): aee7e74

model improved

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. config.json +211 -193
  3. pytorch_model.bin +2 -2
  4. supar.model +2 -2
README.md CHANGED
@@ -19,7 +19,7 @@ widget:
19
 
20
  ## Model Description
21
 
22
- This is a DeBERTa(V2) model pre-trained on Japanese Wikipedia and 青空文庫 texts for POS-tagging and dependency-parsing, derived from [deberta-base-japanese-wikipedia](https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia). Every long-unit-word is tagged by [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech).
23
 
24
  ## How to Use
25
 
 
19
 
20
  ## Model Description
21
 
22
+ This is a DeBERTa(V2) model pre-trained on Japanese Wikipedia and 青空文庫 texts for POS-tagging and dependency-parsing, derived from [deberta-base-japanese-wikipedia](https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia). Every long-unit-word is tagged by [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech) and [FEATS](https://universaldependencies.org/u/feat/).
23
 
24
  ## How to Use
25
 
config.json CHANGED
@@ -14,92 +14,100 @@
14
  "2": "ADP",
15
  "3": "ADV",
16
  "4": "AUX",
17
- "5": "AUX+VERB",
18
- "6": "B-ADJ",
19
- "7": "B-ADJ+ADJ",
20
- "8": "B-ADJ+AUX+AUX",
21
- "9": "B-ADJ+VERB",
22
- "10": "B-ADP",
23
- "11": "B-ADP+NOUN",
24
- "12": "B-ADV",
25
- "13": "B-ADV+ADP",
26
- "14": "B-ADV+ADP+VERB",
27
- "15": "B-AUX",
28
- "16": "B-AUX+AUX+AUX",
29
- "17": "B-AUX+AUX+PART",
30
- "18": "B-AUX+SCONJ",
31
- "19": "B-AUX+VERB",
32
- "20": "B-CCONJ",
33
- "21": "B-DET",
34
- "22": "B-INTJ",
35
- "23": "B-NOUN",
36
- "24": "B-NOUN+ADJ",
37
- "25": "B-NOUN+ADP",
38
- "26": "B-NUM",
39
- "27": "B-PART",
40
- "28": "B-PRON",
41
- "29": "B-PROPN",
42
- "30": "B-PUNCT",
43
- "31": "B-SCONJ",
44
- "32": "B-SYM",
45
- "33": "B-VERB",
46
- "34": "B-VERB+AUX",
47
- "35": "B-VERB+AUX+AUX",
48
- "36": "B-VERB+AUX+NOUN",
49
- "37": "B-VERB+AUX+PART",
50
- "38": "B-VERB+AUX+SCONJ",
51
- "39": "B-X",
52
- "40": "CCONJ",
53
- "41": "DET",
54
- "42": "I-ADJ",
55
- "43": "I-ADJ+ADJ",
56
- "44": "I-ADJ+AUX+AUX",
57
- "45": "I-ADJ+VERB",
58
- "46": "I-ADP",
59
- "47": "I-ADP+NOUN",
60
- "48": "I-ADV",
61
- "49": "I-ADV+ADP",
62
- "50": "I-ADV+ADP+VERB",
63
- "51": "I-AUX",
64
- "52": "I-AUX+AUX+AUX",
65
- "53": "I-AUX+AUX+PART",
66
- "54": "I-AUX+SCONJ",
67
- "55": "I-AUX+VERB",
68
- "56": "I-CCONJ",
69
- "57": "I-DET",
70
- "58": "I-INTJ",
71
- "59": "I-NOUN",
72
- "60": "I-NOUN+ADJ",
73
- "61": "I-NOUN+ADP",
74
- "62": "I-NUM",
75
- "63": "I-PART",
76
- "64": "I-PRON",
77
- "65": "I-PROPN",
78
- "66": "I-PUNCT",
79
- "67": "I-SCONJ",
80
- "68": "I-SYM",
81
- "69": "I-VERB",
82
- "70": "I-VERB+AUX",
83
- "71": "I-VERB+AUX+AUX",
84
- "72": "I-VERB+AUX+NOUN",
85
- "73": "I-VERB+AUX+PART",
86
- "74": "I-VERB+AUX+SCONJ",
87
- "75": "I-X",
88
- "76": "INTJ",
89
- "77": "NOUN",
90
- "78": "NOUN+ADP",
91
- "79": "NOUN+AUX",
92
- "80": "NUM",
93
- "81": "PART",
94
- "82": "PRON",
95
- "83": "PRON+ADP",
96
- "84": "PROPN",
97
- "85": "PUNCT",
98
- "86": "SCONJ",
99
- "87": "SYM",
100
- "88": "VERB",
101
- "89": "VERB+AUX",
102
- "90": "X"
 
 
 
 
 
 
 
 
103
  },
104
  "initializer_range": 0.02,
105
  "intermediate_size": 3072,
@@ -109,92 +117,100 @@
109
  "ADP": 2,
110
  "ADV": 3,
111
  "AUX": 4,
112
- "AUX+VERB": 5,
113
- "B-ADJ": 6,
114
- "B-ADJ+ADJ": 7,
115
- "B-ADJ+AUX+AUX": 8,
116
- "B-ADJ+VERB": 9,
117
- "B-ADP": 10,
118
- "B-ADP+NOUN": 11,
119
- "B-ADV": 12,
120
- "B-ADV+ADP": 13,
121
- "B-ADV+ADP+VERB": 14,
122
- "B-AUX": 15,
123
- "B-AUX+AUX+AUX": 16,
124
- "B-AUX+AUX+PART": 17,
125
- "B-AUX+SCONJ": 18,
126
- "B-AUX+VERB": 19,
127
- "B-CCONJ": 20,
128
- "B-DET": 21,
129
- "B-INTJ": 22,
130
- "B-NOUN": 23,
131
- "B-NOUN+ADJ": 24,
132
- "B-NOUN+ADP": 25,
133
- "B-NUM": 26,
134
- "B-PART": 27,
135
- "B-PRON": 28,
136
- "B-PROPN": 29,
137
- "B-PUNCT": 30,
138
- "B-SCONJ": 31,
139
- "B-SYM": 32,
140
- "B-VERB": 33,
141
- "B-VERB+AUX": 34,
142
- "B-VERB+AUX+AUX": 35,
143
- "B-VERB+AUX+NOUN": 36,
144
- "B-VERB+AUX+PART": 37,
145
- "B-VERB+AUX+SCONJ": 38,
146
- "B-X": 39,
147
- "CCONJ": 40,
148
- "DET": 41,
149
- "I-ADJ": 42,
150
- "I-ADJ+ADJ": 43,
151
- "I-ADJ+AUX+AUX": 44,
152
- "I-ADJ+VERB": 45,
153
- "I-ADP": 46,
154
- "I-ADP+NOUN": 47,
155
- "I-ADV": 48,
156
- "I-ADV+ADP": 49,
157
- "I-ADV+ADP+VERB": 50,
158
- "I-AUX": 51,
159
- "I-AUX+AUX+AUX": 52,
160
- "I-AUX+AUX+PART": 53,
161
- "I-AUX+SCONJ": 54,
162
- "I-AUX+VERB": 55,
163
- "I-CCONJ": 56,
164
- "I-DET": 57,
165
- "I-INTJ": 58,
166
- "I-NOUN": 59,
167
- "I-NOUN+ADJ": 60,
168
- "I-NOUN+ADP": 61,
169
- "I-NUM": 62,
170
- "I-PART": 63,
171
- "I-PRON": 64,
172
- "I-PROPN": 65,
173
- "I-PUNCT": 66,
174
- "I-SCONJ": 67,
175
- "I-SYM": 68,
176
- "I-VERB": 69,
177
- "I-VERB+AUX": 70,
178
- "I-VERB+AUX+AUX": 71,
179
- "I-VERB+AUX+NOUN": 72,
180
- "I-VERB+AUX+PART": 73,
181
- "I-VERB+AUX+SCONJ": 74,
182
- "I-X": 75,
183
- "INTJ": 76,
184
- "NOUN": 77,
185
- "NOUN+ADP": 78,
186
- "NOUN+AUX": 79,
187
- "NUM": 80,
188
- "PART": 81,
189
- "PRON": 82,
190
- "PRON+ADP": 83,
191
- "PROPN": 84,
192
- "PUNCT": 85,
193
- "SCONJ": 86,
194
- "SYM": 87,
195
- "VERB": 88,
196
- "VERB+AUX": 89,
197
- "X": 90
 
 
 
 
 
 
 
 
198
  },
199
  "layer_norm_eps": 1e-07,
200
  "max_position_embeddings": 512,
@@ -292,7 +308,7 @@
292
  "\u3051\u3069"
293
  ]
294
  },
295
- "AUX+VERB": {
296
  "\u306a\u304f\u306a\u3063": [
297
  "\u306a\u304f",
298
  "\u306a\u3063"
@@ -367,10 +383,6 @@
367
  "\u304a\u3053\u306a\u308f",
368
  "\u308c"
369
  ],
370
- "\u304b\u307e\u308f\u305a": [
371
- "\u304b\u307e\u308f",
372
- "\u305a"
373
- ],
374
  "\u3055\u3089\u308f\u308c": [
375
  "\u3055\u3089\u308f",
376
  "\u308c"
@@ -395,10 +407,6 @@
395
  "\u306a\u3058\u307e",
396
  "\u305b"
397
  ],
398
- "\u306a\u3089\u305a": [
399
- "\u306a\u3089",
400
- "\u305a"
401
- ],
402
  "\u307f\u306a\u3055\u308c": [
403
  "\u307f\u306a\u3055",
404
  "\u308c"
@@ -455,10 +463,6 @@
455
  "\u548c\u307e",
456
  "\u305b"
457
  ],
458
- "\u554f\u308f\u305a": [
459
- "\u554f\u308f",
460
- "\u305a"
461
- ],
462
  "\u554f\u308f\u308c": [
463
  "\u554f\u308f",
464
  "\u308c"
@@ -527,10 +531,6 @@
527
  "\u679c\u305f\u3059",
528
  "\u3079\u304f"
529
  ],
530
- "\u69cb\u308f\u305a": [
531
- "\u69cb\u308f",
532
- "\u305a"
533
- ],
534
  "\u6b4c\u308f\u308c": [
535
  "\u6b4c\u308f",
536
  "\u308c"
@@ -571,10 +571,6 @@
571
  "\u884c\u3063",
572
  "\u3061\u3083\u3044"
573
  ],
574
- "\u884c\u308f\u305a": [
575
- "\u884c\u308f",
576
- "\u305a"
577
- ],
578
  "\u884c\u308f\u308c": [
579
  "\u884c\u308f",
580
  "\u308c"
@@ -710,6 +706,28 @@
710
  "\u305f",
711
  "\u305f\u3081\u306b"
712
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
  }
714
  }
715
  },
 
14
  "2": "ADP",
15
  "3": "ADV",
16
  "4": "AUX",
17
+ "5": "AUX|Polarity=Neg",
18
+ "6": "AUX|Polarity=Neg+VERB",
19
+ "7": "B-ADJ",
20
+ "8": "B-ADJ+ADJ",
21
+ "9": "B-ADJ+AUX+AUX",
22
+ "10": "B-ADJ+VERB",
23
+ "11": "B-ADP",
24
+ "12": "B-ADP+NOUN",
25
+ "13": "B-ADV",
26
+ "14": "B-ADV+ADP",
27
+ "15": "B-ADV+ADP+VERB",
28
+ "16": "B-AUX",
29
+ "17": "B-AUX+AUX+AUX",
30
+ "18": "B-AUX+AUX+PART",
31
+ "19": "B-AUX+SCONJ",
32
+ "20": "B-AUX|Polarity=Neg",
33
+ "21": "B-AUX|Polarity=Neg+VERB",
34
+ "22": "B-CCONJ",
35
+ "23": "B-DET",
36
+ "24": "B-INTJ",
37
+ "25": "B-NOUN",
38
+ "26": "B-NOUN+ADJ",
39
+ "27": "B-NOUN+ADP",
40
+ "28": "B-NOUN|Polarity=Neg",
41
+ "29": "B-NUM",
42
+ "30": "B-PART",
43
+ "31": "B-PRON",
44
+ "32": "B-PROPN",
45
+ "33": "B-PUNCT",
46
+ "34": "B-SCONJ",
47
+ "35": "B-SYM",
48
+ "36": "B-VERB",
49
+ "37": "B-VERB+AUX",
50
+ "38": "B-VERB+AUX+AUX",
51
+ "39": "B-VERB+AUX+NOUN",
52
+ "40": "B-VERB+AUX+PART",
53
+ "41": "B-VERB+AUX+SCONJ",
54
+ "42": "B-VERB+AUX|Polarity=Neg",
55
+ "43": "B-X",
56
+ "44": "CCONJ",
57
+ "45": "DET",
58
+ "46": "I-ADJ",
59
+ "47": "I-ADJ+ADJ",
60
+ "48": "I-ADJ+AUX+AUX",
61
+ "49": "I-ADJ+VERB",
62
+ "50": "I-ADP",
63
+ "51": "I-ADP+NOUN",
64
+ "52": "I-ADV",
65
+ "53": "I-ADV+ADP",
66
+ "54": "I-ADV+ADP+VERB",
67
+ "55": "I-AUX",
68
+ "56": "I-AUX+AUX+AUX",
69
+ "57": "I-AUX+AUX+PART",
70
+ "58": "I-AUX+SCONJ",
71
+ "59": "I-AUX|Polarity=Neg",
72
+ "60": "I-AUX|Polarity=Neg+VERB",
73
+ "61": "I-CCONJ",
74
+ "62": "I-DET",
75
+ "63": "I-INTJ",
76
+ "64": "I-NOUN",
77
+ "65": "I-NOUN+ADJ",
78
+ "66": "I-NOUN+ADP",
79
+ "67": "I-NOUN|Polarity=Neg",
80
+ "68": "I-NUM",
81
+ "69": "I-PART",
82
+ "70": "I-PRON",
83
+ "71": "I-PROPN",
84
+ "72": "I-PUNCT",
85
+ "73": "I-SCONJ",
86
+ "74": "I-SYM",
87
+ "75": "I-VERB",
88
+ "76": "I-VERB+AUX",
89
+ "77": "I-VERB+AUX+AUX",
90
+ "78": "I-VERB+AUX+NOUN",
91
+ "79": "I-VERB+AUX+PART",
92
+ "80": "I-VERB+AUX+SCONJ",
93
+ "81": "I-VERB+AUX|Polarity=Neg",
94
+ "82": "I-X",
95
+ "83": "INTJ",
96
+ "84": "NOUN",
97
+ "85": "NOUN+ADP",
98
+ "86": "NOUN+AUX",
99
+ "87": "NUM",
100
+ "88": "PART",
101
+ "89": "PRON",
102
+ "90": "PRON+ADP",
103
+ "91": "PROPN",
104
+ "92": "PUNCT",
105
+ "93": "SCONJ",
106
+ "94": "SYM",
107
+ "95": "VERB",
108
+ "96": "VERB+AUX",
109
+ "97": "VERB+AUX|Polarity=Neg",
110
+ "98": "X"
111
  },
112
  "initializer_range": 0.02,
113
  "intermediate_size": 3072,
 
117
  "ADP": 2,
118
  "ADV": 3,
119
  "AUX": 4,
120
+ "AUX|Polarity=Neg": 5,
121
+ "AUX|Polarity=Neg+VERB": 6,
122
+ "B-ADJ": 7,
123
+ "B-ADJ+ADJ": 8,
124
+ "B-ADJ+AUX+AUX": 9,
125
+ "B-ADJ+VERB": 10,
126
+ "B-ADP": 11,
127
+ "B-ADP+NOUN": 12,
128
+ "B-ADV": 13,
129
+ "B-ADV+ADP": 14,
130
+ "B-ADV+ADP+VERB": 15,
131
+ "B-AUX": 16,
132
+ "B-AUX+AUX+AUX": 17,
133
+ "B-AUX+AUX+PART": 18,
134
+ "B-AUX+SCONJ": 19,
135
+ "B-AUX|Polarity=Neg": 20,
136
+ "B-AUX|Polarity=Neg+VERB": 21,
137
+ "B-CCONJ": 22,
138
+ "B-DET": 23,
139
+ "B-INTJ": 24,
140
+ "B-NOUN": 25,
141
+ "B-NOUN+ADJ": 26,
142
+ "B-NOUN+ADP": 27,
143
+ "B-NOUN|Polarity=Neg": 28,
144
+ "B-NUM": 29,
145
+ "B-PART": 30,
146
+ "B-PRON": 31,
147
+ "B-PROPN": 32,
148
+ "B-PUNCT": 33,
149
+ "B-SCONJ": 34,
150
+ "B-SYM": 35,
151
+ "B-VERB": 36,
152
+ "B-VERB+AUX": 37,
153
+ "B-VERB+AUX+AUX": 38,
154
+ "B-VERB+AUX+NOUN": 39,
155
+ "B-VERB+AUX+PART": 40,
156
+ "B-VERB+AUX+SCONJ": 41,
157
+ "B-VERB+AUX|Polarity=Neg": 42,
158
+ "B-X": 43,
159
+ "CCONJ": 44,
160
+ "DET": 45,
161
+ "I-ADJ": 46,
162
+ "I-ADJ+ADJ": 47,
163
+ "I-ADJ+AUX+AUX": 48,
164
+ "I-ADJ+VERB": 49,
165
+ "I-ADP": 50,
166
+ "I-ADP+NOUN": 51,
167
+ "I-ADV": 52,
168
+ "I-ADV+ADP": 53,
169
+ "I-ADV+ADP+VERB": 54,
170
+ "I-AUX": 55,
171
+ "I-AUX+AUX+AUX": 56,
172
+ "I-AUX+AUX+PART": 57,
173
+ "I-AUX+SCONJ": 58,
174
+ "I-AUX|Polarity=Neg": 59,
175
+ "I-AUX|Polarity=Neg+VERB": 60,
176
+ "I-CCONJ": 61,
177
+ "I-DET": 62,
178
+ "I-INTJ": 63,
179
+ "I-NOUN": 64,
180
+ "I-NOUN+ADJ": 65,
181
+ "I-NOUN+ADP": 66,
182
+ "I-NOUN|Polarity=Neg": 67,
183
+ "I-NUM": 68,
184
+ "I-PART": 69,
185
+ "I-PRON": 70,
186
+ "I-PROPN": 71,
187
+ "I-PUNCT": 72,
188
+ "I-SCONJ": 73,
189
+ "I-SYM": 74,
190
+ "I-VERB": 75,
191
+ "I-VERB+AUX": 76,
192
+ "I-VERB+AUX+AUX": 77,
193
+ "I-VERB+AUX+NOUN": 78,
194
+ "I-VERB+AUX+PART": 79,
195
+ "I-VERB+AUX+SCONJ": 80,
196
+ "I-VERB+AUX|Polarity=Neg": 81,
197
+ "I-X": 82,
198
+ "INTJ": 83,
199
+ "NOUN": 84,
200
+ "NOUN+ADP": 85,
201
+ "NOUN+AUX": 86,
202
+ "NUM": 87,
203
+ "PART": 88,
204
+ "PRON": 89,
205
+ "PRON+ADP": 90,
206
+ "PROPN": 91,
207
+ "PUNCT": 92,
208
+ "SCONJ": 93,
209
+ "SYM": 94,
210
+ "VERB": 95,
211
+ "VERB+AUX": 96,
212
+ "VERB+AUX|Polarity=Neg": 97,
213
+ "X": 98
214
  },
215
  "layer_norm_eps": 1e-07,
216
  "max_position_embeddings": 512,
 
308
  "\u3051\u3069"
309
  ]
310
  },
311
+ "AUX|Polarity=Neg+VERB": {
312
  "\u306a\u304f\u306a\u3063": [
313
  "\u306a\u304f",
314
  "\u306a\u3063"
 
383
  "\u304a\u3053\u306a\u308f",
384
  "\u308c"
385
  ],
 
 
 
 
386
  "\u3055\u3089\u308f\u308c": [
387
  "\u3055\u3089\u308f",
388
  "\u308c"
 
407
  "\u306a\u3058\u307e",
408
  "\u305b"
409
  ],
 
 
 
 
410
  "\u307f\u306a\u3055\u308c": [
411
  "\u307f\u306a\u3055",
412
  "\u308c"
 
463
  "\u548c\u307e",
464
  "\u305b"
465
  ],
 
 
 
 
466
  "\u554f\u308f\u308c": [
467
  "\u554f\u308f",
468
  "\u308c"
 
531
  "\u679c\u305f\u3059",
532
  "\u3079\u304f"
533
  ],
 
 
 
 
534
  "\u6b4c\u308f\u308c": [
535
  "\u6b4c\u308f",
536
  "\u308c"
 
571
  "\u884c\u3063",
572
  "\u3061\u3083\u3044"
573
  ],
 
 
 
 
574
  "\u884c\u308f\u308c": [
575
  "\u884c\u308f",
576
  "\u308c"
 
706
  "\u305f",
707
  "\u305f\u3081\u306b"
708
  ]
709
+ },
710
+ "VERB+AUX|Polarity=Neg": {
711
+ "\u304b\u307e\u308f\u305a": [
712
+ "\u304b\u307e\u308f",
713
+ "\u305a"
714
+ ],
715
+ "\u306a\u3089\u305a": [
716
+ "\u306a\u3089",
717
+ "\u305a"
718
+ ],
719
+ "\u554f\u308f\u305a": [
720
+ "\u554f\u308f",
721
+ "\u305a"
722
+ ],
723
+ "\u69cb\u308f\u305a": [
724
+ "\u69cb\u308f",
725
+ "\u305a"
726
+ ],
727
+ "\u884c\u308f\u305a": [
728
+ "\u884c\u308f",
729
+ "\u305a"
730
+ ]
731
  }
732
  }
733
  },
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d9dc283f344a5962ffc3d73145d7d13aa93061dcffa80c204ca9cda53170536
3
- size 440451827
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b5a1be4ff30e5f2537a335a038ef26c5c4e3e8c3df937927415153639d5e5f9
3
+ size 440476467
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1418fb8011870c9767cd696aae417a242fa43f12873d299f49a93550c091b56d
3
- size 488951787
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e568b444794597ad6c070ed5bd229d942eb6e73edab6bf277c5e8191c830ba5
3
+ size 488951723