KoichiYasuoka committed on
Commit
ccf19cb
1 Parent(s): 70ac635

model improved

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. config.json +216 -198
  3. pytorch_model.bin +2 -2
  4. supar.model +2 -2
README.md CHANGED
@@ -18,7 +18,7 @@ widget:
18
 
19
  ## Model Description
20
 
21
- This is a DeBERTa(V2) model pre-trained on 青空文庫 texts for POS-tagging and dependency-parsing, derived from [deberta-large-japanese-aozora](https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora). Every long-unit-word is tagged by [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech).
22
 
23
  ## How to Use
24
 
18
 
19
  ## Model Description
20
 
21
+ This is a DeBERTa(V2) model pre-trained on 青空文庫 texts for POS-tagging and dependency-parsing, derived from [deberta-large-japanese-aozora](https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora). Every long-unit-word is tagged by [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech) and [FEATS](https://universaldependencies.org/u/feat/).
22
 
23
  ## How to Use
24
 
config.json CHANGED
@@ -14,94 +14,102 @@
14
  "2": "ADP",
15
  "3": "ADV",
16
  "4": "AUX",
17
- "5": "AUX+VERB",
18
- "6": "B-ADJ",
19
- "7": "B-ADJ+ADJ",
20
- "8": "B-ADJ+AUX+AUX",
21
- "9": "B-ADJ+VERB",
22
- "10": "B-ADP",
23
- "11": "B-ADP+NOUN",
24
- "12": "B-ADV",
25
- "13": "B-ADV+ADP",
26
- "14": "B-ADV+ADP+VERB",
27
- "15": "B-AUX",
28
- "16": "B-AUX+AUX",
29
- "17": "B-AUX+AUX+AUX",
30
- "18": "B-AUX+AUX+PART",
31
- "19": "B-AUX+SCONJ",
32
- "20": "B-AUX+VERB",
33
- "21": "B-CCONJ",
34
- "22": "B-DET",
35
- "23": "B-INTJ",
36
- "24": "B-NOUN",
37
- "25": "B-NOUN+ADJ",
38
- "26": "B-NOUN+ADP",
39
- "27": "B-NUM",
40
- "28": "B-PART",
41
- "29": "B-PRON",
42
- "30": "B-PROPN",
43
- "31": "B-PUNCT",
44
- "32": "B-SCONJ",
45
- "33": "B-SYM",
46
- "34": "B-VERB",
47
- "35": "B-VERB+AUX",
48
- "36": "B-VERB+AUX+AUX",
49
- "37": "B-VERB+AUX+NOUN",
50
- "38": "B-VERB+AUX+PART",
51
- "39": "B-VERB+AUX+SCONJ",
52
- "40": "B-X",
53
- "41": "CCONJ",
54
- "42": "DET",
55
- "43": "I-ADJ",
56
- "44": "I-ADJ+ADJ",
57
- "45": "I-ADJ+AUX+AUX",
58
- "46": "I-ADJ+VERB",
59
- "47": "I-ADP",
60
- "48": "I-ADP+NOUN",
61
- "49": "I-ADV",
62
- "50": "I-ADV+ADP",
63
- "51": "I-ADV+ADP+VERB",
64
- "52": "I-AUX",
65
- "53": "I-AUX+AUX",
66
- "54": "I-AUX+AUX+AUX",
67
- "55": "I-AUX+AUX+PART",
68
- "56": "I-AUX+SCONJ",
69
- "57": "I-AUX+VERB",
70
- "58": "I-CCONJ",
71
- "59": "I-DET",
72
- "60": "I-INTJ",
73
- "61": "I-NOUN",
74
- "62": "I-NOUN+ADJ",
75
- "63": "I-NOUN+ADP",
76
- "64": "I-NUM",
77
- "65": "I-PART",
78
- "66": "I-PRON",
79
- "67": "I-PROPN",
80
- "68": "I-PUNCT",
81
- "69": "I-SCONJ",
82
- "70": "I-SYM",
83
- "71": "I-VERB",
84
- "72": "I-VERB+AUX",
85
- "73": "I-VERB+AUX+AUX",
86
- "74": "I-VERB+AUX+NOUN",
87
- "75": "I-VERB+AUX+PART",
88
- "76": "I-VERB+AUX+SCONJ",
89
- "77": "I-X",
90
- "78": "NOUN",
91
- "79": "NOUN+ADP",
92
- "80": "NOUN+AUX",
93
- "81": "NUM",
94
- "82": "PART",
95
- "83": "PRON",
96
- "84": "PRON+ADP",
97
- "85": "PROPN",
98
- "86": "PUNCT",
99
- "87": "SCONJ",
100
- "88": "SYM",
101
- "89": "VERB",
102
- "90": "VERB+AUX",
103
- "91": "VERB+AUX+PART",
104
- "92": "X"
 
 
 
 
 
 
 
 
105
  },
106
  "initializer_range": 0.02,
107
  "intermediate_size": 4096,
@@ -111,94 +119,102 @@
111
  "ADP": 2,
112
  "ADV": 3,
113
  "AUX": 4,
114
- "AUX+VERB": 5,
115
- "B-ADJ": 6,
116
- "B-ADJ+ADJ": 7,
117
- "B-ADJ+AUX+AUX": 8,
118
- "B-ADJ+VERB": 9,
119
- "B-ADP": 10,
120
- "B-ADP+NOUN": 11,
121
- "B-ADV": 12,
122
- "B-ADV+ADP": 13,
123
- "B-ADV+ADP+VERB": 14,
124
- "B-AUX": 15,
125
- "B-AUX+AUX": 16,
126
- "B-AUX+AUX+AUX": 17,
127
- "B-AUX+AUX+PART": 18,
128
- "B-AUX+SCONJ": 19,
129
- "B-AUX+VERB": 20,
130
- "B-CCONJ": 21,
131
- "B-DET": 22,
132
- "B-INTJ": 23,
133
- "B-NOUN": 24,
134
- "B-NOUN+ADJ": 25,
135
- "B-NOUN+ADP": 26,
136
- "B-NUM": 27,
137
- "B-PART": 28,
138
- "B-PRON": 29,
139
- "B-PROPN": 30,
140
- "B-PUNCT": 31,
141
- "B-SCONJ": 32,
142
- "B-SYM": 33,
143
- "B-VERB": 34,
144
- "B-VERB+AUX": 35,
145
- "B-VERB+AUX+AUX": 36,
146
- "B-VERB+AUX+NOUN": 37,
147
- "B-VERB+AUX+PART": 38,
148
- "B-VERB+AUX+SCONJ": 39,
149
- "B-X": 40,
150
- "CCONJ": 41,
151
- "DET": 42,
152
- "I-ADJ": 43,
153
- "I-ADJ+ADJ": 44,
154
- "I-ADJ+AUX+AUX": 45,
155
- "I-ADJ+VERB": 46,
156
- "I-ADP": 47,
157
- "I-ADP+NOUN": 48,
158
- "I-ADV": 49,
159
- "I-ADV+ADP": 50,
160
- "I-ADV+ADP+VERB": 51,
161
- "I-AUX": 52,
162
- "I-AUX+AUX": 53,
163
- "I-AUX+AUX+AUX": 54,
164
- "I-AUX+AUX+PART": 55,
165
- "I-AUX+SCONJ": 56,
166
- "I-AUX+VERB": 57,
167
- "I-CCONJ": 58,
168
- "I-DET": 59,
169
- "I-INTJ": 60,
170
- "I-NOUN": 61,
171
- "I-NOUN+ADJ": 62,
172
- "I-NOUN+ADP": 63,
173
- "I-NUM": 64,
174
- "I-PART": 65,
175
- "I-PRON": 66,
176
- "I-PROPN": 67,
177
- "I-PUNCT": 68,
178
- "I-SCONJ": 69,
179
- "I-SYM": 70,
180
- "I-VERB": 71,
181
- "I-VERB+AUX": 72,
182
- "I-VERB+AUX+AUX": 73,
183
- "I-VERB+AUX+NOUN": 74,
184
- "I-VERB+AUX+PART": 75,
185
- "I-VERB+AUX+SCONJ": 76,
186
- "I-X": 77,
187
- "NOUN": 78,
188
- "NOUN+ADP": 79,
189
- "NOUN+AUX": 80,
190
- "NUM": 81,
191
- "PART": 82,
192
- "PRON": 83,
193
- "PRON+ADP": 84,
194
- "PROPN": 85,
195
- "PUNCT": 86,
196
- "SCONJ": 87,
197
- "SYM": 88,
198
- "VERB": 89,
199
- "VERB+AUX": 90,
200
- "VERB+AUX+PART": 91,
201
- "X": 92
 
 
 
 
 
 
 
 
202
  },
203
  "layer_norm_eps": 1e-07,
204
  "max_position_embeddings": 512,
@@ -302,7 +318,7 @@
302
  "\u3051\u3069"
303
  ]
304
  },
305
- "AUX+VERB": {
306
  "\u306a\u304f\u306a\u3063": [
307
  "\u306a\u304f",
308
  "\u306a\u3063"
@@ -377,10 +393,6 @@
377
  "\u304a\u3053\u306a\u308f",
378
  "\u308c"
379
  ],
380
- "\u304b\u307e\u308f\u305a": [
381
- "\u304b\u307e\u308f",
382
- "\u305a"
383
- ],
384
  "\u3055\u3089\u308f\u308c": [
385
  "\u3055\u3089\u308f",
386
  "\u308c"
@@ -405,10 +417,6 @@
405
  "\u306a\u3058\u307e",
406
  "\u305b"
407
  ],
408
- "\u306a\u3089\u305a": [
409
- "\u306a\u3089",
410
- "\u305a"
411
- ],
412
  "\u307f\u306a\u3055\u308c": [
413
  "\u307f\u306a\u3055",
414
  "\u308c"
@@ -461,10 +469,6 @@
461
  "\u548c\u307e",
462
  "\u305b"
463
  ],
464
- "\u554f\u308f\u305a": [
465
- "\u554f\u308f",
466
- "\u305a"
467
- ],
468
  "\u554f\u308f\u308c": [
469
  "\u554f\u308f",
470
  "\u308c"
@@ -533,10 +537,6 @@
533
  "\u679c\u305f\u3059",
534
  "\u3079\u304f"
535
  ],
536
- "\u69cb\u308f\u305a": [
537
- "\u69cb\u308f",
538
- "\u305a"
539
- ],
540
  "\u6b4c\u308f\u308c": [
541
  "\u6b4c\u308f",
542
  "\u308c"
@@ -577,10 +577,6 @@
577
  "\u884c\u3063",
578
  "\u3061\u3083\u3044"
579
  ],
580
- "\u884c\u308f\u305a": [
581
- "\u884c\u308f",
582
- "\u305a"
583
- ],
584
  "\u884c\u308f\u308c": [
585
  "\u884c\u308f",
586
  "\u308c"
@@ -723,12 +719,34 @@
723
  "\u305f",
724
  "\u305f\u3081\u306b"
725
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
  }
727
  }
728
  },
729
  "tokenizer_class": "DebertaV2TokenizerFast",
730
  "torch_dtype": "float32",
731
- "transformers_version": "4.19.2",
732
  "type_vocab_size": 0,
733
  "vocab_size": 32000
734
  }
14
  "2": "ADP",
15
  "3": "ADV",
16
  "4": "AUX",
17
+ "5": "AUX|Polarity=Neg",
18
+ "6": "AUX|Polarity=Neg+VERB",
19
+ "7": "B-ADJ",
20
+ "8": "B-ADJ+ADJ",
21
+ "9": "B-ADJ+AUX+AUX",
22
+ "10": "B-ADJ+VERB",
23
+ "11": "B-ADP",
24
+ "12": "B-ADP+NOUN",
25
+ "13": "B-ADV",
26
+ "14": "B-ADV+ADP",
27
+ "15": "B-ADV+ADP+VERB",
28
+ "16": "B-AUX",
29
+ "17": "B-AUX+AUX",
30
+ "18": "B-AUX+AUX+AUX",
31
+ "19": "B-AUX+AUX+PART",
32
+ "20": "B-AUX+SCONJ",
33
+ "21": "B-AUX|Polarity=Neg",
34
+ "22": "B-AUX|Polarity=Neg+VERB",
35
+ "23": "B-CCONJ",
36
+ "24": "B-DET",
37
+ "25": "B-INTJ",
38
+ "26": "B-NOUN",
39
+ "27": "B-NOUN+ADJ",
40
+ "28": "B-NOUN+ADP",
41
+ "29": "B-NOUN|Polarity=Neg",
42
+ "30": "B-NUM",
43
+ "31": "B-PART",
44
+ "32": "B-PRON",
45
+ "33": "B-PROPN",
46
+ "34": "B-PUNCT",
47
+ "35": "B-SCONJ",
48
+ "36": "B-SYM",
49
+ "37": "B-VERB",
50
+ "38": "B-VERB+AUX",
51
+ "39": "B-VERB+AUX+AUX",
52
+ "40": "B-VERB+AUX+NOUN",
53
+ "41": "B-VERB+AUX+PART",
54
+ "42": "B-VERB+AUX+SCONJ",
55
+ "43": "B-VERB+AUX|Polarity=Neg",
56
+ "44": "B-X",
57
+ "45": "CCONJ",
58
+ "46": "DET",
59
+ "47": "I-ADJ",
60
+ "48": "I-ADJ+ADJ",
61
+ "49": "I-ADJ+AUX+AUX",
62
+ "50": "I-ADJ+VERB",
63
+ "51": "I-ADP",
64
+ "52": "I-ADP+NOUN",
65
+ "53": "I-ADV",
66
+ "54": "I-ADV+ADP",
67
+ "55": "I-ADV+ADP+VERB",
68
+ "56": "I-AUX",
69
+ "57": "I-AUX+AUX",
70
+ "58": "I-AUX+AUX+AUX",
71
+ "59": "I-AUX+AUX+PART",
72
+ "60": "I-AUX+SCONJ",
73
+ "61": "I-AUX|Polarity=Neg",
74
+ "62": "I-AUX|Polarity=Neg+VERB",
75
+ "63": "I-CCONJ",
76
+ "64": "I-DET",
77
+ "65": "I-INTJ",
78
+ "66": "I-NOUN",
79
+ "67": "I-NOUN+ADJ",
80
+ "68": "I-NOUN+ADP",
81
+ "69": "I-NOUN|Polarity=Neg",
82
+ "70": "I-NUM",
83
+ "71": "I-PART",
84
+ "72": "I-PRON",
85
+ "73": "I-PROPN",
86
+ "74": "I-PUNCT",
87
+ "75": "I-SCONJ",
88
+ "76": "I-SYM",
89
+ "77": "I-VERB",
90
+ "78": "I-VERB+AUX",
91
+ "79": "I-VERB+AUX+AUX",
92
+ "80": "I-VERB+AUX+NOUN",
93
+ "81": "I-VERB+AUX+PART",
94
+ "82": "I-VERB+AUX+SCONJ",
95
+ "83": "I-VERB+AUX|Polarity=Neg",
96
+ "84": "I-X",
97
+ "85": "NOUN",
98
+ "86": "NOUN+ADP",
99
+ "87": "NOUN+AUX",
100
+ "88": "NUM",
101
+ "89": "PART",
102
+ "90": "PRON",
103
+ "91": "PRON+ADP",
104
+ "92": "PROPN",
105
+ "93": "PUNCT",
106
+ "94": "SCONJ",
107
+ "95": "SYM",
108
+ "96": "VERB",
109
+ "97": "VERB+AUX",
110
+ "98": "VERB+AUX+PART",
111
+ "99": "VERB+AUX|Polarity=Neg",
112
+ "100": "X"
113
  },
114
  "initializer_range": 0.02,
115
  "intermediate_size": 4096,
119
  "ADP": 2,
120
  "ADV": 3,
121
  "AUX": 4,
122
+ "AUX|Polarity=Neg": 5,
123
+ "AUX|Polarity=Neg+VERB": 6,
124
+ "B-ADJ": 7,
125
+ "B-ADJ+ADJ": 8,
126
+ "B-ADJ+AUX+AUX": 9,
127
+ "B-ADJ+VERB": 10,
128
+ "B-ADP": 11,
129
+ "B-ADP+NOUN": 12,
130
+ "B-ADV": 13,
131
+ "B-ADV+ADP": 14,
132
+ "B-ADV+ADP+VERB": 15,
133
+ "B-AUX": 16,
134
+ "B-AUX+AUX": 17,
135
+ "B-AUX+AUX+AUX": 18,
136
+ "B-AUX+AUX+PART": 19,
137
+ "B-AUX+SCONJ": 20,
138
+ "B-AUX|Polarity=Neg": 21,
139
+ "B-AUX|Polarity=Neg+VERB": 22,
140
+ "B-CCONJ": 23,
141
+ "B-DET": 24,
142
+ "B-INTJ": 25,
143
+ "B-NOUN": 26,
144
+ "B-NOUN+ADJ": 27,
145
+ "B-NOUN+ADP": 28,
146
+ "B-NOUN|Polarity=Neg": 29,
147
+ "B-NUM": 30,
148
+ "B-PART": 31,
149
+ "B-PRON": 32,
150
+ "B-PROPN": 33,
151
+ "B-PUNCT": 34,
152
+ "B-SCONJ": 35,
153
+ "B-SYM": 36,
154
+ "B-VERB": 37,
155
+ "B-VERB+AUX": 38,
156
+ "B-VERB+AUX+AUX": 39,
157
+ "B-VERB+AUX+NOUN": 40,
158
+ "B-VERB+AUX+PART": 41,
159
+ "B-VERB+AUX+SCONJ": 42,
160
+ "B-VERB+AUX|Polarity=Neg": 43,
161
+ "B-X": 44,
162
+ "CCONJ": 45,
163
+ "DET": 46,
164
+ "I-ADJ": 47,
165
+ "I-ADJ+ADJ": 48,
166
+ "I-ADJ+AUX+AUX": 49,
167
+ "I-ADJ+VERB": 50,
168
+ "I-ADP": 51,
169
+ "I-ADP+NOUN": 52,
170
+ "I-ADV": 53,
171
+ "I-ADV+ADP": 54,
172
+ "I-ADV+ADP+VERB": 55,
173
+ "I-AUX": 56,
174
+ "I-AUX+AUX": 57,
175
+ "I-AUX+AUX+AUX": 58,
176
+ "I-AUX+AUX+PART": 59,
177
+ "I-AUX+SCONJ": 60,
178
+ "I-AUX|Polarity=Neg": 61,
179
+ "I-AUX|Polarity=Neg+VERB": 62,
180
+ "I-CCONJ": 63,
181
+ "I-DET": 64,
182
+ "I-INTJ": 65,
183
+ "I-NOUN": 66,
184
+ "I-NOUN+ADJ": 67,
185
+ "I-NOUN+ADP": 68,
186
+ "I-NOUN|Polarity=Neg": 69,
187
+ "I-NUM": 70,
188
+ "I-PART": 71,
189
+ "I-PRON": 72,
190
+ "I-PROPN": 73,
191
+ "I-PUNCT": 74,
192
+ "I-SCONJ": 75,
193
+ "I-SYM": 76,
194
+ "I-VERB": 77,
195
+ "I-VERB+AUX": 78,
196
+ "I-VERB+AUX+AUX": 79,
197
+ "I-VERB+AUX+NOUN": 80,
198
+ "I-VERB+AUX+PART": 81,
199
+ "I-VERB+AUX+SCONJ": 82,
200
+ "I-VERB+AUX|Polarity=Neg": 83,
201
+ "I-X": 84,
202
+ "NOUN": 85,
203
+ "NOUN+ADP": 86,
204
+ "NOUN+AUX": 87,
205
+ "NUM": 88,
206
+ "PART": 89,
207
+ "PRON": 90,
208
+ "PRON+ADP": 91,
209
+ "PROPN": 92,
210
+ "PUNCT": 93,
211
+ "SCONJ": 94,
212
+ "SYM": 95,
213
+ "VERB": 96,
214
+ "VERB+AUX": 97,
215
+ "VERB+AUX+PART": 98,
216
+ "VERB+AUX|Polarity=Neg": 99,
217
+ "X": 100
218
  },
219
  "layer_norm_eps": 1e-07,
220
  "max_position_embeddings": 512,
318
  "\u3051\u3069"
319
  ]
320
  },
321
+ "AUX|Polarity=Neg+VERB": {
322
  "\u306a\u304f\u306a\u3063": [
323
  "\u306a\u304f",
324
  "\u306a\u3063"
393
  "\u304a\u3053\u306a\u308f",
394
  "\u308c"
395
  ],
 
 
 
 
396
  "\u3055\u3089\u308f\u308c": [
397
  "\u3055\u3089\u308f",
398
  "\u308c"
417
  "\u306a\u3058\u307e",
418
  "\u305b"
419
  ],
 
 
 
 
420
  "\u307f\u306a\u3055\u308c": [
421
  "\u307f\u306a\u3055",
422
  "\u308c"
469
  "\u548c\u307e",
470
  "\u305b"
471
  ],
 
 
 
 
472
  "\u554f\u308f\u308c": [
473
  "\u554f\u308f",
474
  "\u308c"
537
  "\u679c\u305f\u3059",
538
  "\u3079\u304f"
539
  ],
 
 
 
 
540
  "\u6b4c\u308f\u308c": [
541
  "\u6b4c\u308f",
542
  "\u308c"
577
  "\u884c\u3063",
578
  "\u3061\u3083\u3044"
579
  ],
 
 
 
 
580
  "\u884c\u308f\u308c": [
581
  "\u884c\u308f",
582
  "\u308c"
719
  "\u305f",
720
  "\u305f\u3081\u306b"
721
  ]
722
+ },
723
+ "VERB+AUX|Polarity=Neg": {
724
+ "\u304b\u307e\u308f\u305a": [
725
+ "\u304b\u307e\u308f",
726
+ "\u305a"
727
+ ],
728
+ "\u306a\u3089\u305a": [
729
+ "\u306a\u3089",
730
+ "\u305a"
731
+ ],
732
+ "\u554f\u308f\u305a": [
733
+ "\u554f\u308f",
734
+ "\u305a"
735
+ ],
736
+ "\u69cb\u308f\u305a": [
737
+ "\u69cb\u308f",
738
+ "\u305a"
739
+ ],
740
+ "\u884c\u308f\u305a": [
741
+ "\u884c\u308f",
742
+ "\u305a"
743
+ ]
744
  }
745
  }
746
  },
747
  "tokenizer_class": "DebertaV2TokenizerFast",
748
  "torch_dtype": "float32",
749
+ "transformers_version": "4.19.4",
750
  "type_vocab_size": 0,
751
  "vocab_size": 32000
752
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:967ca0bfeb703b0f683e69f84c37ad318b3fe0b80b19e5f8af34e667a8e6acec
3
- size 1342932979
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:141c5df5ee7bd2676fea778d6ac7a97bfbdde0c5920254ba4afd91ad1ad65276
3
+ size 1342965811
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b414f2e7b443f8e2b4c5e40e684fe1c2c9791e4a863cf60a840354c7cc47ea4
3
- size 1391428843
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea810c64722637e4da616ecda3a31fe87001a296f7153b3dd546b82ca3990584
3
+ size 1391428779