KoichiYasuoka committed on
Commit
9194e7b
1 Parent(s): cf5f74d

model improved

Browse files
Files changed (4) hide show
  1. config.json +302 -103
  2. pytorch_model.bin +2 -2
  3. supar.model +2 -2
  4. tokenizer_config.json +1 -1
config.json CHANGED
@@ -8,112 +8,176 @@
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 1024,
10
  "id2label": {
11
- "0": "B-SYM",
12
- "1": "B-INTJ",
13
- "2": "I-ADP",
14
- "3": "I-X",
15
- "4": "ADP",
16
- "5": "PUNCT",
17
- "6": "B-VERB",
18
- "7": "I-VERB",
19
- "8": "I-NUM",
20
- "9": "VERB",
21
- "10": "PRON",
22
- "11": "I-DET",
23
- "12": "B-ADP",
24
- "13": "PROPN",
25
- "14": "I-PUNCT",
26
- "15": "I-CCONJ",
27
- "16": "NUM",
28
- "17": "I-INTJ",
29
- "18": "AUX",
30
- "19": "INTJ",
31
- "20": "CCONJ",
32
- "21": "I-PRON",
33
- "22": "B-CCONJ",
34
- "23": "X",
35
- "24": "B-PUNCT",
36
- "25": "I-SYM",
37
- "26": "I-SCONJ",
38
- "27": "SCONJ",
39
- "28": "NOUN",
40
- "29": "DET",
41
- "30": "ADV",
42
- "31": "PART",
43
- "32": "B-PRON",
44
- "33": "I-AUX",
45
- "34": "B-NUM",
46
- "35": "I-ADJ",
47
- "36": "B-SCONJ",
48
- "37": "I-PART",
49
- "38": "I-NOUN",
50
- "39": "I-ADV",
51
- "40": "ADJ",
52
- "41": "B-X",
53
- "42": "B-AUX",
54
- "43": "B-PROPN",
55
- "44": "B-DET",
56
- "45": "B-ADV",
57
- "46": "I-PROPN",
58
- "47": "B-NOUN",
59
- "48": "SYM",
60
- "49": "B-PART",
61
- "50": "B-ADJ"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  },
63
  "initializer_range": 0.02,
64
  "intermediate_size": 4096,
65
  "label2id": {
66
- "ADJ": 40,
67
- "ADP": 4,
68
- "ADV": 30,
69
- "AUX": 18,
70
- "B-ADJ": 50,
71
- "B-ADP": 12,
72
- "B-ADV": 45,
73
- "B-AUX": 42,
74
- "B-CCONJ": 22,
75
- "B-DET": 44,
76
- "B-INTJ": 1,
77
- "B-NOUN": 47,
78
- "B-NUM": 34,
79
- "B-PART": 49,
80
- "B-PRON": 32,
81
- "B-PROPN": 43,
82
- "B-PUNCT": 24,
83
- "B-SCONJ": 36,
84
- "B-SYM": 0,
85
- "B-VERB": 6,
86
- "B-X": 41,
87
- "CCONJ": 20,
88
- "DET": 29,
89
- "I-ADJ": 35,
90
- "I-ADP": 2,
91
- "I-ADV": 39,
92
- "I-AUX": 33,
93
- "I-CCONJ": 15,
94
- "I-DET": 11,
95
- "I-INTJ": 17,
96
- "I-NOUN": 38,
97
- "I-NUM": 8,
98
- "I-PART": 37,
99
- "I-PRON": 21,
100
- "I-PROPN": 46,
101
- "I-PUNCT": 14,
102
- "I-SCONJ": 26,
103
- "I-SYM": 25,
104
- "I-VERB": 7,
105
- "I-X": 3,
106
- "INTJ": 19,
107
- "NOUN": 28,
108
- "NUM": 16,
109
- "PART": 31,
110
- "PRON": 10,
111
- "PROPN": 13,
112
- "PUNCT": 5,
113
- "SCONJ": 27,
114
- "SYM": 48,
115
- "VERB": 9,
116
- "X": 23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  },
118
  "layer_norm_eps": 1e-12,
119
  "max_position_embeddings": 512,
@@ -122,9 +186,144 @@
122
  "num_hidden_layers": 24,
123
  "pad_token_id": 0,
124
  "position_embedding_type": "absolute",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  "tokenizer_class": "BertJapaneseTokenizer",
126
  "torch_dtype": "float32",
127
- "transformers_version": "4.11.3",
128
  "type_vocab_size": 2,
129
  "use_cache": true,
130
  "vocab_size": 32768
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 1024,
10
  "id2label": {
11
+ "0": "ADJ",
12
+ "1": "ADJ+VERB",
13
+ "2": "ADP",
14
+ "3": "ADP+ADP",
15
+ "4": "ADP+VERB",
16
+ "5": "ADV",
17
+ "6": "AUX",
18
+ "7": "B-ADJ",
19
+ "8": "B-ADJ+VERB",
20
+ "9": "B-ADP",
21
+ "10": "B-ADP+ADJ",
22
+ "11": "B-ADP+NOUN+ADP",
23
+ "12": "B-ADV",
24
+ "13": "B-AUX",
25
+ "14": "B-AUX+AUX",
26
+ "15": "B-AUX+NOUN",
27
+ "16": "B-CCONJ",
28
+ "17": "B-DET",
29
+ "18": "B-INTJ",
30
+ "19": "B-NOUN",
31
+ "20": "B-NOUN+ADP",
32
+ "21": "B-NOUN+NOUN",
33
+ "22": "B-NOUN+PUNCT",
34
+ "23": "B-NUM",
35
+ "24": "B-NUM+NOUN",
36
+ "25": "B-PART",
37
+ "26": "B-PRON",
38
+ "27": "B-PROPN",
39
+ "28": "B-PROPN+ADP",
40
+ "29": "B-PROPN+PROPN",
41
+ "30": "B-PUNCT",
42
+ "31": "B-SCONJ",
43
+ "32": "B-SYM",
44
+ "33": "B-VERB",
45
+ "34": "B-VERB+AUX",
46
+ "35": "B-VERB+SCONJ",
47
+ "36": "B-X",
48
+ "37": "CCONJ",
49
+ "38": "DET",
50
+ "39": "DET+NOUN",
51
+ "40": "I-ADJ",
52
+ "41": "I-ADJ+VERB",
53
+ "42": "I-ADP",
54
+ "43": "I-ADP+ADJ",
55
+ "44": "I-ADP+NOUN+ADP",
56
+ "45": "I-ADV",
57
+ "46": "I-AUX",
58
+ "47": "I-AUX+AUX",
59
+ "48": "I-AUX+NOUN",
60
+ "49": "I-CCONJ",
61
+ "50": "I-DET",
62
+ "51": "I-INTJ",
63
+ "52": "I-NOUN",
64
+ "53": "I-NOUN+ADP",
65
+ "54": "I-NOUN+NOUN",
66
+ "55": "I-NOUN+PUNCT",
67
+ "56": "I-NUM",
68
+ "57": "I-NUM+NOUN",
69
+ "58": "I-PART",
70
+ "59": "I-PRON",
71
+ "60": "I-PROPN",
72
+ "61": "I-PROPN+ADP",
73
+ "62": "I-PROPN+PROPN",
74
+ "63": "I-PUNCT",
75
+ "64": "I-SCONJ",
76
+ "65": "I-SYM",
77
+ "66": "I-VERB",
78
+ "67": "I-VERB+AUX",
79
+ "68": "I-VERB+SCONJ",
80
+ "69": "I-X",
81
+ "70": "INTJ",
82
+ "71": "NOUN",
83
+ "72": "NUM",
84
+ "73": "PART",
85
+ "74": "PRON",
86
+ "75": "PROPN",
87
+ "76": "PUNCT",
88
+ "77": "SCONJ",
89
+ "78": "SYM",
90
+ "79": "SYM+PUNCT",
91
+ "80": "VERB",
92
+ "81": "VERB+AUX",
93
+ "82": "X"
94
  },
95
  "initializer_range": 0.02,
96
  "intermediate_size": 4096,
97
  "label2id": {
98
+ "ADJ": 0,
99
+ "ADJ+VERB": 1,
100
+ "ADP": 2,
101
+ "ADP+ADP": 3,
102
+ "ADP+VERB": 4,
103
+ "ADV": 5,
104
+ "AUX": 6,
105
+ "B-ADJ": 7,
106
+ "B-ADJ+VERB": 8,
107
+ "B-ADP": 9,
108
+ "B-ADP+ADJ": 10,
109
+ "B-ADP+NOUN+ADP": 11,
110
+ "B-ADV": 12,
111
+ "B-AUX": 13,
112
+ "B-AUX+AUX": 14,
113
+ "B-AUX+NOUN": 15,
114
+ "B-CCONJ": 16,
115
+ "B-DET": 17,
116
+ "B-INTJ": 18,
117
+ "B-NOUN": 19,
118
+ "B-NOUN+ADP": 20,
119
+ "B-NOUN+NOUN": 21,
120
+ "B-NOUN+PUNCT": 22,
121
+ "B-NUM": 23,
122
+ "B-NUM+NOUN": 24,
123
+ "B-PART": 25,
124
+ "B-PRON": 26,
125
+ "B-PROPN": 27,
126
+ "B-PROPN+ADP": 28,
127
+ "B-PROPN+PROPN": 29,
128
+ "B-PUNCT": 30,
129
+ "B-SCONJ": 31,
130
+ "B-SYM": 32,
131
+ "B-VERB": 33,
132
+ "B-VERB+AUX": 34,
133
+ "B-VERB+SCONJ": 35,
134
+ "B-X": 36,
135
+ "CCONJ": 37,
136
+ "DET": 38,
137
+ "DET+NOUN": 39,
138
+ "I-ADJ": 40,
139
+ "I-ADJ+VERB": 41,
140
+ "I-ADP": 42,
141
+ "I-ADP+ADJ": 43,
142
+ "I-ADP+NOUN+ADP": 44,
143
+ "I-ADV": 45,
144
+ "I-AUX": 46,
145
+ "I-AUX+AUX": 47,
146
+ "I-AUX+NOUN": 48,
147
+ "I-CCONJ": 49,
148
+ "I-DET": 50,
149
+ "I-INTJ": 51,
150
+ "I-NOUN": 52,
151
+ "I-NOUN+ADP": 53,
152
+ "I-NOUN+NOUN": 54,
153
+ "I-NOUN+PUNCT": 55,
154
+ "I-NUM": 56,
155
+ "I-NUM+NOUN": 57,
156
+ "I-PART": 58,
157
+ "I-PRON": 59,
158
+ "I-PROPN": 60,
159
+ "I-PROPN+ADP": 61,
160
+ "I-PROPN+PROPN": 62,
161
+ "I-PUNCT": 63,
162
+ "I-SCONJ": 64,
163
+ "I-SYM": 65,
164
+ "I-VERB": 66,
165
+ "I-VERB+AUX": 67,
166
+ "I-VERB+SCONJ": 68,
167
+ "I-X": 69,
168
+ "INTJ": 70,
169
+ "NOUN": 71,
170
+ "NUM": 72,
171
+ "PART": 73,
172
+ "PRON": 74,
173
+ "PROPN": 75,
174
+ "PUNCT": 76,
175
+ "SCONJ": 77,
176
+ "SYM": 78,
177
+ "SYM+PUNCT": 79,
178
+ "VERB": 80,
179
+ "VERB+AUX": 81,
180
+ "X": 82
181
  },
182
  "layer_norm_eps": 1e-12,
183
  "max_position_embeddings": 512,
186
  "num_hidden_layers": 24,
187
  "pad_token_id": 0,
188
  "position_embedding_type": "absolute",
189
+ "task_specific_params": {
190
+ "upos_multiword": {
191
+ "ADJ+VERB": {
192
+ "\u7a0b\u306a\u304f\u3057": [
193
+ "\u7a0b\u306a\u304f",
194
+ "\u3057"
195
+ ]
196
+ },
197
+ "ADP+ADJ": {
198
+ "\u306f\u3089\u30fb\u3080\u3046\u3093\u4f5c\u54c1\u5171\u901a": [
199
+ "\u306f",
200
+ "\u3089\u30fb\u3080\u3046\u3093\u4f5c\u54c1\u5171\u901a"
201
+ ]
202
+ },
203
+ "ADP+ADP": {
204
+ "\u3068\u3082": [
205
+ "\u3068",
206
+ "\u3082"
207
+ ]
208
+ },
209
+ "ADP+NOUN+ADP": {
210
+ "\u306e\u307f\u305d\u306e": [
211
+ "\u306e",
212
+ "\u307f\u305d",
213
+ "\u306e"
214
+ ]
215
+ },
216
+ "ADP+VERB": {
217
+ "\u3067\u304d": [
218
+ "\u3067",
219
+ "\u304d"
220
+ ],
221
+ "\u3067\u3057": [
222
+ "\u3067",
223
+ "\u3057"
224
+ ],
225
+ "\u306f\u3057": [
226
+ "\u306f",
227
+ "\u3057"
228
+ ]
229
+ },
230
+ "AUX+AUX": {
231
+ "\u30c1\u30e3\u30c3\u30bf": [
232
+ "\u30c1\u30e3\u30c3",
233
+ "\u30bf"
234
+ ]
235
+ },
236
+ "AUX+NOUN": {
237
+ "\u306a\u304a\u5024\u6bb5": [
238
+ "\u306a",
239
+ "\u304a\u5024\u6bb5"
240
+ ],
241
+ "\u306a\u304a\u5e03\u65bd": [
242
+ "\u306a",
243
+ "\u304a\u5e03\u65bd"
244
+ ]
245
+ },
246
+ "DET+NOUN": {
247
+ "\u3053\u306e\u9803": [
248
+ "\u3053\u306e",
249
+ "\u9803"
250
+ ]
251
+ },
252
+ "NOUN+ADP": {
253
+ "\u3046\u305d\u306e": [
254
+ "\u3046\u305d",
255
+ "\u306e"
256
+ ]
257
+ },
258
+ "NOUN+NOUN": {
259
+ "\u9ce5\u53d6\u770c\u6559\u80b2\u59d4\u54e1\u4f1a\u793e\u4f1a\u6559\u80b2\u59d4\u54e1": [
260
+ "\u9ce5\u53d6\u770c\u6559\u80b2\u59d4\u54e1\u4f1a",
261
+ "\u793e\u4f1a\u6559\u80b2\u59d4\u54e1"
262
+ ]
263
+ },
264
+ "NOUN+PUNCT": {
265
+ "\u5b87\u5b99\u4ea4\u97ff\u8a69-\u300f": [
266
+ "\u5b87\u5b99\u4ea4\u97ff\u8a69-",
267
+ "\u300f"
268
+ ],
269
+ "\u7259\u72fc-GARO-\u300f": [
270
+ "\u7259\u72fc-GARO-",
271
+ "\u300f"
272
+ ]
273
+ },
274
+ "NUM+NOUN": {
275
+ "3.0LV6\u30a8\u30f3\u30b8\u30f3": [
276
+ "3.0L",
277
+ "V6\u30a8\u30f3\u30b8\u30f3"
278
+ ],
279
+ "4.7LV8\u30a8\u30f3\u30b8\u30f3": [
280
+ "4.7L",
281
+ "V8\u30a8\u30f3\u30b8\u30f3"
282
+ ]
283
+ },
284
+ "PROPN+ADP": {
285
+ "\u3061\u307b\u306e": [
286
+ "\u3061\u307b",
287
+ "\u306e"
288
+ ],
289
+ "\u3088\u3090\u3053\u306e": [
290
+ "\u3088\u3090\u3053",
291
+ "\u306e"
292
+ ]
293
+ },
294
+ "PROPN+PROPN": {
295
+ "\u5e83\u5cf6\u5e02\u4e2d\u533a": [
296
+ "\u5e83\u5cf6\u5e02",
297
+ "\u4e2d\u533a"
298
+ ]
299
+ },
300
+ "SYM+PUNCT": {
301
+ "-)": [
302
+ "-",
303
+ ")"
304
+ ]
305
+ },
306
+ "VERB+AUX": {
307
+ "\u305f\u3063\u305f": [
308
+ "\u305f\u3063",
309
+ "\u305f"
310
+ ],
311
+ "\u5c0f\u5c4b\u639b\u3051\u3055\u305b": [
312
+ "\u5c0f\u5c4b\u639b\u3051\u3055",
313
+ "\u305b"
314
+ ]
315
+ },
316
+ "VERB+SCONJ": {
317
+ "\u8cb8\u5207\u3063\u3066": [
318
+ "\u8cb8\u5207\u3063",
319
+ "\u3066"
320
+ ]
321
+ }
322
+ }
323
+ },
324
  "tokenizer_class": "BertJapaneseTokenizer",
325
  "torch_dtype": "float32",
326
+ "transformers_version": "4.19.2",
327
  "type_vocab_size": 2,
328
  "use_cache": true,
329
  "vocab_size": 32768
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c26b02e8926191ac7f7fb3340d3d286e08fe213b8b34a0dafe120ad0b2f00bec
3
- size 1345944210
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215468db9aeba6a4f9bfc28f4e7ea62924a41734f78c6454114ac3edbd0826b4
3
+ size 1346049874
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2e1383c4cbb6c4a5f2a2a09cb8a89e726415291e030194772840dbcc68f68d9
3
- size 1399761610
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:638e2ba0224ebebca9801f7a47e3327cae29e0a2889e40bc5eb61642d2efb903
3
+ size 1398774350
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "do_lower_case": false, "do_word_tokenize": true, "do_subword_tokenize": true, "word_tokenizer_type": "mecab", "subword_tokenizer_type": "wordpiece", "never_split": null, "mecab_kwargs": {"mecab_dic": "unidic_lite"}, "model_max_length": 512, "tokenizer_class": "BertJapaneseTokenizer"}
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "do_lower_case": false, "do_word_tokenize": true, "do_subword_tokenize": true, "word_tokenizer_type": "mecab", "subword_tokenizer_type": "wordpiece", "never_split": null, "mecab_kwargs": {"mecab_dic": "unidic_lite"}, "model_max_length": 512, "tokenizer_class": "BertJapaneseTokenizerFast"}