KoichiYasuoka committed on
Commit
d4b2510
1 Parent(s): 9dd6ecd

model improved

Browse files
Files changed (5) hide show
  1. README.md +1 -0
  2. config.json +495 -125
  3. pytorch_model.bin +2 -2
  4. supar.model +2 -2
  5. tokenizer.json +0 -0
README.md CHANGED
@@ -9,6 +9,7 @@ tags:
9
  - "dependency-parsing"
10
  datasets:
11
  - "universal_dependencies"
 
12
  license: "cc-by-sa-4.0"
13
  pipeline_tag: "token-classification"
14
  widget:
 
9
  - "dependency-parsing"
10
  datasets:
11
  - "universal_dependencies"
12
+ - "ukr-models/Ukr-Synth"
13
  license: "cc-by-sa-4.0"
14
  pipeline_tag: "token-classification"
15
  widget:
config.json CHANGED
@@ -11,135 +11,233 @@
11
  "hidden_size": 768,
12
  "id2label": {
13
  "0": "ADJ",
14
- "1": "ADP",
15
- "2": "ADV",
16
- "3": "AUX",
17
- "4": "B-ADJ",
18
- "5": "B-ADP",
19
- "6": "B-ADV",
20
- "7": "B-AUX",
21
- "8": "B-CCONJ",
22
- "9": "B-DET",
23
- "10": "B-INTJ",
24
- "11": "B-NOUN",
25
- "12": "B-NOUN+NUM",
26
- "13": "B-NUM",
27
- "14": "B-NUM+NOUN",
28
- "15": "B-PART",
29
- "16": "B-PRON",
30
- "17": "B-PROPN",
31
- "18": "B-PUNCT",
32
- "19": "B-SCONJ",
33
- "20": "B-SYM",
34
- "21": "B-VERB",
35
- "22": "B-VERB+ADV",
36
- "23": "B-VERB+PRON",
37
- "24": "B-X",
38
- "25": "CCONJ",
39
- "26": "DET",
40
- "27": "I-ADJ",
41
- "28": "I-ADP",
42
- "29": "I-ADV",
43
- "30": "I-AUX",
44
- "31": "I-CCONJ",
45
- "32": "I-DET",
46
- "33": "I-INTJ",
47
- "34": "I-NOUN",
48
- "35": "I-NOUN+NUM",
49
- "36": "I-NUM",
50
- "37": "I-NUM+NOUN",
51
- "38": "I-PART",
52
- "39": "I-PRON",
53
- "40": "I-PROPN",
54
- "41": "I-PUNCT",
55
- "42": "I-SCONJ",
56
- "43": "I-SYM",
57
- "44": "I-VERB",
58
- "45": "I-VERB+ADV",
59
- "46": "I-VERB+PRON",
60
- "47": "I-X",
61
- "48": "INTJ",
62
- "49": "NOUN",
63
- "50": "NOUN+NUM",
64
- "51": "NUM",
65
- "52": "NUM+NOUN",
66
- "53": "PART",
67
- "54": "PRON",
68
- "55": "PROPN",
69
- "56": "PUNCT",
70
- "57": "SCONJ",
71
- "58": "SYM",
72
- "59": "VERB",
73
- "60": "VERB+ADV",
74
- "61": "VERB+PRON",
75
- "62": "X"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  },
77
  "initializer_range": 0.02,
78
  "intermediate_size": 3072,
79
  "label2id": {
80
  "ADJ": 0,
81
- "ADP": 1,
82
- "ADV": 2,
83
- "AUX": 3,
84
- "B-ADJ": 4,
85
- "B-ADP": 5,
86
- "B-ADV": 6,
87
- "B-AUX": 7,
88
- "B-CCONJ": 8,
89
- "B-DET": 9,
90
- "B-INTJ": 10,
91
- "B-NOUN": 11,
92
- "B-NOUN+NUM": 12,
93
- "B-NUM": 13,
94
- "B-NUM+NOUN": 14,
95
- "B-PART": 15,
96
- "B-PRON": 16,
97
- "B-PROPN": 17,
98
- "B-PUNCT": 18,
99
- "B-SCONJ": 19,
100
- "B-SYM": 20,
101
- "B-VERB": 21,
102
- "B-VERB+ADV": 22,
103
- "B-VERB+PRON": 23,
104
- "B-X": 24,
105
- "CCONJ": 25,
106
- "DET": 26,
107
- "I-ADJ": 27,
108
- "I-ADP": 28,
109
- "I-ADV": 29,
110
- "I-AUX": 30,
111
- "I-CCONJ": 31,
112
- "I-DET": 32,
113
- "I-INTJ": 33,
114
- "I-NOUN": 34,
115
- "I-NOUN+NUM": 35,
116
- "I-NUM": 36,
117
- "I-NUM+NOUN": 37,
118
- "I-PART": 38,
119
- "I-PRON": 39,
120
- "I-PROPN": 40,
121
- "I-PUNCT": 41,
122
- "I-SCONJ": 42,
123
- "I-SYM": 43,
124
- "I-VERB": 44,
125
- "I-VERB+ADV": 45,
126
- "I-VERB+PRON": 46,
127
- "I-X": 47,
128
- "INTJ": 48,
129
- "NOUN": 49,
130
- "NOUN+NUM": 50,
131
- "NUM": 51,
132
- "NUM+NOUN": 52,
133
- "PART": 53,
134
- "PRON": 54,
135
- "PROPN": 55,
136
- "PUNCT": 56,
137
- "SCONJ": 57,
138
- "SYM": 58,
139
- "VERB": 59,
140
- "VERB+ADV": 60,
141
- "VERB+PRON": 61,
142
- "X": 62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  },
144
  "layer_norm_eps": 1e-12,
145
  "max_position_embeddings": 512,
@@ -150,6 +248,203 @@
150
  "position_embedding_type": "absolute",
151
  "task_specific_params": {
152
  "upos_multiword": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  "NUM+NOUN": {
154
  "\u043f\u0456\u0432'\u044f\u0440\u0434\u0430": [
155
  "\u043f\u0456\u0432",
@@ -172,6 +467,36 @@
172
  "\u044f\u0449\u0438\u043a\u0430"
173
  ]
174
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  "VERB+ADV": {
176
  "\u043d\u0456\u0434\u0435": [
177
  "\u043d\u0435\u043c\u0430\u0454",
@@ -192,6 +517,34 @@
192
  "\u043d\u0456\u044f\u043a": [
193
  "\u043d\u0435\u043c\u0430\u0454",
194
  "\u044f\u043a"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  ]
196
  },
197
  "VERB+PRON": {
@@ -211,12 +564,29 @@
211
  "\u043d\u0435\u043c\u0430\u0454",
212
  "\u0447\u0438\u043c"
213
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  }
215
  }
216
  },
217
  "tokenizer_class": "BertTokenizerFast",
218
  "torch_dtype": "float32",
219
- "transformers_version": "4.14.1",
220
  "type_vocab_size": 2,
221
  "use_cache": true,
222
  "vocab_size": 30000
 
11
  "hidden_size": 768,
12
  "id2label": {
13
  "0": "ADJ",
14
+ "1": "ADJ+ADJ",
15
+ "2": "ADJ+NOUN",
16
+ "3": "ADP",
17
+ "4": "ADP+NOUN",
18
+ "5": "ADV",
19
+ "6": "ADV+VERB",
20
+ "7": "AUX",
21
+ "8": "B-ADJ",
22
+ "9": "B-ADJ+NOUN",
23
+ "10": "B-ADP",
24
+ "11": "B-ADP+ADJ",
25
+ "12": "B-ADP+DET",
26
+ "13": "B-ADP+NOUN",
27
+ "14": "B-ADP+PROPN",
28
+ "15": "B-ADV",
29
+ "16": "B-ADV+ADJ",
30
+ "17": "B-AUX",
31
+ "18": "B-AUX+ADJ",
32
+ "19": "B-CCONJ",
33
+ "20": "B-DET",
34
+ "21": "B-DET+ADJ",
35
+ "22": "B-INTJ",
36
+ "23": "B-NOUN",
37
+ "24": "B-NOUN+ADJ",
38
+ "25": "B-NOUN+ADP",
39
+ "26": "B-NOUN+ADV+VERB",
40
+ "27": "B-NOUN+NUM",
41
+ "28": "B-NUM",
42
+ "29": "B-NUM+NOUN",
43
+ "30": "B-NUM+X",
44
+ "31": "B-PART",
45
+ "32": "B-PRON",
46
+ "33": "B-PROPN",
47
+ "34": "B-PROPN+NOUN",
48
+ "35": "B-PUNCT",
49
+ "36": "B-SCONJ",
50
+ "37": "B-SCONJ+ADJ",
51
+ "38": "B-SCONJ+NOUN",
52
+ "39": "B-SYM",
53
+ "40": "B-VERB",
54
+ "41": "B-VERB+ADP",
55
+ "42": "B-VERB+ADV",
56
+ "43": "B-VERB+NOUN",
57
+ "44": "B-VERB+PRON",
58
+ "45": "B-VERB+PROPN+PRON",
59
+ "46": "B-X",
60
+ "47": "CCONJ",
61
+ "48": "CCONJ+ADJ",
62
+ "49": "DET",
63
+ "50": "DET+NOUN",
64
+ "51": "I-ADJ",
65
+ "52": "I-ADJ+NOUN",
66
+ "53": "I-ADP",
67
+ "54": "I-ADP+ADJ",
68
+ "55": "I-ADP+DET",
69
+ "56": "I-ADP+NOUN",
70
+ "57": "I-ADP+PROPN",
71
+ "58": "I-ADV",
72
+ "59": "I-ADV+ADJ",
73
+ "60": "I-AUX",
74
+ "61": "I-AUX+ADJ",
75
+ "62": "I-CCONJ",
76
+ "63": "I-DET",
77
+ "64": "I-DET+ADJ",
78
+ "65": "I-INTJ",
79
+ "66": "I-NOUN",
80
+ "67": "I-NOUN+ADJ",
81
+ "68": "I-NOUN+ADP",
82
+ "69": "I-NOUN+ADV+VERB",
83
+ "70": "I-NOUN+NUM",
84
+ "71": "I-NUM",
85
+ "72": "I-NUM+NOUN",
86
+ "73": "I-NUM+X",
87
+ "74": "I-PART",
88
+ "75": "I-PRON",
89
+ "76": "I-PROPN",
90
+ "77": "I-PROPN+NOUN",
91
+ "78": "I-PUNCT",
92
+ "79": "I-SCONJ",
93
+ "80": "I-SCONJ+ADJ",
94
+ "81": "I-SCONJ+NOUN",
95
+ "82": "I-SYM",
96
+ "83": "I-VERB",
97
+ "84": "I-VERB+ADP",
98
+ "85": "I-VERB+ADV",
99
+ "86": "I-VERB+NOUN",
100
+ "87": "I-VERB+PRON",
101
+ "88": "I-VERB+PROPN+PRON",
102
+ "89": "I-X",
103
+ "90": "INTJ",
104
+ "91": "NOUN",
105
+ "92": "NOUN+ADJ",
106
+ "93": "NOUN+NOUN",
107
+ "94": "NOUN+NUM",
108
+ "95": "NOUN+PROPN",
109
+ "96": "NOUN+PUNCT",
110
+ "97": "NUM",
111
+ "98": "NUM+NOUN",
112
+ "99": "PART",
113
+ "100": "PRON",
114
+ "101": "PROPN",
115
+ "102": "PUNCT",
116
+ "103": "SCONJ",
117
+ "104": "SYM",
118
+ "105": "VERB",
119
+ "106": "VERB+ADV",
120
+ "107": "VERB+NOUN",
121
+ "108": "VERB+NUM",
122
+ "109": "VERB+PRON",
123
+ "110": "VERB+VERB",
124
+ "111": "X"
125
  },
126
  "initializer_range": 0.02,
127
  "intermediate_size": 3072,
128
  "label2id": {
129
  "ADJ": 0,
130
+ "ADJ+ADJ": 1,
131
+ "ADJ+NOUN": 2,
132
+ "ADP": 3,
133
+ "ADP+NOUN": 4,
134
+ "ADV": 5,
135
+ "ADV+VERB": 6,
136
+ "AUX": 7,
137
+ "B-ADJ": 8,
138
+ "B-ADJ+NOUN": 9,
139
+ "B-ADP": 10,
140
+ "B-ADP+ADJ": 11,
141
+ "B-ADP+DET": 12,
142
+ "B-ADP+NOUN": 13,
143
+ "B-ADP+PROPN": 14,
144
+ "B-ADV": 15,
145
+ "B-ADV+ADJ": 16,
146
+ "B-AUX": 17,
147
+ "B-AUX+ADJ": 18,
148
+ "B-CCONJ": 19,
149
+ "B-DET": 20,
150
+ "B-DET+ADJ": 21,
151
+ "B-INTJ": 22,
152
+ "B-NOUN": 23,
153
+ "B-NOUN+ADJ": 24,
154
+ "B-NOUN+ADP": 25,
155
+ "B-NOUN+ADV+VERB": 26,
156
+ "B-NOUN+NUM": 27,
157
+ "B-NUM": 28,
158
+ "B-NUM+NOUN": 29,
159
+ "B-NUM+X": 30,
160
+ "B-PART": 31,
161
+ "B-PRON": 32,
162
+ "B-PROPN": 33,
163
+ "B-PROPN+NOUN": 34,
164
+ "B-PUNCT": 35,
165
+ "B-SCONJ": 36,
166
+ "B-SCONJ+ADJ": 37,
167
+ "B-SCONJ+NOUN": 38,
168
+ "B-SYM": 39,
169
+ "B-VERB": 40,
170
+ "B-VERB+ADP": 41,
171
+ "B-VERB+ADV": 42,
172
+ "B-VERB+NOUN": 43,
173
+ "B-VERB+PRON": 44,
174
+ "B-VERB+PROPN+PRON": 45,
175
+ "B-X": 46,
176
+ "CCONJ": 47,
177
+ "CCONJ+ADJ": 48,
178
+ "DET": 49,
179
+ "DET+NOUN": 50,
180
+ "I-ADJ": 51,
181
+ "I-ADJ+NOUN": 52,
182
+ "I-ADP": 53,
183
+ "I-ADP+ADJ": 54,
184
+ "I-ADP+DET": 55,
185
+ "I-ADP+NOUN": 56,
186
+ "I-ADP+PROPN": 57,
187
+ "I-ADV": 58,
188
+ "I-ADV+ADJ": 59,
189
+ "I-AUX": 60,
190
+ "I-AUX+ADJ": 61,
191
+ "I-CCONJ": 62,
192
+ "I-DET": 63,
193
+ "I-DET+ADJ": 64,
194
+ "I-INTJ": 65,
195
+ "I-NOUN": 66,
196
+ "I-NOUN+ADJ": 67,
197
+ "I-NOUN+ADP": 68,
198
+ "I-NOUN+ADV+VERB": 69,
199
+ "I-NOUN+NUM": 70,
200
+ "I-NUM": 71,
201
+ "I-NUM+NOUN": 72,
202
+ "I-NUM+X": 73,
203
+ "I-PART": 74,
204
+ "I-PRON": 75,
205
+ "I-PROPN": 76,
206
+ "I-PROPN+NOUN": 77,
207
+ "I-PUNCT": 78,
208
+ "I-SCONJ": 79,
209
+ "I-SCONJ+ADJ": 80,
210
+ "I-SCONJ+NOUN": 81,
211
+ "I-SYM": 82,
212
+ "I-VERB": 83,
213
+ "I-VERB+ADP": 84,
214
+ "I-VERB+ADV": 85,
215
+ "I-VERB+NOUN": 86,
216
+ "I-VERB+PRON": 87,
217
+ "I-VERB+PROPN+PRON": 88,
218
+ "I-X": 89,
219
+ "INTJ": 90,
220
+ "NOUN": 91,
221
+ "NOUN+ADJ": 92,
222
+ "NOUN+NOUN": 93,
223
+ "NOUN+NUM": 94,
224
+ "NOUN+PROPN": 95,
225
+ "NOUN+PUNCT": 96,
226
+ "NUM": 97,
227
+ "NUM+NOUN": 98,
228
+ "PART": 99,
229
+ "PRON": 100,
230
+ "PROPN": 101,
231
+ "PUNCT": 102,
232
+ "SCONJ": 103,
233
+ "SYM": 104,
234
+ "VERB": 105,
235
+ "VERB+ADV": 106,
236
+ "VERB+NOUN": 107,
237
+ "VERB+NUM": 108,
238
+ "VERB+PRON": 109,
239
+ "VERB+VERB": 110,
240
+ "X": 111
241
  },
242
  "layer_norm_eps": 1e-12,
243
  "max_position_embeddings": 512,
 
248
  "position_embedding_type": "absolute",
249
  "task_specific_params": {
250
  "upos_multiword": {
251
+ "ADJ+ADJ": {
252
+ "\u041c\u043e\u043a\u0440\u043e\u043a\u0430\u043b\u0438\u0433\u0456\u0440\u0441\u044c\u043a\u0456\u0439\u043e\u0431\u02bc\u0454\u0434\u043d\u0430\u043d\u0456\u0439": [
253
+ "\u041c\u043e\u043a\u0440\u043e\u043a\u0430\u043b\u0438\u0433\u0456\u0440\u0441\u044c\u043a\u0456\u0439",
254
+ "\u043e\u0431\u02bc\u0454\u0434\u043d\u0430\u043d\u0456\u0439"
255
+ ]
256
+ },
257
+ "ADJ+NOUN": {
258
+ "27-\u0430\u043d\u0433\u043b\u0456\u0454\u0446\u044c": [
259
+ "27-\u0430\u043d\u0433\u043b\u0456",
260
+ "\u0454\u0446\u044c"
261
+ ],
262
+ "\u0433\u0440\u043e\u043c\u0430\u0434\u0441\u044c\u043a\u043e\u0433\u043e\u0437\u0434\u043e\u0440\u043e\u0432\u02bc\u044f": [
263
+ "\u0433\u0440\u043e\u043c\u0430\u0434\u0441\u044c\u043a\u043e\u0433\u043e",
264
+ "\u0437\u0434\u043e\u0440\u043e\u0432\u02bc\u044f"
265
+ ],
266
+ "\u043c\u0456\u0441\u044c\u043a\u0430\u043f\u0440\u043e\u043a\u0443\u0440\u0430\u0442\u0443\u0440\u0430\u041f\u02bc\u044f\u0442\u043d\u0438\u0446\u044f": [
267
+ "\u043c\u0456\u0441\u044c\u043a\u0430",
268
+ "\u043f\u0440\u043e\u043a\u0443\u0440\u0430\u0442\u0443\u0440\u0430\u041f\u02bc\u044f\u0442\u043d\u0438\u0446\u044f"
269
+ ],
270
+ "\u043f\u0435\u0440\u0441\u043e\u043d\u0430\u043b\u044c\u043d\u043e\u0433\u043e\u043a\u043e\u043c\u043f\u02bc\u044e\u0442\u0435\u0440\u0430": [
271
+ "\u043f\u0435\u0440\u0441\u043e\u043d\u0430\u043b\u044c\u043d\u043e\u0433\u043e",
272
+ "\u043a\u043e\u043c\u043f\u02bc\u044e\u0442\u0435\u0440\u0430"
273
+ ],
274
+ "\u043f\u0440\u0438\u0440\u043e\u0434\u043e\u043e\u0445\u043e\u0440\u043e\u043d\u043d\u0456\u0442\u0435\u0440\u0438\u0442\u043e\u0440\u0456\u0456": [
275
+ "\u043f\u0440\u0438\u0440\u043e\u0434\u043e\u043e\u0445\u043e\u0440\u043e\u043d\u043d\u0456",
276
+ "\u0442\u0435\u0440\u0438\u0442\u043e\u0440\u0456\u0456"
277
+ ],
278
+ "\u0447\u0435\u0440\u0432\u043e\u043d\u0443\u0433\u0430\u043d\u0447": [
279
+ "\u0447\u0435\u0440\u0432\u043e\u043d\u0443",
280
+ "\u0433\u0430\u043d\u0447"
281
+ ]
282
+ },
283
+ "ADP+ADJ": {
284
+ "\u0434\u043e\u0412\u0435\u0440\u0445\u043e\u0432\u043d\u043e\u0456": [
285
+ "\u0434\u043e",
286
+ "\u0412\u0435\u0440\u0445\u043e\u0432\u043d\u043e\u0456"
287
+ ],
288
+ "\u0434\u043e\u043a\u0440\u0438\u043c\u0456\u043d\u0430\u043b\u044c\u043d\u043e\u0456": [
289
+ "\u0434\u043e",
290
+ "\u043a\u0440\u0438\u043c\u0456\u043d\u0430\u043b\u044c\u043d\u043e\u0456"
291
+ ],
292
+ "\u0434\u043e\u043c\u0456\u043d\u0435\u0440\u0430\u043b\u0456\u0437\u043e\u0432\u0430\u043d\u043e\u0456": [
293
+ "\u0434\u043e",
294
+ "\u043c\u0456\u043d\u0435\u0440\u0430\u043b\u0456\u0437\u043e\u0432\u0430\u043d\u043e\u0456"
295
+ ],
296
+ "\u043d\u0430\u043c\u0430\u0439\u0431\u0443\u0442\u043d": [
297
+ "\u043d\u0430",
298
+ "\u043c\u0430\u0439\u0431\u0443\u0442\u043d"
299
+ ]
300
+ },
301
+ "ADP+DET": {
302
+ "\u043f\u043e\u0441\u0432\u043e\u0454\u043c\u0443": [
303
+ "\u043f\u043e",
304
+ "\u0441\u0432\u043e\u0454\u043c\u0443"
305
+ ]
306
+ },
307
+ "ADP+NOUN": {
308
+ "\u0434\u043e\u043d\u0435\u0437\u0430\u043b\u0435\u0436\u043d\u043e\u0441\u0442": [
309
+ "\u0434\u043e",
310
+ "\u043d\u0435\u0437\u0430\u043b\u0435\u0436\u043d\u043e\u0441\u0442"
311
+ ],
312
+ "\u0434\u043e\u043f\u043e\u0435\u0437": [
313
+ "\u0434\u043e",
314
+ "\u043f\u043e\u0435\u0437"
315
+ ],
316
+ "\u0434\u043e\u043f\u043e\u043b\u0456\u0446\u0456\u0456": [
317
+ "\u0434\u043e",
318
+ "\u043f\u043e\u043b\u0456\u0446\u0456\u0456"
319
+ ],
320
+ "\u0434\u043e\u0441\u044c\u043e\u0433\u043e\u0434\u043d": [
321
+ "\u0434\u043e",
322
+ "\u0441\u044c\u043e\u0433\u043e\u0434\u043d"
323
+ ],
324
+ "\u0437\u0430\u0441\u043d\u0443\u0432\u0430\u043d\u043d\u044f": [
325
+ "\u0437\u0430",
326
+ "\u0441\u043d\u0443\u0432\u0430\u043d\u043d\u044f"
327
+ ],
328
+ "\u043d\u0430\u0443\u0432\u0430\u0437": [
329
+ "\u043d\u0430",
330
+ "\u0443\u0432\u0430\u0437"
331
+ ],
332
+ "\u043f\u0440\u043e\u043e\u0431\u02bc\u0454\u043a\u0442": [
333
+ "\u043f\u0440\u043e",
334
+ "\u043e\u0431\u02bc\u0454\u043a\u0442"
335
+ ]
336
+ },
337
+ "ADP+PROPN": {
338
+ "\u0434\u043e\u0420\u043e\u0441\u0456\u0456": [
339
+ "\u0434\u043e",
340
+ "\u0420\u043e\u0441\u0456\u0456"
341
+ ]
342
+ },
343
+ "ADV+ADJ": {
344
+ "\u043e\u0434\u043d\u043e\u0441\u0442\u0430\u0439\u043d\u043e\u043f\u043e\u0432\u0438\u043d\u043d": [
345
+ "\u043e\u0434\u043d\u043e\u0441\u0442\u0430\u0439\u043d\u043e",
346
+ "\u043f\u043e\u0432\u0438\u043d\u043d"
347
+ ]
348
+ },
349
+ "ADV+VERB": {
350
+ "\u0437\u0430\u0440\u0430\u0437\u0437\u02bc\u044f\u0432\u0438\u043b\u0438\u0441\u044f": [
351
+ "\u0437\u0430\u0440\u0430\u0437",
352
+ "\u0437\u02bc\u044f\u0432\u0438\u043b\u0438\u0441\u044f"
353
+ ],
354
+ "\u0441\u0442\u0456\u043b\u044c\u043a\u0438\u0440\u043e\u0437\u02bc\u044f\u0441\u043d\u0438\u0442\u0438": [
355
+ "\u0441\u0442\u0456\u043b\u044c\u043a\u0438",
356
+ "\u0440\u043e\u0437\u02bc\u044f\u0441\u043d\u0438\u0442\u0438"
357
+ ],
358
+ "\u0442\u0443\u0442\u0437\u02bc\u044f\u0432\u0438\u0442\u044c\u0441\u044f": [
359
+ "\u0442\u0443\u0442",
360
+ "\u0437\u02bc\u044f\u0432\u0438\u0442\u044c\u0441\u044f"
361
+ ]
362
+ },
363
+ "AUX+ADJ": {
364
+ "\u0411\u0443\u0434\u044c\u0442\u0435\u043f\u0435\u0432\u043d": [
365
+ "\u0411\u0443\u0434\u044c\u0442\u0435",
366
+ "\u043f\u0435\u0432\u043d"
367
+ ]
368
+ },
369
+ "CCONJ+ADJ": {
370
+ "\u0430\u0431\u043e\u043a\u043e\u043c\u043f\u02bc\u044e\u0442\u0435\u0440\u043d\u0443": [
371
+ "\u0430\u0431\u043e",
372
+ "\u043a\u043e\u043c\u043f\u02bc\u044e\u0442\u0435\u0440\u043d\u0443"
373
+ ]
374
+ },
375
+ "DET+ADJ": {
376
+ "\u041e\u0442\u0430\u043a\u0435\u0441\u0442\u043e\u0440\u0438\u0447\u043d\u0435": [
377
+ "\u041e\u0442\u0430\u043a\u0435",
378
+ "\u0441\u0442\u043e\u0440\u0438\u0447\u043d\u0435"
379
+ ]
380
+ },
381
+ "DET+NOUN": {
382
+ "\u0439\u043e\u0433\u043e\u0437\u043e\u0431\u043e\u0432\u02bc\u044f\u0437\u0430\u043d\u044c": [
383
+ "\u0439\u043e\u0433\u043e",
384
+ "\u0437\u043e\u0431\u043e\u0432\u02bc\u044f\u0437\u0430\u043d\u044c"
385
+ ],
386
+ "\u0446\u0438\u0445\u0437\u043c\u0430\u0433\u0430\u043d\u043d\u00a4\u0445": [
387
+ "\u0446\u0438\u0445",
388
+ "\u0437\u043c\u0430\u0433\u0430\u043d\u043d\u00a4\u0445"
389
+ ],
390
+ "\u0457\u0445\u043d\u044c\u043e\u0457\u0441\u0456\u043c\u02bc\u0457": [
391
+ "\u0457\u0445\u043d\u044c\u043e\u0457",
392
+ "\u0441\u0456\u043c\u02bc\u0457"
393
+ ]
394
+ },
395
+ "NOUN+ADJ": {
396
+ "\u0430\u0432\u0442\u043e\u0440\u043a\u0430\u0437\u0430\u0441\u043b\u0443\u0433\u043e\u0432\u0443": [
397
+ "\u0430\u0432\u0442\u043e\u0440\u043a\u0430",
398
+ "\u0437\u0430\u0441\u043b\u0443\u0433\u043e\u0432\u0443"
399
+ ],
400
+ "\u0431\u0443\u043d\u0442\u0443\u0443\u0432\u02bc\u044f\u0437\u043d\u0435\u043d\u0438\u0445": [
401
+ "\u0431\u0443\u043d\u0442\u0443",
402
+ "\u0443\u0432\u02bc\u044f\u0437\u043d\u0435\u043d\u0438\u0445"
403
+ ],
404
+ "\u043f\u043e\u0445\u0438\u0431\u043a\u0430\u043f\u043e\u0432\u02bc\u044f\u0437\u0430\u043d\u0430": [
405
+ "\u043f\u043e\u0445\u0438\u0431\u043a\u0430",
406
+ "\u043f\u043e\u0432\u02bc\u044f\u0437\u0430\u043d\u0430"
407
+ ]
408
+ },
409
+ "NOUN+ADP": {
410
+ "\u044f\u043b\u0438\u043d\u043a\u0430\u043d\u0430": [
411
+ "\u044f\u043b\u0438\u043d\u043a\u0430",
412
+ "\u043d\u0430"
413
+ ]
414
+ },
415
+ "NOUN+ADV+VERB": {
416
+ "\u0447\u0430\u0441\u0442\u0438\u043d\u0430\u0437\u0430\u043d\u0435\u043f\u043e\u043a\u043e\u0456\u043b\u0430\u0441\u044f": [
417
+ "\u0447\u0430\u0441\u0442\u0438\u043d\u0430",
418
+ "\u0437\u0430\u043d\u0435\u043f\u043e\u043a\u043e\u0456",
419
+ "\u043b\u0430\u0441\u044f"
420
+ ]
421
+ },
422
+ "NOUN+NOUN": {
423
+ "\u043f\u0440\u0435\u0437\u0438\u0434\u0435\u043d\u0442": [
424
+ "\u043f",
425
+ "\u0440\u0435\u0437\u0438\u0434\u0435\u043d\u0442"
426
+ ],
427
+ "\u0440\u043e\u043a\u0456\u0432\u0443\u0432\u02bc\u044f\u0437\u043d\u0435\u043d\u043d\u044f": [
428
+ "\u0440\u043e\u043a\u0456\u0432",
429
+ "\u0443\u0432\u02bc\u044f\u0437\u043d\u0435\u043d\u043d\u044f"
430
+ ],
431
+ "\u0442\u0435\u0430\u0442\u0440\u0443": [
432
+ "\u0442\u0435\u0430",
433
+ "\u0442\u0440\u0443"
434
+ ]
435
+ },
436
+ "NOUN+PROPN": {
437
+ "\u043f\u0440\u0438\u0437\u043d\u0430\u0447\u0435\u043d\u043d\u044f\u0412\u02bc\u044f\u0447\u0435\u0441\u043b\u0430\u0432\u0430": [
438
+ "\u043f\u0440\u0438\u0437\u043d\u0430\u0447\u0435\u043d\u043d\u044f",
439
+ "\u0412\u02bc\u044f\u0447\u0435\u0441\u043b\u0430\u0432\u0430"
440
+ ]
441
+ },
442
+ "NOUN+PUNCT": {
443
+ "\u0431\u0440\u0430\u0442\u0441\u0442\u0432\u0430\u02ba\u0421\u0442\u0430\u0432\u0440\u043e\u0441\u02ba": [
444
+ "\u0431\u0440\u0430\u0442\u0441\u0442\u0432\u0430",
445
+ "\u02ba\u0421\u0442\u0430\u0432\u0440\u043e\u0441\u02ba"
446
+ ]
447
+ },
448
  "NUM+NOUN": {
449
  "\u043f\u0456\u0432'\u044f\u0440\u0434\u0430": [
450
  "\u043f\u0456\u0432",
 
467
  "\u044f\u0449\u0438\u043a\u0430"
468
  ]
469
  },
470
+ "NUM+X": {
471
+ "6HP": [
472
+ "6",
473
+ "HP"
474
+ ]
475
+ },
476
+ "PROPN+NOUN": {
477
+ "\u041c\u043e\u0440\u043e\u0437\u043a\u043e\u043f\u0440\u043e\u0431\u0443": [
478
+ "\u041c\u043e\u0440\u043e\u0437\u043a\u043e",
479
+ "\u043f\u0440\u043e\u0431\u0443"
480
+ ]
481
+ },
482
+ "SCONJ+ADJ": {
483
+ "\u0449\u043e\u0440\u043e\u0437\u0441\u0435\u043b\u0435\u043d": [
484
+ "\u0449\u043e",
485
+ "\u0440\u043e\u0437\u0441\u0435\u043b\u0435\u043d"
486
+ ]
487
+ },
488
+ "SCONJ+NOUN": {
489
+ "\u0449\u043e\u0441\u044c\u043e\u0433\u043e\u0434\u043d": [
490
+ "\u0449\u043e",
491
+ "\u0441\u044c\u043e\u0433\u043e\u0434\u043d"
492
+ ]
493
+ },
494
+ "VERB+ADP": {
495
+ "\u043f\u043b\u0435\u043a\u0430\u044e\u043d\u0430\u0434": [
496
+ "\u043f\u043b\u0435\u043a\u0430\u044e",
497
+ "\u043d\u0430\u0434"
498
+ ]
499
+ },
500
  "VERB+ADV": {
501
  "\u043d\u0456\u0434\u0435": [
502
  "\u043d\u0435\u043c\u0430\u0454",
 
517
  "\u043d\u0456\u044f\u043a": [
518
  "\u043d\u0435\u043c\u0430\u0454",
519
  "\u044f\u043a"
520
+ ],
521
+ "\u0441\u043f\u0456\u043b\u043a\u0443\u0432\u0430\u0442\u0438\u0441\u044f\u043e\u0431\u043e\u0432\u02bc\u044f\u0437\u043a\u043e\u0432\u043e": [
522
+ "\u0441\u043f\u0456\u043b\u043a\u0443\u0432\u0430\u0442\u0438\u0441\u044f",
523
+ "\u043e\u0431\u043e\u0432\u02bc\u044f\u0437\u043a\u043e\u0432\u043e"
524
+ ]
525
+ },
526
+ "VERB+NOUN": {
527
+ "\u0432\u0438\u043a\u043e\u043d\u0443\u0432\u0430\u0432\u043e\u0431\u043e\u0432\u02bc\u044f\u0437\u043a\u0438": [
528
+ "\u0432\u0438\u043a\u043e\u043d\u0443\u0432\u0430\u0432",
529
+ "\u043e\u0431\u043e\u0432\u02bc\u044f\u0437\u043a\u0438"
530
+ ],
531
+ "\u0432\u0440\u0443\u0447\u0438\u0432\u043e\u0433\u043b\u044f\u0434\u043e\u0432": [
532
+ "\u0432\u0440\u0443\u0447\u0438\u0432",
533
+ "\u043e\u0433\u043b\u044f\u0434\u043e\u0432"
534
+ ],
535
+ "\u043a\u0430\u0436\u0435\u043c\u043e\u0441\u044c\u043e\u0433\u043e\u0434\u043d": [
536
+ "\u043a\u0430\u0436\u0435\u043c\u043e",
537
+ "\u0441\u044c\u043e\u0433\u043e\u0434\u043d"
538
+ ],
539
+ "\u0440\u043e\u0431\u0438\u043c\u043e\u0441\u044c\u043e\u0433\u043e\u0434\u043d": [
540
+ "\u0440\u043e\u0431\u0438\u043c\u043e",
541
+ "\u0441\u044c\u043e\u0433\u043e\u0434\u043d"
542
+ ]
543
+ },
544
+ "VERB+NUM": {
545
+ "\u0432\u0438\u043f\u043e\u0432\u043d\u0438\u043b\u043e\u0441\u044f\u0434\u0435\u0432\u02bc\u044f\u0442\u043d\u0430\u0434\u0446\u044f\u0442\u044c": [
546
+ "\u0432\u0438\u043f\u043e\u0432\u043d\u0438\u043b\u043e\u0441\u044f",
547
+ "\u0434\u0435\u0432\u02bc\u044f\u0442\u043d\u0430\u0434\u0446\u044f\u0442\u044c"
548
  ]
549
  },
550
  "VERB+PRON": {
 
564
  "\u043d\u0435\u043c\u0430\u0454",
565
  "\u0447\u0438\u043c"
566
  ]
567
+ },
568
+ "VERB+PROPN+PRON": {
569
+ "\u043e\u043f\u0438\u043d\u0438\u043b\u0430\u0441\u044f\u0410\u043d\u0430\u0441\u0442\u0430\u0441\u044f": [
570
+ "\u043e\u043f\u0438\u043d\u0438\u043b\u0430\u0441\u044f",
571
+ "\u0410\u043d\u0430\u0441\u0442\u0430\u0441",
572
+ "\u044f"
573
+ ]
574
+ },
575
+ "VERB+VERB": {
576
+ "\u0437\u043c\u043e\u0436\u0435\u043c\u043e\u043e\u0431\u02bc\u0454\u0434\u043d\u0430\u0442\u0438": [
577
+ "\u0437\u043c\u043e\u0436\u0435\u043c\u043e",
578
+ "\u043e\u0431\u02bc\u0454\u0434\u043d\u0430\u0442\u0438"
579
+ ],
580
+ "\u043c\u043e\u0433\u043b\u0430\u043f\u0456\u0434\u02bc\u0457\u0445\u0430\u0442\u0438": [
581
+ "\u043c\u043e\u0433\u043b\u0430",
582
+ "\u043f\u0456\u0434\u02bc\u0457\u0445\u0430\u0442\u0438"
583
+ ]
584
  }
585
  }
586
  },
587
  "tokenizer_class": "BertTokenizerFast",
588
  "torch_dtype": "float32",
589
+ "transformers_version": "4.18.0",
590
  "type_vocab_size": 2,
591
  "use_cache": true,
592
  "vocab_size": 30000
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:735ce76697fea88bd807ed7cf843d11a691c2fb81c9637a5f42f9f6848ceb7ac
3
- size 434241393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59124a679ed7e8a21c1af7e5492523d31f566c8266291d6b681970dd85193f05
3
+ size 434378481
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16e19fd58bfb82b801655327a1b4ded01417a453e643091ea6baef06bf5ad6b4
3
- size 487153701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:743f9e0b64332261598f5989a76afcbab67e5569c4286496f8f775fed2a6a14f
3
+ size 594683109
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff