KoichiYasuoka committed on
Commit
526ead2
1 Parent(s): 0f8c02f

initial release

Browse files
Files changed (8) hide show
  1. README.md +60 -0
  2. config.json +1026 -0
  3. maker.py +54 -0
  4. pytorch_model.bin +3 -0
  5. special_tokens_map.json +7 -0
  6. tokenizer.json +0 -0
  7. tokenizer_config.json +15 -0
  8. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - "lzh"
4
+ tags:
5
+ - "classical chinese"
6
+ - "literary chinese"
7
+ - "ancient chinese"
8
+ - "token-classification"
9
+ - "pos"
10
+ - "dependency-parsing"
11
+ datasets:
12
+ - "universal_dependencies"
13
+ license: "apache-2.0"
14
+ pipeline_tag: "token-classification"
15
+ widget:
16
+ - text: "孟子見梁惠王"
17
+ ---
18
+
19
+ # roberta-classical-chinese-large-ud-goeswith
20
+
21
+ ## Model Description
22
+
23
+ This is a RoBERTa model pre-trained on Classical Chinese texts for POS-tagging and dependency-parsing (using `goeswith` for subwords), derived from [roberta-classical-chinese-large-char](https://huggingface.co/KoichiYasuoka/roberta-classical-chinese-large-char) and [UD_Classical_Chinese-Kyoto](https://github.com/UniversalDependencies/UD_Classical_Chinese-Kyoto).
24
+
25
+ ## How to Use
26
+
27
+ ```py
28
class UDgoeswith(object):
    """POS tagger and dependency parser built on a token-classification model.

    Each model label has the form ``UPOS|FEATS...|DEPREL``. For every
    candidate head the sentence is re-encoded with that token masked and
    appended at the end; the resulting label scores form an arc-score
    matrix that is decoded into a tree with the Chu-Liu-Edmonds algorithm.
    """

    def __init__(self, bert):
        """Load the tokenizer and model named (or located) by *bert*."""
        from transformers import AutoTokenizer, AutoModelForTokenClassification

        self.tokenizer = AutoTokenizer.from_pretrained(bert)
        self.model = AutoModelForTokenClassification.from_pretrained(bert)

    def __call__(self, text):
        """Parse *text* and return the analysis as a CoNLL-U-style string.

        The result starts with a ``# text = ...`` comment line, followed by
        one tab-separated line per token and a terminating blank line.
        """
        import numpy
        import torch
        import ufal.chu_liu_edmonds

        encoded = self.tokenizer(text, return_offsets_mapping=True)
        ids = encoded["input_ids"]
        # The real tokens sit at positions 1 .. n-1 (the first and last ids
        # are treated as special tokens, as in the slicing below).
        n = len(ids) - 1
        header = "# text = " + text + "\n"
        if n < 2:
            # No real tokens (e.g. empty input): an empty batch cannot be fed
            # to the model, so return the header-only result.
            return header + "\n"

        mask_id = self.tokenizer.mask_token_id
        # One row per token i: token i is masked in place and appended at
        # the end of the sequence.
        batch = [ids[:i] + [mask_id] + ids[i + 1:] + [ids[i]] for i in range(1, n)]
        with torch.no_grad():
            logits = self.model(input_ids=torch.tensor(batch)).logits.numpy()[:, 1:n, :]
        # Label 0 is never allowed to win the max/argmax below.
        logits[:, :, 0] = numpy.nan

        # scores[j, i] is the best label score for token j when token i was
        # the masked one; labels[j, i] is the id of that best label.
        scores = numpy.full((n, n), numpy.nan)
        scores[1:, 1:] = numpy.nanmax(logits, axis=2).transpose()
        labels = numpy.zeros((n, n), dtype=int)
        labels[1:, 1:] = numpy.nanargmax(logits, axis=2).transpose()
        for i in range(1, n):
            # Move the diagonal into column 0 and forbid self-loops:
            # "token i attached to itself" is treated as attachment to node 0.
            scores[i, 0], scores[i, i], labels[i, 0] = scores[i, i], numpy.nan, labels[i, i]
        heads = ufal.chu_liu_edmonds.chu_liu_edmonds(scores)[0]

        # Character spans of the real tokens (special tokens have empty spans).
        spans = [(start, end) for start, end in encoded["offset_mapping"] if start < end]
        id2label = self.model.config.id2label
        rows = [header]
        for i, (start, end) in enumerate(spans, 1):
            tag = id2label[int(labels[i, heads[i]])].split("|")
            # "_" only when a gap separates this token from the next one.
            if i < len(spans) and end < spans[i][0]:
                misc = "_"
            else:
                misc = "SpaceAfter=No"
            rows.append("\t".join([
                str(i),
                text[start:end],
                "_",
                tag[0],
                "_",
                "|".join(tag[1:-1]),
                str(heads[i]),
                tag[-1],
                "_",
                misc,
            ]) + "\n")
        return "".join(rows) + "\n"


nlp = UDgoeswith("KoichiYasuoka/roberta-classical-chinese-large-ud-goeswith")
print(nlp("孟子見梁惠王"))
58
+ ```
59
+
60
+ [ufal.chu-liu-edmonds](https://pypi.org/project/ufal.chu-liu-edmonds/) is required.
config.json ADDED
@@ -0,0 +1,1026 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "id2label": {
14
+ "0": "-|_|dep",
15
+ "1": "ADP|Degree=Equ|cc",
16
+ "2": "ADP|_|amod",
17
+ "3": "ADP|_|case",
18
+ "4": "ADP|_|cc",
19
+ "5": "ADP|_|fixed",
20
+ "6": "ADP|_|mark",
21
+ "7": "ADP|_|nsubj",
22
+ "8": "ADP|_|obj",
23
+ "9": "ADP|_|root",
24
+ "10": "ADV|AdvType=Cau|advmod",
25
+ "11": "ADV|AdvType=Cau|amod",
26
+ "12": "ADV|AdvType=Cau|obj",
27
+ "13": "ADV|AdvType=Deg|Degree=Cmp|advmod",
28
+ "14": "ADV|AdvType=Deg|Degree=Cmp|amod",
29
+ "15": "ADV|AdvType=Deg|Degree=Cmp|conj",
30
+ "16": "ADV|AdvType=Deg|Degree=Pos|advmod",
31
+ "17": "ADV|AdvType=Deg|Degree=Pos|amod",
32
+ "18": "ADV|AdvType=Deg|Degree=Pos|ccomp",
33
+ "19": "ADV|AdvType=Deg|Degree=Pos|conj",
34
+ "20": "ADV|AdvType=Deg|Degree=Pos|flat:vv",
35
+ "21": "ADV|AdvType=Deg|Degree=Pos|parataxis",
36
+ "22": "ADV|AdvType=Deg|Degree=Pos|root",
37
+ "23": "ADV|AdvType=Deg|Degree=Sup|advmod",
38
+ "24": "ADV|AdvType=Deg|Degree=Sup|amod",
39
+ "25": "ADV|AdvType=Deg|Degree=Sup|conj",
40
+ "26": "ADV|AdvType=Deg|Degree=Sup|nsubj",
41
+ "27": "ADV|AdvType=Deg|Degree=Sup|root",
42
+ "28": "ADV|AdvType=Tim|Aspect=Perf|advmod",
43
+ "29": "ADV|AdvType=Tim|Aspect=Perf|amod",
44
+ "30": "ADV|AdvType=Tim|Aspect=Perf|parataxis",
45
+ "31": "ADV|AdvType=Tim|Tense=Fut|advmod",
46
+ "32": "ADV|AdvType=Tim|Tense=Fut|amod",
47
+ "33": "ADV|AdvType=Tim|Tense=Fut|nsubj",
48
+ "34": "ADV|AdvType=Tim|Tense=Fut|nsubj:outer",
49
+ "35": "ADV|AdvType=Tim|Tense=Fut|root",
50
+ "36": "ADV|AdvType=Tim|Tense=Past|advmod",
51
+ "37": "ADV|AdvType=Tim|Tense=Past|amod",
52
+ "38": "ADV|AdvType=Tim|Tense=Pres|advmod",
53
+ "39": "ADV|AdvType=Tim|Tense=Pres|amod",
54
+ "40": "ADV|AdvType=Tim|Tense=Pres|root",
55
+ "41": "ADV|AdvType=Tim|advcl",
56
+ "42": "ADV|AdvType=Tim|advmod",
57
+ "43": "ADV|AdvType=Tim|amod",
58
+ "44": "ADV|AdvType=Tim|ccomp",
59
+ "45": "ADV|AdvType=Tim|compound:redup",
60
+ "46": "ADV|AdvType=Tim|conj",
61
+ "47": "ADV|AdvType=Tim|flat:vv",
62
+ "48": "ADV|AdvType=Tim|nsubj",
63
+ "49": "ADV|AdvType=Tim|root",
64
+ "50": "ADV|Degree=Equ|VerbForm=Conv|advmod",
65
+ "51": "ADV|Degree=Pos|VerbForm=Conv|advmod",
66
+ "52": "ADV|Polarity=Neg|VerbForm=Conv|advmod",
67
+ "53": "ADV|Polarity=Neg|advmod",
68
+ "54": "ADV|Polarity=Neg|amod",
69
+ "55": "ADV|Polarity=Neg|conj",
70
+ "56": "ADV|Polarity=Neg|nsubj",
71
+ "57": "ADV|Polarity=Neg|obj",
72
+ "58": "ADV|Polarity=Neg|parataxis",
73
+ "59": "ADV|Polarity=Neg|root",
74
+ "60": "ADV|VerbForm=Conv|advmod",
75
+ "61": "ADV|_|acl",
76
+ "62": "ADV|_|advcl",
77
+ "63": "ADV|_|advmod",
78
+ "64": "ADV|_|amod",
79
+ "65": "ADV|_|cc",
80
+ "66": "ADV|_|ccomp",
81
+ "67": "ADV|_|conj",
82
+ "68": "ADV|_|flat:vv",
83
+ "69": "ADV|_|nsubj",
84
+ "70": "ADV|_|obj",
85
+ "71": "ADV|_|root",
86
+ "72": "AUX|Mood=Des|aux",
87
+ "73": "AUX|Mood=Des|conj",
88
+ "74": "AUX|Mood=Des|csubj",
89
+ "75": "AUX|Mood=Des|flat:vv",
90
+ "76": "AUX|Mood=Des|parataxis",
91
+ "77": "AUX|Mood=Des|root",
92
+ "78": "AUX|Mood=Nec|acl",
93
+ "79": "AUX|Mood=Nec|amod",
94
+ "80": "AUX|Mood=Nec|aux",
95
+ "81": "AUX|Mood=Nec|root",
96
+ "82": "AUX|Mood=Pot|acl",
97
+ "83": "AUX|Mood=Pot|advcl",
98
+ "84": "AUX|Mood=Pot|amod",
99
+ "85": "AUX|Mood=Pot|aux",
100
+ "86": "AUX|Mood=Pot|ccomp",
101
+ "87": "AUX|Mood=Pot|conj",
102
+ "88": "AUX|Mood=Pot|nsubj",
103
+ "89": "AUX|Mood=Pot|obj",
104
+ "90": "AUX|Mood=Pot|parataxis",
105
+ "91": "AUX|Mood=Pot|root",
106
+ "92": "AUX|VerbType=Cop|cop",
107
+ "93": "AUX|Voice=Pass|aux",
108
+ "94": "AUX|Voice=Pass|conj",
109
+ "95": "AUX|Voice=Pass|root",
110
+ "96": "CCONJ|_|advmod",
111
+ "97": "CCONJ|_|amod",
112
+ "98": "CCONJ|_|cc",
113
+ "99": "CCONJ|_|fixed",
114
+ "100": "CCONJ|_|obj",
115
+ "101": "CCONJ|_|orphan",
116
+ "102": "INTJ|_|advcl",
117
+ "103": "INTJ|_|compound:redup",
118
+ "104": "INTJ|_|conj",
119
+ "105": "INTJ|_|csubj",
120
+ "106": "INTJ|_|discourse",
121
+ "107": "INTJ|_|discourse:sp",
122
+ "108": "INTJ|_|dislocated",
123
+ "109": "INTJ|_|flat:vv",
124
+ "110": "INTJ|_|nsubj",
125
+ "111": "INTJ|_|obj",
126
+ "112": "INTJ|_|root",
127
+ "113": "NOUN|Case=Loc|acl",
128
+ "114": "NOUN|Case=Loc|advcl",
129
+ "115": "NOUN|Case=Loc|amod",
130
+ "116": "NOUN|Case=Loc|ccomp",
131
+ "117": "NOUN|Case=Loc|clf",
132
+ "118": "NOUN|Case=Loc|compound",
133
+ "119": "NOUN|Case=Loc|compound:redup",
134
+ "120": "NOUN|Case=Loc|conj",
135
+ "121": "NOUN|Case=Loc|csubj",
136
+ "122": "NOUN|Case=Loc|dislocated",
137
+ "123": "NOUN|Case=Loc|flat",
138
+ "124": "NOUN|Case=Loc|flat:vv",
139
+ "125": "NOUN|Case=Loc|iobj",
140
+ "126": "NOUN|Case=Loc|list",
141
+ "127": "NOUN|Case=Loc|nmod",
142
+ "128": "NOUN|Case=Loc|nsubj",
143
+ "129": "NOUN|Case=Loc|nsubj:outer",
144
+ "130": "NOUN|Case=Loc|obj",
145
+ "131": "NOUN|Case=Loc|obl",
146
+ "132": "NOUN|Case=Loc|obl:lmod",
147
+ "133": "NOUN|Case=Loc|obl:tmod",
148
+ "134": "NOUN|Case=Loc|parataxis",
149
+ "135": "NOUN|Case=Loc|root",
150
+ "136": "NOUN|Case=Loc|xcomp",
151
+ "137": "NOUN|Case=Tem|acl",
152
+ "138": "NOUN|Case=Tem|advcl",
153
+ "139": "NOUN|Case=Tem|amod",
154
+ "140": "NOUN|Case=Tem|ccomp",
155
+ "141": "NOUN|Case=Tem|clf",
156
+ "142": "NOUN|Case=Tem|compound",
157
+ "143": "NOUN|Case=Tem|compound:redup",
158
+ "144": "NOUN|Case=Tem|conj",
159
+ "145": "NOUN|Case=Tem|csubj",
160
+ "146": "NOUN|Case=Tem|flat",
161
+ "147": "NOUN|Case=Tem|flat:vv",
162
+ "148": "NOUN|Case=Tem|iobj",
163
+ "149": "NOUN|Case=Tem|list",
164
+ "150": "NOUN|Case=Tem|nmod",
165
+ "151": "NOUN|Case=Tem|nsubj",
166
+ "152": "NOUN|Case=Tem|nsubj:outer",
167
+ "153": "NOUN|Case=Tem|obj",
168
+ "154": "NOUN|Case=Tem|obl:tmod",
169
+ "155": "NOUN|Case=Tem|parataxis",
170
+ "156": "NOUN|Case=Tem|root",
171
+ "157": "NOUN|Case=Tem|xcomp",
172
+ "158": "NOUN|NounType=Clf|ccomp",
173
+ "159": "NOUN|NounType=Clf|clf",
174
+ "160": "NOUN|NounType=Clf|conj",
175
+ "161": "NOUN|NounType=Clf|flat",
176
+ "162": "NOUN|NounType=Clf|nmod",
177
+ "163": "NOUN|NounType=Clf|nsubj",
178
+ "164": "NOUN|NounType=Clf|obj",
179
+ "165": "NOUN|NounType=Clf|parataxis",
180
+ "166": "NOUN|NounType=Clf|root",
181
+ "167": "NOUN|_|acl",
182
+ "168": "NOUN|_|advcl",
183
+ "169": "NOUN|_|amod",
184
+ "170": "NOUN|_|ccomp",
185
+ "171": "NOUN|_|clf",
186
+ "172": "NOUN|_|compound",
187
+ "173": "NOUN|_|compound:redup",
188
+ "174": "NOUN|_|conj",
189
+ "175": "NOUN|_|csubj",
190
+ "176": "NOUN|_|csubj:outer",
191
+ "177": "NOUN|_|dislocated",
192
+ "178": "NOUN|_|flat",
193
+ "179": "NOUN|_|flat:foreign",
194
+ "180": "NOUN|_|flat:vv",
195
+ "181": "NOUN|_|iobj",
196
+ "182": "NOUN|_|list",
197
+ "183": "NOUN|_|nmod",
198
+ "184": "NOUN|_|nsubj",
199
+ "185": "NOUN|_|nsubj:outer",
200
+ "186": "NOUN|_|nsubj:pass",
201
+ "187": "NOUN|_|obj",
202
+ "188": "NOUN|_|obl",
203
+ "189": "NOUN|_|obl:lmod",
204
+ "190": "NOUN|_|obl:tmod",
205
+ "191": "NOUN|_|parataxis",
206
+ "192": "NOUN|_|root",
207
+ "193": "NOUN|_|vocative",
208
+ "194": "NOUN|_|xcomp",
209
+ "195": "NUM|NumType=Ord|conj",
210
+ "196": "NUM|NumType=Ord|flat",
211
+ "197": "NUM|NumType=Ord|nsubj",
212
+ "198": "NUM|NumType=Ord|nummod",
213
+ "199": "NUM|NumType=Ord|obj",
214
+ "200": "NUM|NumType=Ord|obl",
215
+ "201": "NUM|NumType=Ord|obl:lmod",
216
+ "202": "NUM|NumType=Ord|obl:tmod",
217
+ "203": "NUM|NumType=Ord|root",
218
+ "204": "NUM|_|acl",
219
+ "205": "NUM|_|advcl",
220
+ "206": "NUM|_|ccomp",
221
+ "207": "NUM|_|clf",
222
+ "208": "NUM|_|compound",
223
+ "209": "NUM|_|conj",
224
+ "210": "NUM|_|csubj",
225
+ "211": "NUM|_|dislocated",
226
+ "212": "NUM|_|flat",
227
+ "213": "NUM|_|iobj",
228
+ "214": "NUM|_|list",
229
+ "215": "NUM|_|nsubj",
230
+ "216": "NUM|_|nsubj:outer",
231
+ "217": "NUM|_|nummod",
232
+ "218": "NUM|_|obj",
233
+ "219": "NUM|_|obl",
234
+ "220": "NUM|_|obl:lmod",
235
+ "221": "NUM|_|obl:tmod",
236
+ "222": "NUM|_|parataxis",
237
+ "223": "NUM|_|root",
238
+ "224": "NUM|_|xcomp",
239
+ "225": "PART|_|acl",
240
+ "226": "PART|_|advcl",
241
+ "227": "PART|_|advmod",
242
+ "228": "PART|_|case",
243
+ "229": "PART|_|cc",
244
+ "230": "PART|_|ccomp",
245
+ "231": "PART|_|clf",
246
+ "232": "PART|_|conj",
247
+ "233": "PART|_|csubj:outer",
248
+ "234": "PART|_|discourse",
249
+ "235": "PART|_|discourse:sp",
250
+ "236": "PART|_|dislocated",
251
+ "237": "PART|_|fixed",
252
+ "238": "PART|_|flat",
253
+ "239": "PART|_|iobj",
254
+ "240": "PART|_|list",
255
+ "241": "PART|_|mark",
256
+ "242": "PART|_|nmod",
257
+ "243": "PART|_|nsubj",
258
+ "244": "PART|_|nsubj:outer",
259
+ "245": "PART|_|nsubj:pass",
260
+ "246": "PART|_|obj",
261
+ "247": "PART|_|obl",
262
+ "248": "PART|_|obl:lmod",
263
+ "249": "PART|_|parataxis",
264
+ "250": "PART|_|root",
265
+ "251": "PRON|Person=1|PronType=Prs|acl",
266
+ "252": "PRON|Person=1|PronType=Prs|advcl",
267
+ "253": "PRON|Person=1|PronType=Prs|ccomp",
268
+ "254": "PRON|Person=1|PronType=Prs|conj",
269
+ "255": "PRON|Person=1|PronType=Prs|det",
270
+ "256": "PRON|Person=1|PronType=Prs|iobj",
271
+ "257": "PRON|Person=1|PronType=Prs|nsubj",
272
+ "258": "PRON|Person=1|PronType=Prs|nsubj:outer",
273
+ "259": "PRON|Person=1|PronType=Prs|obj",
274
+ "260": "PRON|Person=1|PronType=Prs|obl",
275
+ "261": "PRON|Person=1|PronType=Prs|obl:lmod",
276
+ "262": "PRON|Person=1|PronType=Prs|root",
277
+ "263": "PRON|Person=1|PronType=Prs|vocative",
278
+ "264": "PRON|Person=2|PronType=Prs|conj",
279
+ "265": "PRON|Person=2|PronType=Prs|det",
280
+ "266": "PRON|Person=2|PronType=Prs|iobj",
281
+ "267": "PRON|Person=2|PronType=Prs|nsubj",
282
+ "268": "PRON|Person=2|PronType=Prs|obj",
283
+ "269": "PRON|Person=2|PronType=Prs|obl",
284
+ "270": "PRON|Person=2|PronType=Prs|root",
285
+ "271": "PRON|Person=2|PronType=Prs|vocative",
286
+ "272": "PRON|Person=3|PronType=Prs|advcl",
287
+ "273": "PRON|Person=3|PronType=Prs|amod",
288
+ "274": "PRON|Person=3|PronType=Prs|conj",
289
+ "275": "PRON|Person=3|PronType=Prs|det",
290
+ "276": "PRON|Person=3|PronType=Prs|dislocated",
291
+ "277": "PRON|Person=3|PronType=Prs|expl",
292
+ "278": "PRON|Person=3|PronType=Prs|iobj",
293
+ "279": "PRON|Person=3|PronType=Prs|nsubj",
294
+ "280": "PRON|Person=3|PronType=Prs|nsubj:pass",
295
+ "281": "PRON|Person=3|PronType=Prs|obj",
296
+ "282": "PRON|Person=3|PronType=Prs|obl",
297
+ "283": "PRON|Person=3|PronType=Prs|root",
298
+ "284": "PRON|PronType=Dem|acl",
299
+ "285": "PRON|PronType=Dem|advcl",
300
+ "286": "PRON|PronType=Dem|amod",
301
+ "287": "PRON|PronType=Dem|compound",
302
+ "288": "PRON|PronType=Dem|conj",
303
+ "289": "PRON|PronType=Dem|det",
304
+ "290": "PRON|PronType=Dem|dislocated",
305
+ "291": "PRON|PronType=Dem|expl",
306
+ "292": "PRON|PronType=Dem|flat",
307
+ "293": "PRON|PronType=Dem|iobj",
308
+ "294": "PRON|PronType=Dem|nsubj",
309
+ "295": "PRON|PronType=Dem|nsubj:outer",
310
+ "296": "PRON|PronType=Dem|obj",
311
+ "297": "PRON|PronType=Dem|obl",
312
+ "298": "PRON|PronType=Dem|obl:lmod",
313
+ "299": "PRON|PronType=Dem|root",
314
+ "300": "PRON|PronType=Int|amod",
315
+ "301": "PRON|PronType=Int|ccomp",
316
+ "302": "PRON|PronType=Int|conj",
317
+ "303": "PRON|PronType=Int|det",
318
+ "304": "PRON|PronType=Int|dislocated",
319
+ "305": "PRON|PronType=Int|nsubj",
320
+ "306": "PRON|PronType=Int|nsubj:outer",
321
+ "307": "PRON|PronType=Int|obj",
322
+ "308": "PRON|PronType=Int|obl",
323
+ "309": "PRON|PronType=Int|parataxis",
324
+ "310": "PRON|PronType=Int|root",
325
+ "311": "PRON|PronType=Int|vocative",
326
+ "312": "PRON|PronType=Int|xcomp",
327
+ "313": "PRON|PronType=Prs|Reflex=Yes|det",
328
+ "314": "PRON|PronType=Prs|Reflex=Yes|dislocated",
329
+ "315": "PRON|PronType=Prs|Reflex=Yes|nsubj",
330
+ "316": "PRON|PronType=Prs|Reflex=Yes|obj",
331
+ "317": "PRON|PronType=Prs|Reflex=Yes|obl",
332
+ "318": "PRON|PronType=Prs|Reflex=Yes|root",
333
+ "319": "PRON|PronType=Prs|conj",
334
+ "320": "PRON|PronType=Prs|det",
335
+ "321": "PRON|PronType=Prs|iobj",
336
+ "322": "PRON|PronType=Prs|nsubj",
337
+ "323": "PRON|PronType=Prs|nsubj:outer",
338
+ "324": "PRON|PronType=Prs|obj",
339
+ "325": "PROPN|Case=Loc|NameType=Geo|acl",
340
+ "326": "PROPN|Case=Loc|NameType=Geo|advcl",
341
+ "327": "PROPN|Case=Loc|NameType=Geo|amod",
342
+ "328": "PROPN|Case=Loc|NameType=Geo|compound",
343
+ "329": "PROPN|Case=Loc|NameType=Geo|conj",
344
+ "330": "PROPN|Case=Loc|NameType=Geo|csubj",
345
+ "331": "PROPN|Case=Loc|NameType=Geo|dislocated",
346
+ "332": "PROPN|Case=Loc|NameType=Geo|flat",
347
+ "333": "PROPN|Case=Loc|NameType=Geo|iobj",
348
+ "334": "PROPN|Case=Loc|NameType=Geo|nmod",
349
+ "335": "PROPN|Case=Loc|NameType=Geo|nsubj",
350
+ "336": "PROPN|Case=Loc|NameType=Geo|nsubj:outer",
351
+ "337": "PROPN|Case=Loc|NameType=Geo|obj",
352
+ "338": "PROPN|Case=Loc|NameType=Geo|obl",
353
+ "339": "PROPN|Case=Loc|NameType=Geo|obl:lmod",
354
+ "340": "PROPN|Case=Loc|NameType=Geo|parataxis",
355
+ "341": "PROPN|Case=Loc|NameType=Geo|root",
356
+ "342": "PROPN|Case=Loc|NameType=Geo|xcomp",
357
+ "343": "PROPN|Case=Loc|NameType=Nat|acl",
358
+ "344": "PROPN|Case=Loc|NameType=Nat|advcl",
359
+ "345": "PROPN|Case=Loc|NameType=Nat|amod",
360
+ "346": "PROPN|Case=Loc|NameType=Nat|ccomp",
361
+ "347": "PROPN|Case=Loc|NameType=Nat|compound",
362
+ "348": "PROPN|Case=Loc|NameType=Nat|conj",
363
+ "349": "PROPN|Case=Loc|NameType=Nat|flat",
364
+ "350": "PROPN|Case=Loc|NameType=Nat|iobj",
365
+ "351": "PROPN|Case=Loc|NameType=Nat|nmod",
366
+ "352": "PROPN|Case=Loc|NameType=Nat|nsubj",
367
+ "353": "PROPN|Case=Loc|NameType=Nat|nsubj:outer",
368
+ "354": "PROPN|Case=Loc|NameType=Nat|obj",
369
+ "355": "PROPN|Case=Loc|NameType=Nat|obl",
370
+ "356": "PROPN|Case=Loc|NameType=Nat|obl:lmod",
371
+ "357": "PROPN|Case=Loc|NameType=Nat|parataxis",
372
+ "358": "PROPN|Case=Loc|NameType=Nat|root",
373
+ "359": "PROPN|NameType=Giv|acl",
374
+ "360": "PROPN|NameType=Giv|advcl",
375
+ "361": "PROPN|NameType=Giv|amod",
376
+ "362": "PROPN|NameType=Giv|ccomp",
377
+ "363": "PROPN|NameType=Giv|compound",
378
+ "364": "PROPN|NameType=Giv|conj",
379
+ "365": "PROPN|NameType=Giv|dislocated",
380
+ "366": "PROPN|NameType=Giv|flat",
381
+ "367": "PROPN|NameType=Giv|flat:vv",
382
+ "368": "PROPN|NameType=Giv|iobj",
383
+ "369": "PROPN|NameType=Giv|list",
384
+ "370": "PROPN|NameType=Giv|nmod",
385
+ "371": "PROPN|NameType=Giv|nsubj",
386
+ "372": "PROPN|NameType=Giv|nsubj:outer",
387
+ "373": "PROPN|NameType=Giv|nsubj:pass",
388
+ "374": "PROPN|NameType=Giv|obj",
389
+ "375": "PROPN|NameType=Giv|obl",
390
+ "376": "PROPN|NameType=Giv|obl:lmod",
391
+ "377": "PROPN|NameType=Giv|parataxis",
392
+ "378": "PROPN|NameType=Giv|root",
393
+ "379": "PROPN|NameType=Giv|vocative",
394
+ "380": "PROPN|NameType=Prs|acl",
395
+ "381": "PROPN|NameType=Prs|advcl",
396
+ "382": "PROPN|NameType=Prs|amod",
397
+ "383": "PROPN|NameType=Prs|compound",
398
+ "384": "PROPN|NameType=Prs|conj",
399
+ "385": "PROPN|NameType=Prs|dislocated",
400
+ "386": "PROPN|NameType=Prs|flat",
401
+ "387": "PROPN|NameType=Prs|iobj",
402
+ "388": "PROPN|NameType=Prs|nmod",
403
+ "389": "PROPN|NameType=Prs|nsubj",
404
+ "390": "PROPN|NameType=Prs|nsubj:outer",
405
+ "391": "PROPN|NameType=Prs|obj",
406
+ "392": "PROPN|NameType=Prs|obl",
407
+ "393": "PROPN|NameType=Prs|parataxis",
408
+ "394": "PROPN|NameType=Prs|root",
409
+ "395": "PROPN|NameType=Sur|acl",
410
+ "396": "PROPN|NameType=Sur|advcl",
411
+ "397": "PROPN|NameType=Sur|amod",
412
+ "398": "PROPN|NameType=Sur|compound",
413
+ "399": "PROPN|NameType=Sur|conj",
414
+ "400": "PROPN|NameType=Sur|csubj",
415
+ "401": "PROPN|NameType=Sur|dislocated",
416
+ "402": "PROPN|NameType=Sur|flat",
417
+ "403": "PROPN|NameType=Sur|flat:vv",
418
+ "404": "PROPN|NameType=Sur|iobj",
419
+ "405": "PROPN|NameType=Sur|list",
420
+ "406": "PROPN|NameType=Sur|nmod",
421
+ "407": "PROPN|NameType=Sur|nsubj",
422
+ "408": "PROPN|NameType=Sur|nsubj:outer",
423
+ "409": "PROPN|NameType=Sur|nsubj:pass",
424
+ "410": "PROPN|NameType=Sur|obj",
425
+ "411": "PROPN|NameType=Sur|obl",
426
+ "412": "PROPN|NameType=Sur|obl:lmod",
427
+ "413": "PROPN|NameType=Sur|parataxis",
428
+ "414": "PROPN|NameType=Sur|root",
429
+ "415": "PROPN|NameType=Sur|vocative",
430
+ "416": "SCONJ|_|case",
431
+ "417": "SCONJ|_|cc",
432
+ "418": "SCONJ|_|iobj",
433
+ "419": "SCONJ|_|mark",
434
+ "420": "SCONJ|_|nsubj",
435
+ "421": "SCONJ|_|obj",
436
+ "422": "SCONJ|_|root",
437
+ "423": "SYM|_|conj",
438
+ "424": "SYM|_|nmod",
439
+ "425": "SYM|_|nsubj",
440
+ "426": "SYM|_|root",
441
+ "427": "SYM|_|xcomp",
442
+ "428": "VERB|Degree=Equ|VerbForm=Part|amod",
443
+ "429": "VERB|Degree=Equ|acl",
444
+ "430": "VERB|Degree=Equ|advcl",
445
+ "431": "VERB|Degree=Equ|ccomp",
446
+ "432": "VERB|Degree=Equ|compound:redup",
447
+ "433": "VERB|Degree=Equ|conj",
448
+ "434": "VERB|Degree=Equ|csubj",
449
+ "435": "VERB|Degree=Equ|nsubj",
450
+ "436": "VERB|Degree=Equ|obj",
451
+ "437": "VERB|Degree=Equ|parataxis",
452
+ "438": "VERB|Degree=Equ|root",
453
+ "439": "VERB|Degree=Equ|xcomp",
454
+ "440": "VERB|Degree=Pos|VerbForm=Part|amod",
455
+ "441": "VERB|Degree=Pos|acl",
456
+ "442": "VERB|Degree=Pos|advcl",
457
+ "443": "VERB|Degree=Pos|ccomp",
458
+ "444": "VERB|Degree=Pos|compound",
459
+ "445": "VERB|Degree=Pos|compound:redup",
460
+ "446": "VERB|Degree=Pos|conj",
461
+ "447": "VERB|Degree=Pos|csubj",
462
+ "448": "VERB|Degree=Pos|csubj:outer",
463
+ "449": "VERB|Degree=Pos|dislocated",
464
+ "450": "VERB|Degree=Pos|fixed",
465
+ "451": "VERB|Degree=Pos|flat:vv",
466
+ "452": "VERB|Degree=Pos|iobj",
467
+ "453": "VERB|Degree=Pos|nsubj",
468
+ "454": "VERB|Degree=Pos|nsubj:outer",
469
+ "455": "VERB|Degree=Pos|obj",
470
+ "456": "VERB|Degree=Pos|obl",
471
+ "457": "VERB|Degree=Pos|parataxis",
472
+ "458": "VERB|Degree=Pos|root",
473
+ "459": "VERB|Degree=Pos|xcomp",
474
+ "460": "VERB|Polarity=Neg|VerbForm=Part|amod",
475
+ "461": "VERB|Polarity=Neg|acl",
476
+ "462": "VERB|Polarity=Neg|advcl",
477
+ "463": "VERB|Polarity=Neg|ccomp",
478
+ "464": "VERB|Polarity=Neg|conj",
479
+ "465": "VERB|Polarity=Neg|csubj",
480
+ "466": "VERB|Polarity=Neg|flat:vv",
481
+ "467": "VERB|Polarity=Neg|nsubj",
482
+ "468": "VERB|Polarity=Neg|obj",
483
+ "469": "VERB|Polarity=Neg|obl",
484
+ "470": "VERB|Polarity=Neg|parataxis",
485
+ "471": "VERB|Polarity=Neg|root",
486
+ "472": "VERB|Polarity=Neg|xcomp",
487
+ "473": "VERB|VerbForm=Part|amod",
488
+ "474": "VERB|_|acl",
489
+ "475": "VERB|_|advcl",
490
+ "476": "VERB|_|ccomp",
491
+ "477": "VERB|_|compound:redup",
492
+ "478": "VERB|_|conj",
493
+ "479": "VERB|_|csubj",
494
+ "480": "VERB|_|csubj:outer",
495
+ "481": "VERB|_|csubj:pass",
496
+ "482": "VERB|_|dislocated",
497
+ "483": "VERB|_|fixed",
498
+ "484": "VERB|_|flat:vv",
499
+ "485": "VERB|_|iobj",
500
+ "486": "VERB|_|list",
501
+ "487": "VERB|_|nsubj",
502
+ "488": "VERB|_|nsubj:outer",
503
+ "489": "VERB|_|obj",
504
+ "490": "VERB|_|obl",
505
+ "491": "VERB|_|obl:lmod",
506
+ "492": "VERB|_|parataxis",
507
+ "493": "VERB|_|root",
508
+ "494": "VERB|_|vocative",
509
+ "495": "VERB|_|xcomp",
510
+ "496": "X|_|goeswith"
511
+ },
512
+ "initializer_range": 0.02,
513
+ "intermediate_size": 4096,
514
+ "label2id": {
515
+ "-|_|dep": 0,
516
+ "ADP|Degree=Equ|cc": 1,
517
+ "ADP|_|amod": 2,
518
+ "ADP|_|case": 3,
519
+ "ADP|_|cc": 4,
520
+ "ADP|_|fixed": 5,
521
+ "ADP|_|mark": 6,
522
+ "ADP|_|nsubj": 7,
523
+ "ADP|_|obj": 8,
524
+ "ADP|_|root": 9,
525
+ "ADV|AdvType=Cau|advmod": 10,
526
+ "ADV|AdvType=Cau|amod": 11,
527
+ "ADV|AdvType=Cau|obj": 12,
528
+ "ADV|AdvType=Deg|Degree=Cmp|advmod": 13,
529
+ "ADV|AdvType=Deg|Degree=Cmp|amod": 14,
530
+ "ADV|AdvType=Deg|Degree=Cmp|conj": 15,
531
+ "ADV|AdvType=Deg|Degree=Pos|advmod": 16,
532
+ "ADV|AdvType=Deg|Degree=Pos|amod": 17,
533
+ "ADV|AdvType=Deg|Degree=Pos|ccomp": 18,
534
+ "ADV|AdvType=Deg|Degree=Pos|conj": 19,
535
+ "ADV|AdvType=Deg|Degree=Pos|flat:vv": 20,
536
+ "ADV|AdvType=Deg|Degree=Pos|parataxis": 21,
537
+ "ADV|AdvType=Deg|Degree=Pos|root": 22,
538
+ "ADV|AdvType=Deg|Degree=Sup|advmod": 23,
539
+ "ADV|AdvType=Deg|Degree=Sup|amod": 24,
540
+ "ADV|AdvType=Deg|Degree=Sup|conj": 25,
541
+ "ADV|AdvType=Deg|Degree=Sup|nsubj": 26,
542
+ "ADV|AdvType=Deg|Degree=Sup|root": 27,
543
+ "ADV|AdvType=Tim|Aspect=Perf|advmod": 28,
544
+ "ADV|AdvType=Tim|Aspect=Perf|amod": 29,
545
+ "ADV|AdvType=Tim|Aspect=Perf|parataxis": 30,
546
+ "ADV|AdvType=Tim|Tense=Fut|advmod": 31,
547
+ "ADV|AdvType=Tim|Tense=Fut|amod": 32,
548
+ "ADV|AdvType=Tim|Tense=Fut|nsubj": 33,
549
+ "ADV|AdvType=Tim|Tense=Fut|nsubj:outer": 34,
550
+ "ADV|AdvType=Tim|Tense=Fut|root": 35,
551
+ "ADV|AdvType=Tim|Tense=Past|advmod": 36,
552
+ "ADV|AdvType=Tim|Tense=Past|amod": 37,
553
+ "ADV|AdvType=Tim|Tense=Pres|advmod": 38,
554
+ "ADV|AdvType=Tim|Tense=Pres|amod": 39,
555
+ "ADV|AdvType=Tim|Tense=Pres|root": 40,
556
+ "ADV|AdvType=Tim|advcl": 41,
557
+ "ADV|AdvType=Tim|advmod": 42,
558
+ "ADV|AdvType=Tim|amod": 43,
559
+ "ADV|AdvType=Tim|ccomp": 44,
560
+ "ADV|AdvType=Tim|compound:redup": 45,
561
+ "ADV|AdvType=Tim|conj": 46,
562
+ "ADV|AdvType=Tim|flat:vv": 47,
563
+ "ADV|AdvType=Tim|nsubj": 48,
564
+ "ADV|AdvType=Tim|root": 49,
565
+ "ADV|Degree=Equ|VerbForm=Conv|advmod": 50,
566
+ "ADV|Degree=Pos|VerbForm=Conv|advmod": 51,
567
+ "ADV|Polarity=Neg|VerbForm=Conv|advmod": 52,
568
+ "ADV|Polarity=Neg|advmod": 53,
569
+ "ADV|Polarity=Neg|amod": 54,
570
+ "ADV|Polarity=Neg|conj": 55,
571
+ "ADV|Polarity=Neg|nsubj": 56,
572
+ "ADV|Polarity=Neg|obj": 57,
573
+ "ADV|Polarity=Neg|parataxis": 58,
574
+ "ADV|Polarity=Neg|root": 59,
575
+ "ADV|VerbForm=Conv|advmod": 60,
576
+ "ADV|_|acl": 61,
577
+ "ADV|_|advcl": 62,
578
+ "ADV|_|advmod": 63,
579
+ "ADV|_|amod": 64,
580
+ "ADV|_|cc": 65,
581
+ "ADV|_|ccomp": 66,
582
+ "ADV|_|conj": 67,
583
+ "ADV|_|flat:vv": 68,
584
+ "ADV|_|nsubj": 69,
585
+ "ADV|_|obj": 70,
586
+ "ADV|_|root": 71,
587
+ "AUX|Mood=Des|aux": 72,
588
+ "AUX|Mood=Des|conj": 73,
589
+ "AUX|Mood=Des|csubj": 74,
590
+ "AUX|Mood=Des|flat:vv": 75,
591
+ "AUX|Mood=Des|parataxis": 76,
592
+ "AUX|Mood=Des|root": 77,
593
+ "AUX|Mood=Nec|acl": 78,
594
+ "AUX|Mood=Nec|amod": 79,
595
+ "AUX|Mood=Nec|aux": 80,
596
+ "AUX|Mood=Nec|root": 81,
597
+ "AUX|Mood=Pot|acl": 82,
598
+ "AUX|Mood=Pot|advcl": 83,
599
+ "AUX|Mood=Pot|amod": 84,
600
+ "AUX|Mood=Pot|aux": 85,
601
+ "AUX|Mood=Pot|ccomp": 86,
602
+ "AUX|Mood=Pot|conj": 87,
603
+ "AUX|Mood=Pot|nsubj": 88,
604
+ "AUX|Mood=Pot|obj": 89,
605
+ "AUX|Mood=Pot|parataxis": 90,
606
+ "AUX|Mood=Pot|root": 91,
607
+ "AUX|VerbType=Cop|cop": 92,
608
+ "AUX|Voice=Pass|aux": 93,
609
+ "AUX|Voice=Pass|conj": 94,
610
+ "AUX|Voice=Pass|root": 95,
611
+ "CCONJ|_|advmod": 96,
612
+ "CCONJ|_|amod": 97,
613
+ "CCONJ|_|cc": 98,
614
+ "CCONJ|_|fixed": 99,
615
+ "CCONJ|_|obj": 100,
616
+ "CCONJ|_|orphan": 101,
617
+ "INTJ|_|advcl": 102,
618
+ "INTJ|_|compound:redup": 103,
619
+ "INTJ|_|conj": 104,
620
+ "INTJ|_|csubj": 105,
621
+ "INTJ|_|discourse": 106,
622
+ "INTJ|_|discourse:sp": 107,
623
+ "INTJ|_|dislocated": 108,
624
+ "INTJ|_|flat:vv": 109,
625
+ "INTJ|_|nsubj": 110,
626
+ "INTJ|_|obj": 111,
627
+ "INTJ|_|root": 112,
628
+ "NOUN|Case=Loc|acl": 113,
629
+ "NOUN|Case=Loc|advcl": 114,
630
+ "NOUN|Case=Loc|amod": 115,
631
+ "NOUN|Case=Loc|ccomp": 116,
632
+ "NOUN|Case=Loc|clf": 117,
633
+ "NOUN|Case=Loc|compound": 118,
634
+ "NOUN|Case=Loc|compound:redup": 119,
635
+ "NOUN|Case=Loc|conj": 120,
636
+ "NOUN|Case=Loc|csubj": 121,
637
+ "NOUN|Case=Loc|dislocated": 122,
638
+ "NOUN|Case=Loc|flat": 123,
639
+ "NOUN|Case=Loc|flat:vv": 124,
640
+ "NOUN|Case=Loc|iobj": 125,
641
+ "NOUN|Case=Loc|list": 126,
642
+ "NOUN|Case=Loc|nmod": 127,
643
+ "NOUN|Case=Loc|nsubj": 128,
644
+ "NOUN|Case=Loc|nsubj:outer": 129,
645
+ "NOUN|Case=Loc|obj": 130,
646
+ "NOUN|Case=Loc|obl": 131,
647
+ "NOUN|Case=Loc|obl:lmod": 132,
648
+ "NOUN|Case=Loc|obl:tmod": 133,
649
+ "NOUN|Case=Loc|parataxis": 134,
650
+ "NOUN|Case=Loc|root": 135,
651
+ "NOUN|Case=Loc|xcomp": 136,
652
+ "NOUN|Case=Tem|acl": 137,
653
+ "NOUN|Case=Tem|advcl": 138,
654
+ "NOUN|Case=Tem|amod": 139,
655
+ "NOUN|Case=Tem|ccomp": 140,
656
+ "NOUN|Case=Tem|clf": 141,
657
+ "NOUN|Case=Tem|compound": 142,
658
+ "NOUN|Case=Tem|compound:redup": 143,
659
+ "NOUN|Case=Tem|conj": 144,
660
+ "NOUN|Case=Tem|csubj": 145,
661
+ "NOUN|Case=Tem|flat": 146,
662
+ "NOUN|Case=Tem|flat:vv": 147,
663
+ "NOUN|Case=Tem|iobj": 148,
664
+ "NOUN|Case=Tem|list": 149,
665
+ "NOUN|Case=Tem|nmod": 150,
666
+ "NOUN|Case=Tem|nsubj": 151,
667
+ "NOUN|Case=Tem|nsubj:outer": 152,
668
+ "NOUN|Case=Tem|obj": 153,
669
+ "NOUN|Case=Tem|obl:tmod": 154,
670
+ "NOUN|Case=Tem|parataxis": 155,
671
+ "NOUN|Case=Tem|root": 156,
672
+ "NOUN|Case=Tem|xcomp": 157,
673
+ "NOUN|NounType=Clf|ccomp": 158,
674
+ "NOUN|NounType=Clf|clf": 159,
675
+ "NOUN|NounType=Clf|conj": 160,
676
+ "NOUN|NounType=Clf|flat": 161,
677
+ "NOUN|NounType=Clf|nmod": 162,
678
+ "NOUN|NounType=Clf|nsubj": 163,
679
+ "NOUN|NounType=Clf|obj": 164,
680
+ "NOUN|NounType=Clf|parataxis": 165,
681
+ "NOUN|NounType=Clf|root": 166,
682
+ "NOUN|_|acl": 167,
683
+ "NOUN|_|advcl": 168,
684
+ "NOUN|_|amod": 169,
685
+ "NOUN|_|ccomp": 170,
686
+ "NOUN|_|clf": 171,
687
+ "NOUN|_|compound": 172,
688
+ "NOUN|_|compound:redup": 173,
689
+ "NOUN|_|conj": 174,
690
+ "NOUN|_|csubj": 175,
691
+ "NOUN|_|csubj:outer": 176,
692
+ "NOUN|_|dislocated": 177,
693
+ "NOUN|_|flat": 178,
694
+ "NOUN|_|flat:foreign": 179,
695
+ "NOUN|_|flat:vv": 180,
696
+ "NOUN|_|iobj": 181,
697
+ "NOUN|_|list": 182,
698
+ "NOUN|_|nmod": 183,
699
+ "NOUN|_|nsubj": 184,
700
+ "NOUN|_|nsubj:outer": 185,
701
+ "NOUN|_|nsubj:pass": 186,
702
+ "NOUN|_|obj": 187,
703
+ "NOUN|_|obl": 188,
704
+ "NOUN|_|obl:lmod": 189,
705
+ "NOUN|_|obl:tmod": 190,
706
+ "NOUN|_|parataxis": 191,
707
+ "NOUN|_|root": 192,
708
+ "NOUN|_|vocative": 193,
709
+ "NOUN|_|xcomp": 194,
710
+ "NUM|NumType=Ord|conj": 195,
711
+ "NUM|NumType=Ord|flat": 196,
712
+ "NUM|NumType=Ord|nsubj": 197,
713
+ "NUM|NumType=Ord|nummod": 198,
714
+ "NUM|NumType=Ord|obj": 199,
715
+ "NUM|NumType=Ord|obl": 200,
716
+ "NUM|NumType=Ord|obl:lmod": 201,
717
+ "NUM|NumType=Ord|obl:tmod": 202,
718
+ "NUM|NumType=Ord|root": 203,
719
+ "NUM|_|acl": 204,
720
+ "NUM|_|advcl": 205,
721
+ "NUM|_|ccomp": 206,
722
+ "NUM|_|clf": 207,
723
+ "NUM|_|compound": 208,
724
+ "NUM|_|conj": 209,
725
+ "NUM|_|csubj": 210,
726
+ "NUM|_|dislocated": 211,
727
+ "NUM|_|flat": 212,
728
+ "NUM|_|iobj": 213,
729
+ "NUM|_|list": 214,
730
+ "NUM|_|nsubj": 215,
731
+ "NUM|_|nsubj:outer": 216,
732
+ "NUM|_|nummod": 217,
733
+ "NUM|_|obj": 218,
734
+ "NUM|_|obl": 219,
735
+ "NUM|_|obl:lmod": 220,
736
+ "NUM|_|obl:tmod": 221,
737
+ "NUM|_|parataxis": 222,
738
+ "NUM|_|root": 223,
739
+ "NUM|_|xcomp": 224,
740
+ "PART|_|acl": 225,
741
+ "PART|_|advcl": 226,
742
+ "PART|_|advmod": 227,
743
+ "PART|_|case": 228,
744
+ "PART|_|cc": 229,
745
+ "PART|_|ccomp": 230,
746
+ "PART|_|clf": 231,
747
+ "PART|_|conj": 232,
748
+ "PART|_|csubj:outer": 233,
749
+ "PART|_|discourse": 234,
750
+ "PART|_|discourse:sp": 235,
751
+ "PART|_|dislocated": 236,
752
+ "PART|_|fixed": 237,
753
+ "PART|_|flat": 238,
754
+ "PART|_|iobj": 239,
755
+ "PART|_|list": 240,
756
+ "PART|_|mark": 241,
757
+ "PART|_|nmod": 242,
758
+ "PART|_|nsubj": 243,
759
+ "PART|_|nsubj:outer": 244,
760
+ "PART|_|nsubj:pass": 245,
761
+ "PART|_|obj": 246,
762
+ "PART|_|obl": 247,
763
+ "PART|_|obl:lmod": 248,
764
+ "PART|_|parataxis": 249,
765
+ "PART|_|root": 250,
766
+ "PRON|Person=1|PronType=Prs|acl": 251,
767
+ "PRON|Person=1|PronType=Prs|advcl": 252,
768
+ "PRON|Person=1|PronType=Prs|ccomp": 253,
769
+ "PRON|Person=1|PronType=Prs|conj": 254,
770
+ "PRON|Person=1|PronType=Prs|det": 255,
771
+ "PRON|Person=1|PronType=Prs|iobj": 256,
772
+ "PRON|Person=1|PronType=Prs|nsubj": 257,
773
+ "PRON|Person=1|PronType=Prs|nsubj:outer": 258,
774
+ "PRON|Person=1|PronType=Prs|obj": 259,
775
+ "PRON|Person=1|PronType=Prs|obl": 260,
776
+ "PRON|Person=1|PronType=Prs|obl:lmod": 261,
777
+ "PRON|Person=1|PronType=Prs|root": 262,
778
+ "PRON|Person=1|PronType=Prs|vocative": 263,
779
+ "PRON|Person=2|PronType=Prs|conj": 264,
780
+ "PRON|Person=2|PronType=Prs|det": 265,
781
+ "PRON|Person=2|PronType=Prs|iobj": 266,
782
+ "PRON|Person=2|PronType=Prs|nsubj": 267,
783
+ "PRON|Person=2|PronType=Prs|obj": 268,
784
+ "PRON|Person=2|PronType=Prs|obl": 269,
785
+ "PRON|Person=2|PronType=Prs|root": 270,
786
+ "PRON|Person=2|PronType=Prs|vocative": 271,
787
+ "PRON|Person=3|PronType=Prs|advcl": 272,
788
+ "PRON|Person=3|PronType=Prs|amod": 273,
789
+ "PRON|Person=3|PronType=Prs|conj": 274,
790
+ "PRON|Person=3|PronType=Prs|det": 275,
791
+ "PRON|Person=3|PronType=Prs|dislocated": 276,
792
+ "PRON|Person=3|PronType=Prs|expl": 277,
793
+ "PRON|Person=3|PronType=Prs|iobj": 278,
794
+ "PRON|Person=3|PronType=Prs|nsubj": 279,
795
+ "PRON|Person=3|PronType=Prs|nsubj:pass": 280,
796
+ "PRON|Person=3|PronType=Prs|obj": 281,
797
+ "PRON|Person=3|PronType=Prs|obl": 282,
798
+ "PRON|Person=3|PronType=Prs|root": 283,
799
+ "PRON|PronType=Dem|acl": 284,
800
+ "PRON|PronType=Dem|advcl": 285,
801
+ "PRON|PronType=Dem|amod": 286,
802
+ "PRON|PronType=Dem|compound": 287,
803
+ "PRON|PronType=Dem|conj": 288,
804
+ "PRON|PronType=Dem|det": 289,
805
+ "PRON|PronType=Dem|dislocated": 290,
806
+ "PRON|PronType=Dem|expl": 291,
807
+ "PRON|PronType=Dem|flat": 292,
808
+ "PRON|PronType=Dem|iobj": 293,
809
+ "PRON|PronType=Dem|nsubj": 294,
810
+ "PRON|PronType=Dem|nsubj:outer": 295,
811
+ "PRON|PronType=Dem|obj": 296,
812
+ "PRON|PronType=Dem|obl": 297,
813
+ "PRON|PronType=Dem|obl:lmod": 298,
814
+ "PRON|PronType=Dem|root": 299,
815
+ "PRON|PronType=Int|amod": 300,
816
+ "PRON|PronType=Int|ccomp": 301,
817
+ "PRON|PronType=Int|conj": 302,
818
+ "PRON|PronType=Int|det": 303,
819
+ "PRON|PronType=Int|dislocated": 304,
820
+ "PRON|PronType=Int|nsubj": 305,
821
+ "PRON|PronType=Int|nsubj:outer": 306,
822
+ "PRON|PronType=Int|obj": 307,
823
+ "PRON|PronType=Int|obl": 308,
824
+ "PRON|PronType=Int|parataxis": 309,
825
+ "PRON|PronType=Int|root": 310,
826
+ "PRON|PronType=Int|vocative": 311,
827
+ "PRON|PronType=Int|xcomp": 312,
828
+ "PRON|PronType=Prs|Reflex=Yes|det": 313,
829
+ "PRON|PronType=Prs|Reflex=Yes|dislocated": 314,
830
+ "PRON|PronType=Prs|Reflex=Yes|nsubj": 315,
831
+ "PRON|PronType=Prs|Reflex=Yes|obj": 316,
832
+ "PRON|PronType=Prs|Reflex=Yes|obl": 317,
833
+ "PRON|PronType=Prs|Reflex=Yes|root": 318,
834
+ "PRON|PronType=Prs|conj": 319,
835
+ "PRON|PronType=Prs|det": 320,
836
+ "PRON|PronType=Prs|iobj": 321,
837
+ "PRON|PronType=Prs|nsubj": 322,
838
+ "PRON|PronType=Prs|nsubj:outer": 323,
839
+ "PRON|PronType=Prs|obj": 324,
840
+ "PROPN|Case=Loc|NameType=Geo|acl": 325,
841
+ "PROPN|Case=Loc|NameType=Geo|advcl": 326,
842
+ "PROPN|Case=Loc|NameType=Geo|amod": 327,
843
+ "PROPN|Case=Loc|NameType=Geo|compound": 328,
844
+ "PROPN|Case=Loc|NameType=Geo|conj": 329,
845
+ "PROPN|Case=Loc|NameType=Geo|csubj": 330,
846
+ "PROPN|Case=Loc|NameType=Geo|dislocated": 331,
847
+ "PROPN|Case=Loc|NameType=Geo|flat": 332,
848
+ "PROPN|Case=Loc|NameType=Geo|iobj": 333,
849
+ "PROPN|Case=Loc|NameType=Geo|nmod": 334,
850
+ "PROPN|Case=Loc|NameType=Geo|nsubj": 335,
851
+ "PROPN|Case=Loc|NameType=Geo|nsubj:outer": 336,
852
+ "PROPN|Case=Loc|NameType=Geo|obj": 337,
853
+ "PROPN|Case=Loc|NameType=Geo|obl": 338,
854
+ "PROPN|Case=Loc|NameType=Geo|obl:lmod": 339,
855
+ "PROPN|Case=Loc|NameType=Geo|parataxis": 340,
856
+ "PROPN|Case=Loc|NameType=Geo|root": 341,
857
+ "PROPN|Case=Loc|NameType=Geo|xcomp": 342,
858
+ "PROPN|Case=Loc|NameType=Nat|acl": 343,
859
+ "PROPN|Case=Loc|NameType=Nat|advcl": 344,
860
+ "PROPN|Case=Loc|NameType=Nat|amod": 345,
861
+ "PROPN|Case=Loc|NameType=Nat|ccomp": 346,
862
+ "PROPN|Case=Loc|NameType=Nat|compound": 347,
863
+ "PROPN|Case=Loc|NameType=Nat|conj": 348,
864
+ "PROPN|Case=Loc|NameType=Nat|flat": 349,
865
+ "PROPN|Case=Loc|NameType=Nat|iobj": 350,
866
+ "PROPN|Case=Loc|NameType=Nat|nmod": 351,
867
+ "PROPN|Case=Loc|NameType=Nat|nsubj": 352,
868
+ "PROPN|Case=Loc|NameType=Nat|nsubj:outer": 353,
869
+ "PROPN|Case=Loc|NameType=Nat|obj": 354,
870
+ "PROPN|Case=Loc|NameType=Nat|obl": 355,
871
+ "PROPN|Case=Loc|NameType=Nat|obl:lmod": 356,
872
+ "PROPN|Case=Loc|NameType=Nat|parataxis": 357,
873
+ "PROPN|Case=Loc|NameType=Nat|root": 358,
874
+ "PROPN|NameType=Giv|acl": 359,
875
+ "PROPN|NameType=Giv|advcl": 360,
876
+ "PROPN|NameType=Giv|amod": 361,
877
+ "PROPN|NameType=Giv|ccomp": 362,
878
+ "PROPN|NameType=Giv|compound": 363,
879
+ "PROPN|NameType=Giv|conj": 364,
880
+ "PROPN|NameType=Giv|dislocated": 365,
881
+ "PROPN|NameType=Giv|flat": 366,
882
+ "PROPN|NameType=Giv|flat:vv": 367,
883
+ "PROPN|NameType=Giv|iobj": 368,
884
+ "PROPN|NameType=Giv|list": 369,
885
+ "PROPN|NameType=Giv|nmod": 370,
886
+ "PROPN|NameType=Giv|nsubj": 371,
887
+ "PROPN|NameType=Giv|nsubj:outer": 372,
888
+ "PROPN|NameType=Giv|nsubj:pass": 373,
889
+ "PROPN|NameType=Giv|obj": 374,
890
+ "PROPN|NameType=Giv|obl": 375,
891
+ "PROPN|NameType=Giv|obl:lmod": 376,
892
+ "PROPN|NameType=Giv|parataxis": 377,
893
+ "PROPN|NameType=Giv|root": 378,
894
+ "PROPN|NameType=Giv|vocative": 379,
895
+ "PROPN|NameType=Prs|acl": 380,
896
+ "PROPN|NameType=Prs|advcl": 381,
897
+ "PROPN|NameType=Prs|amod": 382,
898
+ "PROPN|NameType=Prs|compound": 383,
899
+ "PROPN|NameType=Prs|conj": 384,
900
+ "PROPN|NameType=Prs|dislocated": 385,
901
+ "PROPN|NameType=Prs|flat": 386,
902
+ "PROPN|NameType=Prs|iobj": 387,
903
+ "PROPN|NameType=Prs|nmod": 388,
904
+ "PROPN|NameType=Prs|nsubj": 389,
905
+ "PROPN|NameType=Prs|nsubj:outer": 390,
906
+ "PROPN|NameType=Prs|obj": 391,
907
+ "PROPN|NameType=Prs|obl": 392,
908
+ "PROPN|NameType=Prs|parataxis": 393,
909
+ "PROPN|NameType=Prs|root": 394,
910
+ "PROPN|NameType=Sur|acl": 395,
911
+ "PROPN|NameType=Sur|advcl": 396,
912
+ "PROPN|NameType=Sur|amod": 397,
913
+ "PROPN|NameType=Sur|compound": 398,
914
+ "PROPN|NameType=Sur|conj": 399,
915
+ "PROPN|NameType=Sur|csubj": 400,
916
+ "PROPN|NameType=Sur|dislocated": 401,
917
+ "PROPN|NameType=Sur|flat": 402,
918
+ "PROPN|NameType=Sur|flat:vv": 403,
919
+ "PROPN|NameType=Sur|iobj": 404,
920
+ "PROPN|NameType=Sur|list": 405,
921
+ "PROPN|NameType=Sur|nmod": 406,
922
+ "PROPN|NameType=Sur|nsubj": 407,
923
+ "PROPN|NameType=Sur|nsubj:outer": 408,
924
+ "PROPN|NameType=Sur|nsubj:pass": 409,
925
+ "PROPN|NameType=Sur|obj": 410,
926
+ "PROPN|NameType=Sur|obl": 411,
927
+ "PROPN|NameType=Sur|obl:lmod": 412,
928
+ "PROPN|NameType=Sur|parataxis": 413,
929
+ "PROPN|NameType=Sur|root": 414,
930
+ "PROPN|NameType=Sur|vocative": 415,
931
+ "SCONJ|_|case": 416,
932
+ "SCONJ|_|cc": 417,
933
+ "SCONJ|_|iobj": 418,
934
+ "SCONJ|_|mark": 419,
935
+ "SCONJ|_|nsubj": 420,
936
+ "SCONJ|_|obj": 421,
937
+ "SCONJ|_|root": 422,
938
+ "SYM|_|conj": 423,
939
+ "SYM|_|nmod": 424,
940
+ "SYM|_|nsubj": 425,
941
+ "SYM|_|root": 426,
942
+ "SYM|_|xcomp": 427,
943
+ "VERB|Degree=Equ|VerbForm=Part|amod": 428,
944
+ "VERB|Degree=Equ|acl": 429,
945
+ "VERB|Degree=Equ|advcl": 430,
946
+ "VERB|Degree=Equ|ccomp": 431,
947
+ "VERB|Degree=Equ|compound:redup": 432,
948
+ "VERB|Degree=Equ|conj": 433,
949
+ "VERB|Degree=Equ|csubj": 434,
950
+ "VERB|Degree=Equ|nsubj": 435,
951
+ "VERB|Degree=Equ|obj": 436,
952
+ "VERB|Degree=Equ|parataxis": 437,
953
+ "VERB|Degree=Equ|root": 438,
954
+ "VERB|Degree=Equ|xcomp": 439,
955
+ "VERB|Degree=Pos|VerbForm=Part|amod": 440,
956
+ "VERB|Degree=Pos|acl": 441,
957
+ "VERB|Degree=Pos|advcl": 442,
958
+ "VERB|Degree=Pos|ccomp": 443,
959
+ "VERB|Degree=Pos|compound": 444,
960
+ "VERB|Degree=Pos|compound:redup": 445,
961
+ "VERB|Degree=Pos|conj": 446,
962
+ "VERB|Degree=Pos|csubj": 447,
963
+ "VERB|Degree=Pos|csubj:outer": 448,
964
+ "VERB|Degree=Pos|dislocated": 449,
965
+ "VERB|Degree=Pos|fixed": 450,
966
+ "VERB|Degree=Pos|flat:vv": 451,
967
+ "VERB|Degree=Pos|iobj": 452,
968
+ "VERB|Degree=Pos|nsubj": 453,
969
+ "VERB|Degree=Pos|nsubj:outer": 454,
970
+ "VERB|Degree=Pos|obj": 455,
971
+ "VERB|Degree=Pos|obl": 456,
972
+ "VERB|Degree=Pos|parataxis": 457,
973
+ "VERB|Degree=Pos|root": 458,
974
+ "VERB|Degree=Pos|xcomp": 459,
975
+ "VERB|Polarity=Neg|VerbForm=Part|amod": 460,
976
+ "VERB|Polarity=Neg|acl": 461,
977
+ "VERB|Polarity=Neg|advcl": 462,
978
+ "VERB|Polarity=Neg|ccomp": 463,
979
+ "VERB|Polarity=Neg|conj": 464,
980
+ "VERB|Polarity=Neg|csubj": 465,
981
+ "VERB|Polarity=Neg|flat:vv": 466,
982
+ "VERB|Polarity=Neg|nsubj": 467,
983
+ "VERB|Polarity=Neg|obj": 468,
984
+ "VERB|Polarity=Neg|obl": 469,
985
+ "VERB|Polarity=Neg|parataxis": 470,
986
+ "VERB|Polarity=Neg|root": 471,
987
+ "VERB|Polarity=Neg|xcomp": 472,
988
+ "VERB|VerbForm=Part|amod": 473,
989
+ "VERB|_|acl": 474,
990
+ "VERB|_|advcl": 475,
991
+ "VERB|_|ccomp": 476,
992
+ "VERB|_|compound:redup": 477,
993
+ "VERB|_|conj": 478,
994
+ "VERB|_|csubj": 479,
995
+ "VERB|_|csubj:outer": 480,
996
+ "VERB|_|csubj:pass": 481,
997
+ "VERB|_|dislocated": 482,
998
+ "VERB|_|fixed": 483,
999
+ "VERB|_|flat:vv": 484,
1000
+ "VERB|_|iobj": 485,
1001
+ "VERB|_|list": 486,
1002
+ "VERB|_|nsubj": 487,
1003
+ "VERB|_|nsubj:outer": 488,
1004
+ "VERB|_|obj": 489,
1005
+ "VERB|_|obl": 490,
1006
+ "VERB|_|obl:lmod": 491,
1007
+ "VERB|_|parataxis": 492,
1008
+ "VERB|_|root": 493,
1009
+ "VERB|_|vocative": 494,
1010
+ "VERB|_|xcomp": 495,
1011
+ "X|_|goeswith": 496
1012
+ },
1013
+ "layer_norm_eps": 1e-05,
1014
+ "max_position_embeddings": 514,
1015
+ "model_type": "roberta",
1016
+ "num_attention_heads": 16,
1017
+ "num_hidden_layers": 24,
1018
+ "pad_token_id": 1,
1019
+ "position_embedding_type": "absolute",
1020
+ "tokenizer_class": "BertTokenizerFast",
1021
+ "torch_dtype": "float32",
1022
+ "transformers_version": "4.22.0",
1023
+ "type_vocab_size": 1,
1024
+ "use_cache": true,
1025
+ "vocab_size": 26318
1026
+ }
maker.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#! /usr/bin/python3
# Training script: fine-tunes the `src` checkpoint on the treebank at `url`
# and saves the resulting model and tokenizer under `tgt`.
# Base checkpoint loaded with from_pretrained() further down.
src="KoichiYasuoka/roberta-classical-chinese-large-char"
# Name passed to save_model()/save_pretrained() at the end of the script.
tgt="KoichiYasuoka/roberta-classical-chinese-large-ud-goeswith"
# Git repository holding the *-train/dev/test.conllu treebank files.
url="https://github.com/UniversalDependencies/UD_Classical_Chinese-Kyoto"
import os
# Local directory name of the clone: the last path component of the URL.
d=os.path.basename(url)
# Shallow-clone the treebank unless a directory of that name already exists.
# NOTE(review): the exit status of os.system is not checked, so a failed
# clone only surfaces later when the .conllu files cannot be opened.
os.system("test -d "+d+" || git clone --depth=1 "+url)
# Copy each split into the working directory as train/dev/test.conllu.
os.system("for F in train dev test ; do cp "+d+"/*-$F.conllu $F.conllu ; done")
9
class UDgoeswithDataset(object):
    """Token-classification dataset built from a CoNLL-U treebank.

    Each sentence is tokenized word by word. A word that yields more than one
    subword gets extra rows tagged ``X|_|goeswith`` whose head is the word's
    own ID, so every subword has exactly one row.

    For a sentence with N subwords the dataset stores N+1 examples:

    * the plain sentence ``[CLS] ids [SEP]`` tagged with ``UPOS|FEATS|DEPREL``
      for every row, and
    * one example per subword position ``i`` in which that position is
      replaced by the mask id and the original id is appended after ``[SEP]``.
      Only rows whose head is position ``i`` keep their tag (the root row
      counts as its own head); all other positions get ``-|_|dep``.

    Attributes:
        ids: list of input-id lists, one per example.
        tags: list of tag-string lists, aligned with ``ids``.
        label2id: mapping from tag string to integer id (sorted tag order).
    """

    # Filler tag for special tokens and for rows not headed by the masked position.
    _DEP = "-|_|dep"

    def __init__(self, conllu, tokenizer):
        """Read ``conllu`` and build all examples.

        Args:
            conllu: path of a UTF-8 CoNLL-U file.
            tokenizer: callable taking a list of word forms plus
                ``add_special_tokens=False`` and returning a mapping with an
                ``"input_ids"`` list of per-word id lists; it must also
                expose ``cls_token_id``, ``sep_token_id``, ``mask_token_id``.
        """
        self.ids, self.tags = [], []
        labels = set()
        with open(conllu, "r", encoding="utf-8") as r:
            special = (tokenizer.cls_token_id, tokenizer.sep_token_id,
                       tokenizer.mask_token_id)
            rows = []
            for line in r:
                fields = line.split("\t")
                # Only plain integer IDs are token rows; anything else
                # (blank line, comment, ...) terminates the current sentence.
                if len(fields) == 10 and fields[0].isdecimal():
                    rows.append(fields)
                elif rows:
                    labels.update(self._add_sentence(rows, tokenizer, special))
                    rows = []
            # A file that does not end with a blank line would otherwise
            # lose its final sentence.
            if rows:
                labels.update(self._add_sentence(rows, tokenizer, special))
        self.label2id = {l: i for i, l in enumerate(sorted(labels))}

    def _add_sentence(self, rows, tokenizer, special):
        """Append the examples for one sentence; return the tags it uses."""
        cls, sep, msk = special
        dep = self._DEP
        rows = list(rows)
        pieces = tokenizer([t[1] for t in rows], add_special_tokens=False)["input_ids"]
        # Walk backwards so that inserting rows does not shift the indices
        # of words that are still to be processed.
        for i in range(len(pieces) - 1, -1, -1):
            for _ in range(1, len(pieces[i])):
                rows.insert(i + 1, [rows[i][0], "_", "_", "X", "_", "_",
                                    rows[i][0], "goeswith", "_", "_"])
        # Map each row's HEAD (a word ID) to a 1-based row position; index()
        # returns the first row with that ID, i.e. the word's first subword.
        row_ids = ["0"] + [t[0] for t in rows]
        heads = [i if t[6] == "0" else row_ids.index(t[6])
                 for i, t in enumerate(rows, 1)]
        tags = [t[3] + "|" + t[5] + "|" + t[7] for t in rows]
        flat = [x for piece in pieces for x in piece]
        self.ids.append([cls] + flat + [sep])
        self.tags.append([dep] + tags + [dep])
        for i, k in enumerate(flat):
            self.ids.append([cls] + flat[:i] + [msk] + flat[i + 1:] + [sep, k])
            self.tags.append(
                [dep]
                + [t if heads[j] == i + 1 else dep for j, t in enumerate(tags)]
                + [dep, dep]
            )
        return set(tags) | {dep}

    def __call__(*args):
        """Merge the label sets of this dataset and all datasets passed in.

        Every dataset in ``args`` (including the one called) is given the
        same merged ``label2id`` mapping, which is also returned.
        """
        labels = set()
        for ds in args:
            labels.update(ds.label2id)
        lid = {l: i for i, l in enumerate(sorted(labels))}
        for ds in args:
            ds.label2id = lid
        return lid

    def __len__(self):
        """Return the number of stored examples."""
        return len(self.ids)

    def __getitem__(self, i):
        """Return example ``i`` as ``{"input_ids": [...], "labels": [...]}``."""
        return {"input_ids": self.ids[i],
                "labels": [self.label2id[t] for t in self.tags[i]]}
43
from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
# Tokenizer of the base checkpoint; shared by all three datasets and saved with the model.
tkz=AutoTokenizer.from_pretrained(src)
# One dataset per treebank split (files copied into the working directory above).
trainDS=UDgoeswithDataset("train.conllu",tkz)
devDS=UDgoeswithDataset("dev.conllu",tkz)
testDS=UDgoeswithDataset("test.conllu",tkz)
# Calling a dataset merges the label sets of all three splits and installs the
# same label2id mapping on each; testDS is used only for this label merge and
# is not passed to the Trainer.
lid=trainDS(devDS,testDS)
# Base config with the classification head sized to the merged label set.
cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()})
# 3 epochs, batch size 32 per device, evaluation after every epoch; checkpoints go to /tmp.
arg=TrainingArguments(num_train_epochs=3,per_device_train_batch_size=32,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1)
# Train on the train split and evaluate on the dev split.
trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg),train_dataset=trainDS,eval_dataset=devDS)
trn.train()
# Write the fine-tuned model and its tokenizer to the target name.
trn.save_model(tgt)
tkz.save_pretrained(tgt)
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51ac4ffd2009eb439a2381314153d3d32d575113b72c638bf675ffe30211e4e1
3
+ size 1321327921
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "never_split": null,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "special_tokens_map_file": null,
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizerFast",
14
+ "unk_token": "[UNK]"
15
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff