KoichiYasuoka committed
Commit 0fc396c • 1 Parent(s): 566045b
model improved
Browse files:
- config.json +52 -50
- maker.py +5 -4
- pytorch_model.bin +2 -2
config.json CHANGED
@@ -129,31 +129,32 @@
 "112": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|cc",
 "113": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|mark",
 "114": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|root",
-"115": "SCONJ|\u63a5\u7d9a\u8a5e|…
-"116": "SCONJ|\u63a5\u7d9a\u8a5e|…
-"117": "…
-"118": "SYM|_|…
-"119": "…
-"120": "VERB|\u4ed6\u52d5\u8a5e|…
-"121": "VERB|\u4ed6\u52d5\u8a5e|…
-"122": "VERB|\u4ed6\u52d5\u8a5e|…
-"123": "VERB|\u4ed6\u52d5\u8a5e|…
-"124": "VERB|\u4ed6\u52d5\u8a5e|…
-"125": "VERB|\u4ed6\u52d5\u8a5e|…
-"126": "VERB|\…
-"127": "VERB|\u5b8c\u5168\u52d5\u8a5e|…
-"128": "VERB|\u5b8c\u5168\u52d5\u8a5e|…
-"129": "VERB|\u5b8c\u5168\u52d5\u8a5e|…
-"130": "VERB|\…
-"131": "VERB|\u81ea\u52d5\u8a5e|…
-"132": "VERB|\u81ea\u52d5\u8a5e|…
-"133": "VERB|\u81ea\u52d5\u8a5e|…
-"134": "VERB|\u81ea\u52d5\u8a5e|…
-"135": "VERB|\u81ea\u52d5\u8a5e|…
-"136": "VERB|\u81ea\u52d5\u8a5e|…
-"137": "…
-"138": "X|_|…
-"139": "X|_|…
+"115": "SCONJ|\u63a5\u7d9a\u8a5e|advmod",
+"116": "SCONJ|\u63a5\u7d9a\u8a5e|case",
+"117": "SCONJ|\u63a5\u7d9a\u8a5e|mark",
+"118": "SYM|_|conj",
+"119": "SYM|_|obj",
+"120": "VERB|\u4ed6\u52d5\u8a5e|acl",
+"121": "VERB|\u4ed6\u52d5\u8a5e|advcl",
+"122": "VERB|\u4ed6\u52d5\u8a5e|amod",
+"123": "VERB|\u4ed6\u52d5\u8a5e|ccomp",
+"124": "VERB|\u4ed6\u52d5\u8a5e|conj",
+"125": "VERB|\u4ed6\u52d5\u8a5e|parataxis",
+"126": "VERB|\u4ed6\u52d5\u8a5e|root",
+"127": "VERB|\u5b8c\u5168\u52d5\u8a5e|acl",
+"128": "VERB|\u5b8c\u5168\u52d5\u8a5e|advcl",
+"129": "VERB|\u5b8c\u5168\u52d5\u8a5e|parataxis",
+"130": "VERB|\u5b8c\u5168\u52d5\u8a5e|root",
+"131": "VERB|\u81ea\u52d5\u8a5e|acl",
+"132": "VERB|\u81ea\u52d5\u8a5e|advcl",
+"133": "VERB|\u81ea\u52d5\u8a5e|amod",
+"134": "VERB|\u81ea\u52d5\u8a5e|ccomp",
+"135": "VERB|\u81ea\u52d5\u8a5e|conj",
+"136": "VERB|\u81ea\u52d5\u8a5e|parataxis",
+"137": "VERB|\u81ea\u52d5\u8a5e|root",
+"138": "X|_|conj",
+"139": "X|_|goeswith",
+"140": "X|_|nsubj"
 },
 "initializer_range": 0.02,
 "intermediate_size": 3072,
@@ -273,31 +274,32 @@
 "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|cc": 112,
 "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|mark": 113,
 "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|root": 114,
-"SCONJ|\u63a5\u7d9a\u8a5e|…
-"SCONJ|\u63a5\u7d9a\u8a5e|…
-"…
-"SYM|_|…
-"…
-"VERB|\u4ed6\u52d5\u8a5e|…
-"VERB|\u4ed6\u52d5\u8a5e|…
-"VERB|\u4ed6\u52d5\u8a5e|…
-"VERB|\u4ed6\u52d5\u8a5e|…
-"VERB|\u4ed6\u52d5\u8a5e|…
-"VERB|\u4ed6\u52d5\u8a5e|…
-"VERB|\…
-"VERB|\u5b8c\u5168\u52d5\u8a5e|…
-"VERB|\u5b8c\u5168\u52d5\u8a5e|…
-"VERB|\u5b8c\u5168\u52d5\u8a5e|…
-"VERB|\…
-"VERB|\u81ea\u52d5\u8a5e|…
-"VERB|\u81ea\u52d5\u8a5e|…
-"VERB|\u81ea\u52d5\u8a5e|…
-"VERB|\u81ea\u52d5\u8a5e|…
-"VERB|\u81ea\u52d5\u8a5e|…
-"VERB|\u81ea\u52d5\u8a5e|…
-"…
-"X|_|…
-"X|_|…
+"SCONJ|\u63a5\u7d9a\u8a5e|advmod": 115,
+"SCONJ|\u63a5\u7d9a\u8a5e|case": 116,
+"SCONJ|\u63a5\u7d9a\u8a5e|mark": 117,
+"SYM|_|conj": 118,
+"SYM|_|obj": 119,
+"VERB|\u4ed6\u52d5\u8a5e|acl": 120,
+"VERB|\u4ed6\u52d5\u8a5e|advcl": 121,
+"VERB|\u4ed6\u52d5\u8a5e|amod": 122,
+"VERB|\u4ed6\u52d5\u8a5e|ccomp": 123,
+"VERB|\u4ed6\u52d5\u8a5e|conj": 124,
+"VERB|\u4ed6\u52d5\u8a5e|parataxis": 125,
+"VERB|\u4ed6\u52d5\u8a5e|root": 126,
+"VERB|\u5b8c\u5168\u52d5\u8a5e|acl": 127,
+"VERB|\u5b8c\u5168\u52d5\u8a5e|advcl": 128,
+"VERB|\u5b8c\u5168\u52d5\u8a5e|parataxis": 129,
+"VERB|\u5b8c\u5168\u52d5\u8a5e|root": 130,
+"VERB|\u81ea\u52d5\u8a5e|acl": 131,
+"VERB|\u81ea\u52d5\u8a5e|advcl": 132,
+"VERB|\u81ea\u52d5\u8a5e|amod": 133,
+"VERB|\u81ea\u52d5\u8a5e|ccomp": 134,
+"VERB|\u81ea\u52d5\u8a5e|conj": 135,
+"VERB|\u81ea\u52d5\u8a5e|parataxis": 136,
+"VERB|\u81ea\u52d5\u8a5e|root": 137,
+"X|_|conj": 138,
+"X|_|goeswith": 139,
+"X|_|nsubj": 140
 },
 "layer_norm_eps": 1e-07,
 "max_position_embeddings": 512,
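The two hunks are mirror images of one retrained classifier head: id2label maps each class index to a string that joins the UPOS tag, the Japanese word class (XPOS; the \uXXXX escapes are plain JSON for e.g. 他動詞 "transitive verb"), and the UD dependency relation with "|", while label2id is the exact inverse table. A minimal decoding sketch; only the file name and the index 126 are taken from the diff above, the rest is illustrative:

import json

# Each label packs "UPOS|XPOS|deprel" into one string; id 126 is
# "VERB|他動詞|root" (a transitive verb serving as the dependency root).
with open("config.json", "r", encoding="utf-8") as f:
    cfg = json.load(f)

upos, xpos, deprel = cfg["id2label"]["126"].split("|")
print(upos, xpos, deprel)   # VERB 他動詞 root

# JSON forces id2label keys to be strings, while label2id values stay ints.
assert cfg["label2id"]["|".join((upos, xpos, deprel))] == 126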
maker.py CHANGED
@@ -15,8 +15,9 @@ class UDgoeswithDataset(object):
     dep,c="-|_|dep",[]
     for s in r:
       t=s.split("\t")
-      if len(t)==10…
-      …
+      if len(t)==10:
+        if t[0].isdecimal():
+          c.append(t)
       elif c!=[]:
         for x in [1,2]:
           d=list(c)
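The rewritten condition collects only plain token lines from train.conllu: a CoNLL-U token line has exactly ten tab-separated fields and a purely decimal ID. Multi-word-token ranges such as "1-2" have ten fields but a non-decimal ID, so they are now skipped rather than collected. A self-contained illustration of the filter (the sample lines are invented):

# Invented CoNLL-U fragment: a comment line, a plain token line,
# and a multi-word-token range line.
sample = [
    "# text = ...",
    "1\t本\t本\tNOUN\t名詞\t_\t0\troot\t_\t_",
    "1-2\t_\t_\t_\t_\t_\t_\t_\t_\t_",
]
c = []
for s in sample:
    t = s.split("\t")
    if len(t) == 10:              # the comment splits into a single field
        if t[0].isdecimal():      # "1-2" fails: "-" is not a decimal digit
            c.append(t)
print([t[0] for t in c])          # ['1'] -- only the token line survives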
@@ -42,8 +43,8 @@ from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification
 tkz=AutoTokenizer.from_pretrained(src)
 trainDS=UDgoeswithDataset("train.conllu",tkz)
 lid=trainDS.label2id
-cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True)
-arg=TrainingArguments(num_train_epochs=3,per_device_train_batch_size=…
+cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True,task_specific_params=None)
+arg=TrainingArguments(num_train_epochs=3,per_device_train_batch_size=16,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,learning_rate=5e-05,warmup_ratio=0.1)
 trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True),train_dataset=trainDS)
 trn.train()
 trn.save_model(tgt)
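Once trn.save_model(tgt) has run, the retrained head loads back like any token-classification checkpoint. A hedged usage sketch, not part of the commit: the paths and the test sentence are placeholders, and since the Trainer above is not handed the tokenizer, save_model stores only the model and config, so the tokenizer is reloaded from the source model:

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

src = "..."   # placeholder: the base model maker.py fine-tuned from
tgt = "..."   # placeholder: the directory trn.save_model(tgt) wrote to

tkz = AutoTokenizer.from_pretrained(src)
mdl = AutoModelForTokenClassification.from_pretrained(tgt)

s = "本を読んだ"                    # assumed test sentence
v = tkz(s, return_tensors="pt")
with torch.no_grad():
    logits = mdl(**v).logits        # (1, sequence_length, num_labels)

# Greedy per-token decoding into the "UPOS|XPOS|deprel" label strings.
for tok, i in zip(tkz.convert_ids_to_tokens(v["input_ids"][0]),
                  logits.argmax(dim=-1)[0].tolist()):
    print(tok, mdl.config.id2label[i])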
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:bcadf30041fc773bdc1d871b11411b9810f9f19c0cb52eab4350d68935f65513
+size 419803411
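The weight file itself lives in Git LFS, and this pointer is all that git stores: per the LFS spec, oid is the SHA-256 of the real file and size is its byte count. A small sketch for checking a downloaded pytorch_model.bin against the pointer above:

import hashlib, os

EXPECTED_OID = "bcadf30041fc773bdc1d871b11411b9810f9f19c0cb52eab4350d68935f65513"
EXPECTED_SIZE = 419803411   # bytes, roughly 420 MB

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):   # 1 MiB at a time
        h.update(chunk)

print(os.path.getsize("pytorch_model.bin") == EXPECTED_SIZE)
print(h.hexdigest() == EXPECTED_OID)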