KoichiYasuoka committed on
Commit
498073a
1 Parent(s): a04d51c

model improved

Browse files
Files changed (6) hide show
  1. README.md +1 -1
  2. config.json +152 -141
  3. maker.py +4 -4
  4. pytorch_model.bin +2 -2
  5. tokenizer.json +0 -0
  6. training_args.bin +0 -3
README.md CHANGED
@@ -17,7 +17,7 @@ widget:
17
 
18
  ## Model Description
19
 
20
- This is a DeBERTa(V2) model pretrained on 青空文庫 texts for POS-tagging and dependency-parsing (using `goeswith` for subwords), derived from [deberta-large-japanese-aozora](https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora) and [UD_Japanese-GSDLUW](https://github.com/UniversalDependencies/UD_Japanese-GSDLUW).
21
 
22
  ## How to Use
23
 
 
17
 
18
  ## Model Description
19
 
20
+ This is a DeBERTa(V2) model pretrained on 青空文庫 texts for POS-tagging and dependency-parsing (using `goeswith` for subwords), derived from [deberta-large-japanese-luw-upos](https://huggingface.co/KoichiYasuoka/deberta-large-japanese-luw-upos) and [UD_Japanese-GSDLUW](https://github.com/UniversalDependencies/UD_Japanese-GSDLUW).
21
 
22
  ## How to Use
23
 
config.json CHANGED
@@ -20,8 +20,8 @@
20
  "3": "ADJ|_|amod",
21
  "4": "ADJ|_|ccomp",
22
  "5": "ADJ|_|csubj",
23
- "6": "ADJ|_|dep",
24
- "7": "ADJ|_|dislocated",
25
  "8": "ADJ|_|nmod",
26
  "9": "ADJ|_|nsubj",
27
  "10": "ADJ|_|obj",
@@ -35,73 +35,77 @@
35
  "18": "ADV|_|obj",
36
  "19": "ADV|_|root",
37
  "20": "AUX|Polarity=Neg|aux",
38
- "21": "AUX|_|aux",
39
- "22": "AUX|_|cop",
40
- "23": "AUX|_|fixed",
41
- "24": "AUX|_|root",
42
- "25": "CCONJ|_|cc",
43
- "26": "DET|_|det",
44
- "27": "INTJ|_|discourse",
45
- "28": "INTJ|_|root",
46
- "29": "NOUN|Polarity=Neg|obl",
47
- "30": "NOUN|Polarity=Neg|root",
48
- "31": "NOUN|_|acl",
49
- "32": "NOUN|_|advcl",
50
- "33": "NOUN|_|ccomp",
51
- "34": "NOUN|_|compound",
52
- "35": "NOUN|_|csubj",
53
- "36": "NOUN|_|dislocated",
54
- "37": "NOUN|_|nmod",
55
- "38": "NOUN|_|nsubj",
56
- "39": "NOUN|_|obj",
57
- "40": "NOUN|_|obl",
58
- "41": "NOUN|_|root",
59
- "42": "NUM|_|advcl",
60
- "43": "NUM|_|compound",
61
- "44": "NUM|_|dislocated",
62
- "45": "NUM|_|nmod",
63
- "46": "NUM|_|nsubj",
64
- "47": "NUM|_|nummod",
65
- "48": "NUM|_|obj",
66
- "49": "NUM|_|obl",
67
- "50": "NUM|_|root",
68
- "51": "PART|_|mark",
69
- "52": "PRON|_|acl",
70
- "53": "PRON|_|advcl",
71
- "54": "PRON|_|dislocated",
72
- "55": "PRON|_|nmod",
73
- "56": "PRON|_|nsubj",
74
- "57": "PRON|_|obj",
75
- "58": "PRON|_|obl",
76
- "59": "PRON|_|root",
77
- "60": "PROPN|_|acl",
78
- "61": "PROPN|_|advcl",
79
- "62": "PROPN|_|compound",
80
- "63": "PROPN|_|dislocated",
81
- "64": "PROPN|_|nmod",
82
- "65": "PROPN|_|nsubj",
83
- "66": "PROPN|_|obj",
84
- "67": "PROPN|_|obl",
85
- "68": "PROPN|_|root",
86
- "69": "PUNCT|_|punct",
87
- "70": "SCONJ|_|mark",
88
- "71": "SYM|_|compound",
89
- "72": "SYM|_|dep",
90
- "73": "SYM|_|nmod",
91
- "74": "SYM|_|obl",
92
- "75": "VERB|_|acl",
93
- "76": "VERB|_|advcl",
94
- "77": "VERB|_|ccomp",
95
- "78": "VERB|_|compound",
96
- "79": "VERB|_|csubj",
97
- "80": "VERB|_|dislocated",
98
- "81": "VERB|_|nmod",
99
- "82": "VERB|_|obj",
100
- "83": "VERB|_|obl",
101
- "84": "VERB|_|root",
102
- "85": "X|_|dep",
103
- "86": "X|_|goeswith",
104
- "87": "X|_|nmod"
 
 
 
 
105
  },
106
  "initializer_range": 0.02,
107
  "intermediate_size": 4096,
@@ -112,8 +116,8 @@
112
  "ADJ|_|amod": 3,
113
  "ADJ|_|ccomp": 4,
114
  "ADJ|_|csubj": 5,
115
- "ADJ|_|dep": 6,
116
- "ADJ|_|dislocated": 7,
117
  "ADJ|_|nmod": 8,
118
  "ADJ|_|nsubj": 9,
119
  "ADJ|_|obj": 10,
@@ -127,73 +131,77 @@
127
  "ADV|_|obj": 18,
128
  "ADV|_|root": 19,
129
  "AUX|Polarity=Neg|aux": 20,
130
- "AUX|_|aux": 21,
131
- "AUX|_|cop": 22,
132
- "AUX|_|fixed": 23,
133
- "AUX|_|root": 24,
134
- "CCONJ|_|cc": 25,
135
- "DET|_|det": 26,
136
- "INTJ|_|discourse": 27,
137
- "INTJ|_|root": 28,
138
- "NOUN|Polarity=Neg|obl": 29,
139
- "NOUN|Polarity=Neg|root": 30,
140
- "NOUN|_|acl": 31,
141
- "NOUN|_|advcl": 32,
142
- "NOUN|_|ccomp": 33,
143
- "NOUN|_|compound": 34,
144
- "NOUN|_|csubj": 35,
145
- "NOUN|_|dislocated": 36,
146
- "NOUN|_|nmod": 37,
147
- "NOUN|_|nsubj": 38,
148
- "NOUN|_|obj": 39,
149
- "NOUN|_|obl": 40,
150
- "NOUN|_|root": 41,
151
- "NUM|_|advcl": 42,
152
- "NUM|_|compound": 43,
153
- "NUM|_|dislocated": 44,
154
- "NUM|_|nmod": 45,
155
- "NUM|_|nsubj": 46,
156
- "NUM|_|nummod": 47,
157
- "NUM|_|obj": 48,
158
- "NUM|_|obl": 49,
159
- "NUM|_|root": 50,
160
- "PART|_|mark": 51,
161
- "PRON|_|acl": 52,
162
- "PRON|_|advcl": 53,
163
- "PRON|_|dislocated": 54,
164
- "PRON|_|nmod": 55,
165
- "PRON|_|nsubj": 56,
166
- "PRON|_|obj": 57,
167
- "PRON|_|obl": 58,
168
- "PRON|_|root": 59,
169
- "PROPN|_|acl": 60,
170
- "PROPN|_|advcl": 61,
171
- "PROPN|_|compound": 62,
172
- "PROPN|_|dislocated": 63,
173
- "PROPN|_|nmod": 64,
174
- "PROPN|_|nsubj": 65,
175
- "PROPN|_|obj": 66,
176
- "PROPN|_|obl": 67,
177
- "PROPN|_|root": 68,
178
- "PUNCT|_|punct": 69,
179
- "SCONJ|_|mark": 70,
180
- "SYM|_|compound": 71,
181
- "SYM|_|dep": 72,
182
- "SYM|_|nmod": 73,
183
- "SYM|_|obl": 74,
184
- "VERB|_|acl": 75,
185
- "VERB|_|advcl": 76,
186
- "VERB|_|ccomp": 77,
187
- "VERB|_|compound": 78,
188
- "VERB|_|csubj": 79,
189
- "VERB|_|dislocated": 80,
190
- "VERB|_|nmod": 81,
191
- "VERB|_|obj": 82,
192
- "VERB|_|obl": 83,
193
- "VERB|_|root": 84,
194
- "X|_|dep": 85,
195
- "X|_|goeswith": 86,
196
- "X|_|nmod": 87
 
 
 
 
197
  },
198
  "layer_norm_eps": 1e-07,
199
  "max_position_embeddings": 512,
@@ -205,9 +213,12 @@
205
  "pooler_dropout": 0,
206
  "pooler_hidden_act": "gelu",
207
  "pooler_hidden_size": 1024,
208
- "pos_att_type": null,
209
- "position_biased_input": true,
210
- "relative_attention": false,
 
 
 
211
  "tokenizer_class": "DebertaV2TokenizerFast",
212
  "torch_dtype": "float32",
213
  "transformers_version": "4.22.1",
 
20
  "3": "ADJ|_|amod",
21
  "4": "ADJ|_|ccomp",
22
  "5": "ADJ|_|csubj",
23
+ "6": "ADJ|_|csubj:outer",
24
+ "7": "ADJ|_|dep",
25
  "8": "ADJ|_|nmod",
26
  "9": "ADJ|_|nsubj",
27
  "10": "ADJ|_|obj",
 
35
  "18": "ADV|_|obj",
36
  "19": "ADV|_|root",
37
  "20": "AUX|Polarity=Neg|aux",
38
+ "21": "AUX|Polarity=Neg|fixed",
39
+ "22": "AUX|_|aux",
40
+ "23": "AUX|_|cop",
41
+ "24": "AUX|_|fixed",
42
+ "25": "AUX|_|root",
43
+ "26": "CCONJ|_|cc",
44
+ "27": "DET|_|det",
45
+ "28": "INTJ|_|discourse",
46
+ "29": "INTJ|_|root",
47
+ "30": "NOUN|Polarity=Neg|obl",
48
+ "31": "NOUN|Polarity=Neg|root",
49
+ "32": "NOUN|_|acl",
50
+ "33": "NOUN|_|advcl",
51
+ "34": "NOUN|_|ccomp",
52
+ "35": "NOUN|_|compound",
53
+ "36": "NOUN|_|csubj",
54
+ "37": "NOUN|_|csubj:outer",
55
+ "38": "NOUN|_|nmod",
56
+ "39": "NOUN|_|nsubj",
57
+ "40": "NOUN|_|nsubj:outer",
58
+ "41": "NOUN|_|obj",
59
+ "42": "NOUN|_|obl",
60
+ "43": "NOUN|_|root",
61
+ "44": "NUM|_|advcl",
62
+ "45": "NUM|_|compound",
63
+ "46": "NUM|_|nmod",
64
+ "47": "NUM|_|nsubj",
65
+ "48": "NUM|_|nsubj:outer",
66
+ "49": "NUM|_|nummod",
67
+ "50": "NUM|_|obj",
68
+ "51": "NUM|_|obl",
69
+ "52": "NUM|_|root",
70
+ "53": "PART|_|mark",
71
+ "54": "PRON|_|acl",
72
+ "55": "PRON|_|advcl",
73
+ "56": "PRON|_|nmod",
74
+ "57": "PRON|_|nsubj",
75
+ "58": "PRON|_|nsubj:outer",
76
+ "59": "PRON|_|obj",
77
+ "60": "PRON|_|obl",
78
+ "61": "PRON|_|root",
79
+ "62": "PROPN|_|acl",
80
+ "63": "PROPN|_|advcl",
81
+ "64": "PROPN|_|compound",
82
+ "65": "PROPN|_|nmod",
83
+ "66": "PROPN|_|nsubj",
84
+ "67": "PROPN|_|nsubj:outer",
85
+ "68": "PROPN|_|obj",
86
+ "69": "PROPN|_|obl",
87
+ "70": "PROPN|_|root",
88
+ "71": "PUNCT|_|punct",
89
+ "72": "SCONJ|_|dep",
90
+ "73": "SCONJ|_|fixed",
91
+ "74": "SCONJ|_|mark",
92
+ "75": "SYM|_|compound",
93
+ "76": "SYM|_|dep",
94
+ "77": "SYM|_|nmod",
95
+ "78": "SYM|_|obl",
96
+ "79": "VERB|_|acl",
97
+ "80": "VERB|_|advcl",
98
+ "81": "VERB|_|ccomp",
99
+ "82": "VERB|_|compound",
100
+ "83": "VERB|_|csubj",
101
+ "84": "VERB|_|csubj:outer",
102
+ "85": "VERB|_|nmod",
103
+ "86": "VERB|_|obj",
104
+ "87": "VERB|_|obl",
105
+ "88": "VERB|_|root",
106
+ "89": "X|_|dep",
107
+ "90": "X|_|goeswith",
108
+ "91": "X|_|nmod"
109
  },
110
  "initializer_range": 0.02,
111
  "intermediate_size": 4096,
 
116
  "ADJ|_|amod": 3,
117
  "ADJ|_|ccomp": 4,
118
  "ADJ|_|csubj": 5,
119
+ "ADJ|_|csubj:outer": 6,
120
+ "ADJ|_|dep": 7,
121
  "ADJ|_|nmod": 8,
122
  "ADJ|_|nsubj": 9,
123
  "ADJ|_|obj": 10,
 
131
  "ADV|_|obj": 18,
132
  "ADV|_|root": 19,
133
  "AUX|Polarity=Neg|aux": 20,
134
+ "AUX|Polarity=Neg|fixed": 21,
135
+ "AUX|_|aux": 22,
136
+ "AUX|_|cop": 23,
137
+ "AUX|_|fixed": 24,
138
+ "AUX|_|root": 25,
139
+ "CCONJ|_|cc": 26,
140
+ "DET|_|det": 27,
141
+ "INTJ|_|discourse": 28,
142
+ "INTJ|_|root": 29,
143
+ "NOUN|Polarity=Neg|obl": 30,
144
+ "NOUN|Polarity=Neg|root": 31,
145
+ "NOUN|_|acl": 32,
146
+ "NOUN|_|advcl": 33,
147
+ "NOUN|_|ccomp": 34,
148
+ "NOUN|_|compound": 35,
149
+ "NOUN|_|csubj": 36,
150
+ "NOUN|_|csubj:outer": 37,
151
+ "NOUN|_|nmod": 38,
152
+ "NOUN|_|nsubj": 39,
153
+ "NOUN|_|nsubj:outer": 40,
154
+ "NOUN|_|obj": 41,
155
+ "NOUN|_|obl": 42,
156
+ "NOUN|_|root": 43,
157
+ "NUM|_|advcl": 44,
158
+ "NUM|_|compound": 45,
159
+ "NUM|_|nmod": 46,
160
+ "NUM|_|nsubj": 47,
161
+ "NUM|_|nsubj:outer": 48,
162
+ "NUM|_|nummod": 49,
163
+ "NUM|_|obj": 50,
164
+ "NUM|_|obl": 51,
165
+ "NUM|_|root": 52,
166
+ "PART|_|mark": 53,
167
+ "PRON|_|acl": 54,
168
+ "PRON|_|advcl": 55,
169
+ "PRON|_|nmod": 56,
170
+ "PRON|_|nsubj": 57,
171
+ "PRON|_|nsubj:outer": 58,
172
+ "PRON|_|obj": 59,
173
+ "PRON|_|obl": 60,
174
+ "PRON|_|root": 61,
175
+ "PROPN|_|acl": 62,
176
+ "PROPN|_|advcl": 63,
177
+ "PROPN|_|compound": 64,
178
+ "PROPN|_|nmod": 65,
179
+ "PROPN|_|nsubj": 66,
180
+ "PROPN|_|nsubj:outer": 67,
181
+ "PROPN|_|obj": 68,
182
+ "PROPN|_|obl": 69,
183
+ "PROPN|_|root": 70,
184
+ "PUNCT|_|punct": 71,
185
+ "SCONJ|_|dep": 72,
186
+ "SCONJ|_|fixed": 73,
187
+ "SCONJ|_|mark": 74,
188
+ "SYM|_|compound": 75,
189
+ "SYM|_|dep": 76,
190
+ "SYM|_|nmod": 77,
191
+ "SYM|_|obl": 78,
192
+ "VERB|_|acl": 79,
193
+ "VERB|_|advcl": 80,
194
+ "VERB|_|ccomp": 81,
195
+ "VERB|_|compound": 82,
196
+ "VERB|_|csubj": 83,
197
+ "VERB|_|csubj:outer": 84,
198
+ "VERB|_|nmod": 85,
199
+ "VERB|_|obj": 86,
200
+ "VERB|_|obl": 87,
201
+ "VERB|_|root": 88,
202
+ "X|_|dep": 89,
203
+ "X|_|goeswith": 90,
204
+ "X|_|nmod": 91
205
  },
206
  "layer_norm_eps": 1e-07,
207
  "max_position_embeddings": 512,
 
213
  "pooler_dropout": 0,
214
  "pooler_hidden_act": "gelu",
215
  "pooler_hidden_size": 1024,
216
+ "pos_att_type": [
217
+ "p2c",
218
+ "c2p"
219
+ ],
220
+ "position_biased_input": false,
221
+ "relative_attention": true,
222
  "tokenizer_class": "DebertaV2TokenizerFast",
223
  "torch_dtype": "float32",
224
  "transformers_version": "4.22.1",
maker.py CHANGED
@@ -1,5 +1,5 @@
1
  #! /usr/bin/python3
2
- src="KoichiYasuoka/deberta-large-japanese-aozora"
3
  tgt="KoichiYasuoka/deberta-large-japanese-aozora-ud-goeswith"
4
  url="https://github.com/UniversalDependencies/UD_Japanese-GSDLUW"
5
  import os
@@ -46,9 +46,9 @@ trainDS=UDgoeswithDataset("train.conllu",tkz)
46
  devDS=UDgoeswithDataset("dev.conllu",tkz)
47
  testDS=UDgoeswithDataset("test.conllu",tkz)
48
  lid=trainDS(devDS,testDS)
49
- cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()})
50
- arg=TrainingArguments(num_train_epochs=3,per_device_train_batch_size=32,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1)
51
- trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg),train_dataset=trainDS,eval_dataset=devDS)
52
  trn.train()
53
  trn.save_model(tgt)
54
  tkz.save_pretrained(tgt)
 
1
  #! /usr/bin/python3
2
+ src="KoichiYasuoka/deberta-large-japanese-luw-upos"
3
  tgt="KoichiYasuoka/deberta-large-japanese-aozora-ud-goeswith"
4
  url="https://github.com/UniversalDependencies/UD_Japanese-GSDLUW"
5
  import os
 
46
  devDS=UDgoeswithDataset("dev.conllu",tkz)
47
  testDS=UDgoeswithDataset("test.conllu",tkz)
48
  lid=trainDS(devDS,testDS)
49
+ cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},task_specific_params=None,ignore_mismatched_sizes=True)
50
+ arg=TrainingArguments(num_train_epochs=3,per_device_train_batch_size=16,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1)
51
+ trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True),train_dataset=trainDS,eval_dataset=devDS)
52
  trn.train()
53
  trn.save_model(tgt)
54
  tkz.save_pretrained(tgt)
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33b8461274971c818c6736e78138330ebd3758a4f910fa5b3eaef72d701d02f2
3
- size 1342912499
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d578f36fc7b6326126331f8f3642ce61ce7191a116d9e3c190d1dc20735a8f03
3
+ size 1546582835
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c39958a1a96c80c2eb1858d29a204a873727b9899d4ccef6da6898e50f364c9
3
- size 3311