KoichiYasuoka committed on
Commit
eec4065
·
1 Parent(s): d6a0791

model improved

Browse files
config.json CHANGED
@@ -40,7 +40,7 @@
40
  "28": "B-PART",
41
  "29": "B-PRON",
42
  "30": "B-PROPN",
43
- "31": "B-PUNCT",
44
  "32": "B-SCONJ",
45
  "33": "B-SYM",
46
  "34": "B-VERB",
@@ -77,7 +77,7 @@
77
  "65": "I-PART",
78
  "66": "I-PRON",
79
  "67": "I-PROPN",
80
- "68": "I-PUNCT",
81
  "69": "I-SCONJ",
82
  "70": "I-SYM",
83
  "71": "I-VERB",
@@ -96,12 +96,14 @@
96
  "84": "PRON+ADP",
97
  "85": "PROPN",
98
  "86": "PUNCT",
99
- "87": "SCONJ",
100
- "88": "SYM",
101
- "89": "VERB",
102
- "90": "VERB+AUX",
103
- "91": "VERB+AUX+PART",
104
- "92": "X"
 
 
105
  },
106
  "initializer_range": 0.02,
107
  "intermediate_size": 768,
@@ -137,7 +139,7 @@
137
  "B-PART": 28,
138
  "B-PRON": 29,
139
  "B-PROPN": 30,
140
- "B-PUNCT": 31,
141
  "B-SCONJ": 32,
142
  "B-SYM": 33,
143
  "B-VERB": 34,
@@ -174,7 +176,7 @@
174
  "I-PART": 65,
175
  "I-PRON": 66,
176
  "I-PROPN": 67,
177
- "I-PUNCT": 68,
178
  "I-SCONJ": 69,
179
  "I-SYM": 70,
180
  "I-VERB": 71,
@@ -193,12 +195,14 @@
193
  "PRON+ADP": 84,
194
  "PROPN": 85,
195
  "PUNCT": 86,
196
- "SCONJ": 87,
197
- "SYM": 88,
198
- "VERB": 89,
199
- "VERB+AUX": 90,
200
- "VERB+AUX+PART": 91,
201
- "X": 92
 
 
202
  },
203
  "layer_norm_eps": 1e-07,
204
  "max_position_embeddings": 128,
@@ -210,9 +214,12 @@
210
  "pooler_dropout": 0,
211
  "pooler_hidden_act": "gelu",
212
  "pooler_hidden_size": 256,
213
- "pos_att_type": null,
214
- "position_biased_input": true,
215
- "relative_attention": false,
 
 
 
216
  "task_specific_params": {
217
  "upos_multiword": {
218
  "ADJ+ADJ": {
@@ -364,6 +371,18 @@
364
  "\u304b"
365
  ]
366
  },
 
 
 
 
 
 
 
 
 
 
 
 
367
  "VERB+AUX": {
368
  "\u3044\u308f\u308c": [
369
  "\u3044\u308f",
@@ -728,7 +747,7 @@
728
  },
729
  "tokenizer_class": "DebertaV2TokenizerFast",
730
  "torch_dtype": "float32",
731
- "transformers_version": "4.19.1",
732
  "type_vocab_size": 0,
733
  "vocab_size": 32000
734
  }
 
40
  "28": "B-PART",
41
  "29": "B-PRON",
42
  "30": "B-PROPN",
43
+ "31": "B-PUNCT+NOUN",
44
  "32": "B-SCONJ",
45
  "33": "B-SYM",
46
  "34": "B-VERB",
 
77
  "65": "I-PART",
78
  "66": "I-PRON",
79
  "67": "I-PROPN",
80
+ "68": "I-PUNCT+NOUN",
81
  "69": "I-SCONJ",
82
  "70": "I-SYM",
83
  "71": "I-VERB",
 
96
  "84": "PRON+ADP",
97
  "85": "PROPN",
98
  "86": "PUNCT",
99
+ "87": "PUNCT+PUNCT",
100
+ "88": "PUNCT+PUNCT+PUNCT",
101
+ "89": "SCONJ",
102
+ "90": "SYM",
103
+ "91": "VERB",
104
+ "92": "VERB+AUX",
105
+ "93": "VERB+AUX+PART",
106
+ "94": "X"
107
  },
108
  "initializer_range": 0.02,
109
  "intermediate_size": 768,
 
139
  "B-PART": 28,
140
  "B-PRON": 29,
141
  "B-PROPN": 30,
142
+ "B-PUNCT+NOUN": 31,
143
  "B-SCONJ": 32,
144
  "B-SYM": 33,
145
  "B-VERB": 34,
 
176
  "I-PART": 65,
177
  "I-PRON": 66,
178
  "I-PROPN": 67,
179
+ "I-PUNCT+NOUN": 68,
180
  "I-SCONJ": 69,
181
  "I-SYM": 70,
182
  "I-VERB": 71,
 
195
  "PRON+ADP": 84,
196
  "PROPN": 85,
197
  "PUNCT": 86,
198
+ "PUNCT+PUNCT": 87,
199
+ "PUNCT+PUNCT+PUNCT": 88,
200
+ "SCONJ": 89,
201
+ "SYM": 90,
202
+ "VERB": 91,
203
+ "VERB+AUX": 92,
204
+ "VERB+AUX+PART": 93,
205
+ "X": 94
206
  },
207
  "layer_norm_eps": 1e-07,
208
  "max_position_embeddings": 128,
 
214
  "pooler_dropout": 0,
215
  "pooler_hidden_act": "gelu",
216
  "pooler_hidden_size": 256,
217
+ "pos_att_type": [
218
+ "p2c",
219
+ "c2p"
220
+ ],
221
+ "position_biased_input": false,
222
+ "relative_attention": true,
223
  "task_specific_params": {
224
  "upos_multiword": {
225
  "ADJ+ADJ": {
 
371
  "\u304b"
372
  ]
373
  },
374
+ "PUNCT+NOUN": {
375
+ ",\u201c\u62c9\u81f4\u76e3\u7981\u201d\u554f\u984c": [
376
+ ",",
377
+ "\u201c\u62c9\u81f4\u76e3\u7981\u201d\u554f\u984c"
378
+ ]
379
+ },
380
+ "PUNCT+PUNCT": {
381
+ ",\u201c": [
382
+ ",",
383
+ "\u201c"
384
+ ]
385
+ },
386
  "VERB+AUX": {
387
  "\u3044\u308f\u308c": [
388
  "\u3044\u308f",
 
747
  },
748
  "tokenizer_class": "DebertaV2TokenizerFast",
749
  "torch_dtype": "float32",
750
+ "transformers_version": "4.22.1",
751
  "type_vocab_size": 0,
752
  "vocab_size": 32000
753
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f130dada2cd229239d695632a67097b8469f20e42435e3e45f4f5a6cbf4435b
3
- size 64669363
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d92ef47bf3e9bf6c5a30f1bcb641a7e6f9753e12048d6316b730cdb93ea5b23a
3
+ size 71135251
special_tokens_map.json CHANGED
@@ -1 +1,9 @@
1
- {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2be76956d4705737f83345962fab32b8e99f0af5a0f3eb4a2615adb9ec1169f4
3
- size 113144939
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b2bf72d53af75ee5f30f2be07e46b4abd4478d2166ff18199c96660105f5d9f
3
+ size 119649355
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1,14 @@
1
- {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": true, "keep_accents": true, "model_max_length": 128, "tokenizer_class": "DebertaV2TokenizerFast"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "eos_token": "[SEP]",
6
+ "keep_accents": true,
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 128,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "split_by_punct": true,
12
+ "tokenizer_class": "DebertaV2TokenizerFast",
13
+ "unk_token": "[UNK]"
14
+ }