KoichiYasuoka committed on
Commit
f5822b9
1 Parent(s): 1dbb93a

model improved

Browse files

Files changed (5) hide show
  1. config.json +101 -99
  2. pytorch_model.bin +2 -2
  3. supar.model +2 -2
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +1 -1
config.json CHANGED
@@ -3,113 +3,114 @@
3
  "BertForTokenClassification"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
 
6
  "gradient_checkpointing": false,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 1024,
10
  "id2label": {
11
- "0": "I-AUX",
12
- "1": "I-ADJ",
13
- "2": "SCONJ",
14
- "3": "I-DET",
15
- "4": "I-VERB",
16
- "5": "B-AUX",
17
- "6": "SYM",
18
- "7": "ADV",
19
- "8": "VERB",
20
- "9": "I-SYM",
21
- "10": "I-PUNCT",
22
- "11": "B-PUNCT",
23
- "12": "I-X",
24
- "13": "B-X",
25
- "14": "I-SCONJ",
26
- "15": "CCONJ",
27
- "16": "I-NOUN",
28
- "17": "B-NOUN",
29
- "18": "B-ADV",
30
- "19": "PART",
31
- "20": "B-PROPN",
32
- "21": "B-VERB",
33
- "22": "B-CCONJ",
34
- "23": "AUX",
35
- "24": "B-SYM",
36
- "25": "X",
37
- "26": "I-INTJ",
38
- "27": "I-PROPN",
39
- "28": "I-NUM",
40
- "29": "ADP",
41
- "30": "PROPN",
42
- "31": "B-ADP",
43
- "32": "PUNCT",
44
- "33": "B-DET",
45
- "34": "NUM",
46
- "35": "I-ADP",
47
- "36": "I-PART",
48
- "37": "I-PRON",
49
- "38": "I-CCONJ",
50
- "39": "B-INTJ",
51
- "40": "B-SCONJ",
52
- "41": "B-PART",
53
- "42": "B-ADJ",
54
- "43": "I-ADV",
55
- "44": "PRON",
56
- "45": "NOUN",
57
- "46": "B-NUM",
58
- "47": "ADJ",
59
- "48": "B-PRON"
60
  },
61
  "initializer_range": 0.02,
62
  "intermediate_size": 4096,
63
  "label2id": {
64
- "ADJ": 47,
65
- "ADP": 29,
66
- "ADV": 7,
67
- "AUX": 23,
68
- "B-ADJ": 42,
69
- "B-ADP": 31,
70
- "B-ADV": 18,
71
- "B-AUX": 5,
72
- "B-CCONJ": 22,
73
- "B-DET": 33,
74
- "B-INTJ": 39,
75
- "B-NOUN": 17,
76
- "B-NUM": 46,
77
- "B-PART": 41,
78
- "B-PRON": 48,
79
- "B-PROPN": 20,
80
- "B-PUNCT": 11,
81
- "B-SCONJ": 40,
82
- "B-SYM": 24,
83
- "B-VERB": 21,
84
- "B-X": 13,
85
- "CCONJ": 15,
86
- "I-ADJ": 1,
87
- "I-ADP": 35,
88
- "I-ADV": 43,
89
- "I-AUX": 0,
90
- "I-CCONJ": 38,
91
- "I-DET": 3,
92
- "I-INTJ": 26,
93
- "I-NOUN": 16,
94
- "I-NUM": 28,
95
- "I-PART": 36,
96
- "I-PRON": 37,
97
- "I-PROPN": 27,
98
- "I-PUNCT": 10,
99
- "I-SCONJ": 14,
100
- "I-SYM": 9,
101
- "I-VERB": 4,
102
- "I-X": 12,
103
- "NOUN": 45,
104
- "NUM": 34,
105
- "PART": 19,
106
- "PRON": 44,
107
- "PROPN": 30,
108
- "PUNCT": 32,
109
- "SCONJ": 2,
110
- "SYM": 6,
111
- "VERB": 8,
112
- "X": 25
113
  },
114
  "layer_norm_eps": 1e-12,
115
  "max_position_embeddings": 512,
@@ -119,7 +120,8 @@
119
  "pad_token_id": 0,
120
  "position_embedding_type": "absolute",
121
  "tokenizer_class": "BertTokenizer",
122
- "transformers_version": "4.7.0",
 
123
  "type_vocab_size": 2,
124
  "use_cache": true,
125
  "vocab_size": 6291
3
  "BertForTokenClassification"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
  "gradient_checkpointing": false,
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 1024,
11
  "id2label": {
12
+ "0": "ADJ",
13
+ "1": "ADP",
14
+ "2": "ADV",
15
+ "3": "AUX",
16
+ "4": "B-ADJ",
17
+ "5": "B-ADP",
18
+ "6": "B-ADV",
19
+ "7": "B-AUX",
20
+ "8": "B-CCONJ",
21
+ "9": "B-DET",
22
+ "10": "B-INTJ",
23
+ "11": "B-NOUN",
24
+ "12": "B-NUM",
25
+ "13": "B-PART",
26
+ "14": "B-PRON",
27
+ "15": "B-PROPN",
28
+ "16": "B-PUNCT",
29
+ "17": "B-SCONJ",
30
+ "18": "B-SYM",
31
+ "19": "B-VERB",
32
+ "20": "B-X",
33
+ "21": "CCONJ",
34
+ "22": "I-ADJ",
35
+ "23": "I-ADP",
36
+ "24": "I-ADV",
37
+ "25": "I-AUX",
38
+ "26": "I-CCONJ",
39
+ "27": "I-DET",
40
+ "28": "I-INTJ",
41
+ "29": "I-NOUN",
42
+ "30": "I-NUM",
43
+ "31": "I-PART",
44
+ "32": "I-PRON",
45
+ "33": "I-PROPN",
46
+ "34": "I-PUNCT",
47
+ "35": "I-SCONJ",
48
+ "36": "I-SYM",
49
+ "37": "I-VERB",
50
+ "38": "I-X",
51
+ "39": "NOUN",
52
+ "40": "NUM",
53
+ "41": "PART",
54
+ "42": "PRON",
55
+ "43": "PROPN",
56
+ "44": "PUNCT",
57
+ "45": "SCONJ",
58
+ "46": "SYM",
59
+ "47": "VERB",
60
+ "48": "X"
61
  },
62
  "initializer_range": 0.02,
63
  "intermediate_size": 4096,
64
  "label2id": {
65
+ "ADJ": 0,
66
+ "ADP": 1,
67
+ "ADV": 2,
68
+ "AUX": 3,
69
+ "B-ADJ": 4,
70
+ "B-ADP": 5,
71
+ "B-ADV": 6,
72
+ "B-AUX": 7,
73
+ "B-CCONJ": 8,
74
+ "B-DET": 9,
75
+ "B-INTJ": 10,
76
+ "B-NOUN": 11,
77
+ "B-NUM": 12,
78
+ "B-PART": 13,
79
+ "B-PRON": 14,
80
+ "B-PROPN": 15,
81
+ "B-PUNCT": 16,
82
+ "B-SCONJ": 17,
83
+ "B-SYM": 18,
84
+ "B-VERB": 19,
85
+ "B-X": 20,
86
+ "CCONJ": 21,
87
+ "I-ADJ": 22,
88
+ "I-ADP": 23,
89
+ "I-ADV": 24,
90
+ "I-AUX": 25,
91
+ "I-CCONJ": 26,
92
+ "I-DET": 27,
93
+ "I-INTJ": 28,
94
+ "I-NOUN": 29,
95
+ "I-NUM": 30,
96
+ "I-PART": 31,
97
+ "I-PRON": 32,
98
+ "I-PROPN": 33,
99
+ "I-PUNCT": 34,
100
+ "I-SCONJ": 35,
101
+ "I-SYM": 36,
102
+ "I-VERB": 37,
103
+ "I-X": 38,
104
+ "NOUN": 39,
105
+ "NUM": 40,
106
+ "PART": 41,
107
+ "PRON": 42,
108
+ "PROPN": 43,
109
+ "PUNCT": 44,
110
+ "SCONJ": 45,
111
+ "SYM": 46,
112
+ "VERB": 47,
113
+ "X": 48
114
  },
115
  "layer_norm_eps": 1e-12,
116
  "max_position_embeddings": 512,
120
  "pad_token_id": 0,
121
  "position_embedding_type": "absolute",
122
  "tokenizer_class": "BertTokenizer",
123
+ "torch_dtype": "float32",
124
+ "transformers_version": "4.19.2",
125
  "type_vocab_size": 2,
126
  "use_cache": true,
127
  "vocab_size": 6291
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09f5e8cb2c6837a071321d12a470b6a23bd669fdfa542c6d250c400ee398f7b1
3
- size 1237486224
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d374996c4a74c84205d41521dd47f0510dc723fcd5b378bb68ee0fde2557a144
3
+ size 1237455217
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34faa11528d150b8ec7640cb89338628697c4bb956588c48c2642eaaac06044b
3
- size 1289796779
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13a3eed7e15f59a2a3c2020a506f76bbd478d9ccbe0ea534214960f7d17350ed
3
+ size 1289789669
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "do_word_tokenize": true, "do_subword_tokenize": true, "word_tokenizer_type": "basic", "subword_tokenizer_type": "character", "never_split": ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"], "mecab_kwargs": null, "do_basic_tokenize": true, "tokenizer_class": "BertTokenizer"}
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "do_word_tokenize": true, "do_subword_tokenize": true, "word_tokenizer_type": "basic", "subword_tokenizer_type": "character", "never_split": ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"], "mecab_kwargs": null, "do_basic_tokenize": true, "tokenizer_class": "BertTokenizerFast"}