KoichiYasuoka committed
Commit 89aa265
1 Parent(s): 80907a9

model improved

Files changed (5):
  1. config.json +97 -100
  2. pytorch_model.bin +2 -2
  3. supar.model +2 -2
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +1 -1
config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "KoichiYasuoka/roberta-large-japanese-aozora-char",
   "architectures": [
     "RobertaForTokenClassification"
   ],
@@ -11,110 +10,108 @@
   "hidden_dropout_prob": 0.1,
   "hidden_size": 1024,
   "id2label": {
-    "0": "PRON",
-    "1": "I-PART",
-    "2": "B-NOUN",
-    "3": "VERB",
-    "4": "ADJ",
-    "5": "I-SYM",
-    "6": "B-VERB",
-    "7": "AUX",
-    "8": "PROPN",
-    "9": "PUNCT",
-    "10": "I-PRON",
-    "11": "I-VERB",
-    "12": "I-X",
-    "13": "B-ADV",
-    "14": "B-ADP",
-    "15": "B-DET",
-    "16": "B-CCONJ",
-    "17": "B-X",
+    "0": "ADJ",
+    "1": "ADP",
+    "2": "ADV",
+    "3": "AUX",
+    "4": "B-ADJ",
+    "5": "B-ADP",
+    "6": "B-ADV",
+    "7": "B-AUX",
+    "8": "B-CCONJ",
+    "9": "B-DET",
+    "10": "B-INTJ",
+    "11": "B-NOUN",
+    "12": "B-NUM",
+    "13": "B-PART",
+    "14": "B-PRON",
+    "15": "B-PROPN",
+    "16": "B-PUNCT",
+    "17": "B-SCONJ",
     "18": "B-SYM",
-    "19": "CCONJ",
-    "20": "PART",
-    "21": "B-PUNCT",
-    "22": "I-NOUN",
-    "23": "I-INTJ",
-    "24": "INTJ",
-    "25": "I-ADJ",
-    "26": "I-NUM",
-    "27": "B-INTJ",
-    "28": "I-AUX",
-    "29": "I-CCONJ",
-    "30": "NUM",
-    "31": "SCONJ",
-    "32": "I-PUNCT",
-    "33": "I-SCONJ",
-    "34": "B-PART",
-    "35": "B-ADJ",
-    "36": "I-PROPN",
-    "37": "B-NUM",
-    "38": "X",
-    "39": "B-AUX",
-    "40": "B-SCONJ",
-    "41": "ADP",
-    "42": "B-PROPN",
-    "43": "NOUN",
-    "44": "I-DET",
-    "45": "SYM",
-    "46": "I-ADP",
-    "47": "I-ADV",
-    "48": "B-PRON",
-    "49": "ADV"
+    "19": "B-VERB",
+    "20": "B-X",
+    "21": "CCONJ",
+    "22": "I-ADJ",
+    "23": "I-ADP",
+    "24": "I-ADV",
+    "25": "I-AUX",
+    "26": "I-CCONJ",
+    "27": "I-DET",
+    "28": "I-INTJ",
+    "29": "I-NOUN",
+    "30": "I-NUM",
+    "31": "I-PART",
+    "32": "I-PRON",
+    "33": "I-PROPN",
+    "34": "I-PUNCT",
+    "35": "I-SCONJ",
+    "36": "I-SYM",
+    "37": "I-VERB",
+    "38": "I-X",
+    "39": "NOUN",
+    "40": "NUM",
+    "41": "PART",
+    "42": "PRON",
+    "43": "PROPN",
+    "44": "PUNCT",
+    "45": "SCONJ",
+    "46": "SYM",
+    "47": "VERB",
+    "48": "X"
   },
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "label2id": {
-    "ADJ": 4,
-    "ADP": 41,
-    "ADV": 49,
-    "AUX": 7,
-    "B-ADJ": 35,
-    "B-ADP": 14,
-    "B-ADV": 13,
-    "B-AUX": 39,
-    "B-CCONJ": 16,
-    "B-DET": 15,
-    "B-INTJ": 27,
-    "B-NOUN": 2,
-    "B-NUM": 37,
-    "B-PART": 34,
-    "B-PRON": 48,
-    "B-PROPN": 42,
-    "B-PUNCT": 21,
-    "B-SCONJ": 40,
+    "ADJ": 0,
+    "ADP": 1,
+    "ADV": 2,
+    "AUX": 3,
+    "B-ADJ": 4,
+    "B-ADP": 5,
+    "B-ADV": 6,
+    "B-AUX": 7,
+    "B-CCONJ": 8,
+    "B-DET": 9,
+    "B-INTJ": 10,
+    "B-NOUN": 11,
+    "B-NUM": 12,
+    "B-PART": 13,
+    "B-PRON": 14,
+    "B-PROPN": 15,
+    "B-PUNCT": 16,
+    "B-SCONJ": 17,
     "B-SYM": 18,
-    "B-VERB": 6,
-    "B-X": 17,
-    "CCONJ": 19,
-    "I-ADJ": 25,
-    "I-ADP": 46,
-    "I-ADV": 47,
-    "I-AUX": 28,
-    "I-CCONJ": 29,
-    "I-DET": 44,
-    "I-INTJ": 23,
-    "I-NOUN": 22,
-    "I-NUM": 26,
-    "I-PART": 1,
-    "I-PRON": 10,
-    "I-PROPN": 36,
-    "I-PUNCT": 32,
-    "I-SCONJ": 33,
-    "I-SYM": 5,
-    "I-VERB": 11,
-    "I-X": 12,
-    "INTJ": 24,
-    "NOUN": 43,
-    "NUM": 30,
-    "PART": 20,
-    "PRON": 0,
-    "PROPN": 8,
-    "PUNCT": 9,
-    "SCONJ": 31,
-    "SYM": 45,
-    "VERB": 3,
-    "X": 38
+    "B-VERB": 19,
+    "B-X": 20,
+    "CCONJ": 21,
+    "I-ADJ": 22,
+    "I-ADP": 23,
+    "I-ADV": 24,
+    "I-AUX": 25,
+    "I-CCONJ": 26,
+    "I-DET": 27,
+    "I-INTJ": 28,
+    "I-NOUN": 29,
+    "I-NUM": 30,
+    "I-PART": 31,
+    "I-PRON": 32,
+    "I-PROPN": 33,
+    "I-PUNCT": 34,
+    "I-SCONJ": 35,
+    "I-SYM": 36,
+    "I-VERB": 37,
+    "I-X": 38,
+    "NOUN": 39,
+    "NUM": 40,
+    "PART": 41,
+    "PRON": 42,
+    "PROPN": 43,
+    "PUNCT": 44,
+    "SCONJ": 45,
+    "SYM": 46,
+    "VERB": 47,
+    "X": 48
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
@@ -125,7 +122,7 @@
   "position_embedding_type": "absolute",
   "tokenizer_class": "BertTokenizerFast",
   "torch_dtype": "float32",
-  "transformers_version": "4.11.3",
+  "transformers_version": "4.19.2",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 9415
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da2e0d83b40f3976aa7d2a8fd47596b0789446b3ad1c28c3ec2d761cf377c007
-size 1250288662
+oid sha256:72a82baa53d0f94a7ae47b5933238aa15c369913006dc75dedec8e75c55bb7c8
+size 1250252273
supar.model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdcefe8e6d943edda200dab52b7e195f6030312242854185240662f72290c842
-size 1303629217
+oid sha256:836d88dbb4f11fd2abfe580f94809b656a335f544a17ca15ba4ef6107577240d
+size 1302635621
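Both binaries are stored as Git LFS pointers, so the repository itself only records a new sha256 oid and byte size for each. A minimal sketch (the helper name is ours, not from any library) for checking that fully downloaded copies match the pointers above:

```python
import hashlib
import os

def verify_lfs_pointer(path: str, oid: str, size: int) -> None:
    """Compare a downloaded file against its LFS pointer's oid and size."""
    assert os.path.getsize(path) == size, f"{path}: size mismatch"
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    assert digest.hexdigest() == oid, f"{path}: sha256 mismatch"

# oid/size pairs taken from the post-commit pointers in the diffs above.
verify_lfs_pointer("pytorch_model.bin",
                   "72a82baa53d0f94a7ae47b5933238aa15c369913006dc75dedec8e75c55bb7c8",
                   1250252273)
verify_lfs_pointer("supar.model",
                   "836d88dbb4f11fd2abfe580f94809b656a335f544a17ca15ba4ef6107577240d",
                   1302635621)
```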
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
-{"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": false, "never_split": ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"], "model_max_length": 512, "do_basic_tokenize": true, "special_tokens_map_file": "/home/yasuoka/.cache/huggingface/transformers/f6e99603eee497be86830d375f662df1256064f3228eb468f7a8d4b546585e34.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "KoichiYasuoka/roberta-large-japanese-aozora-char", "tokenizer_class": "BertTokenizer"}
+{"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": false, "never_split": ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"], "model_max_length": 512, "do_basic_tokenize": true, "tokenizer_class": "BertTokenizerFast"}