emfomy committed on
Commit
9578a02
•
1 Parent(s): 5b9404a

Upload model files.

Browse files
Files changed (6) hide show
  1. README.md +19 -0
  2. config.json +150 -0
  3. pytorch_model.bin +3 -0
  4. special_tokens_map.json +1 -0
  5. tokenizer_config.json +1 -0
  6. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - zh
4
+ thumbnail: https://ckip.iis.sinica.edu.tw/files/ckip_logo.png
5
+ tags:
6
+ - pytorch
7
+ - token-classification
8
+ - bert
9
+ - zh
10
+ license: gpl-3.0
11
+ datasets:
12
+ metrics:
13
+ ---
14
+
15
+ # CKIP BERT Base Chinese — Part-of-Speech Tagging
16
+
17
+ ## Contributors
18
+
19
+ * [Mu Yang](https://muyang.pro) at [CKIP](https://ckip.iis.sinica.edu.tw) (Author & Maintainer)
config.json ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "directionality": "bidi",
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "A",
13
+ "1": "Caa",
14
+ "2": "Cab",
15
+ "3": "Cba",
16
+ "4": "Cbb",
17
+ "5": "D",
18
+ "6": "Da",
19
+ "7": "Dfa",
20
+ "8": "Dfb",
21
+ "9": "Di",
22
+ "10": "Dk",
23
+ "11": "DM",
24
+ "12": "I",
25
+ "13": "Na",
26
+ "14": "Nb",
27
+ "15": "Nc",
28
+ "16": "Ncd",
29
+ "17": "Nd",
30
+ "18": "Nep",
31
+ "19": "Neqa",
32
+ "20": "Neqb",
33
+ "21": "Nes",
34
+ "22": "Neu",
35
+ "23": "Nf",
36
+ "24": "Ng",
37
+ "25": "Nh",
38
+ "26": "Nv",
39
+ "27": "P",
40
+ "28": "T",
41
+ "29": "VA",
42
+ "30": "VAC",
43
+ "31": "VB",
44
+ "32": "VC",
45
+ "33": "VCL",
46
+ "34": "VD",
47
+ "35": "VF",
48
+ "36": "VE",
49
+ "37": "VG",
50
+ "38": "VH",
51
+ "39": "VHC",
52
+ "40": "VI",
53
+ "41": "VJ",
54
+ "42": "VK",
55
+ "43": "VL",
56
+ "44": "V_2",
57
+ "45": "DE",
58
+ "46": "SHI",
59
+ "47": "FW",
60
+ "48": "COLONCATEGORY",
61
+ "49": "COMMACATEGORY",
62
+ "50": "DASHCATEGORY",
63
+ "51": "DOTCATEGORY",
64
+ "52": "ETCCATEGORY",
65
+ "53": "EXCLAMATIONCATEGORY",
66
+ "54": "PARENTHESISCATEGORY",
67
+ "55": "PAUSECATEGORY",
68
+ "56": "PERIODCATEGORY",
69
+ "57": "QUESTIONCATEGORY",
70
+ "58": "SEMICOLONCATEGORY",
71
+ "59": "SPCHANGECATEGORY"
72
+ },
73
+ "initializer_range": 0.02,
74
+ "intermediate_size": 3072,
75
+ "label2id": {
76
+ "A": 0,
77
+ "COLONCATEGORY": 48,
78
+ "COMMACATEGORY": 49,
79
+ "Caa": 1,
80
+ "Cab": 2,
81
+ "Cba": 3,
82
+ "Cbb": 4,
83
+ "D": 5,
84
+ "DASHCATEGORY": 50,
85
+ "DE": 45,
86
+ "DM": 11,
87
+ "DOTCATEGORY": 51,
88
+ "Da": 6,
89
+ "Dfa": 7,
90
+ "Dfb": 8,
91
+ "Di": 9,
92
+ "Dk": 10,
93
+ "ETCCATEGORY": 52,
94
+ "EXCLAMATIONCATEGORY": 53,
95
+ "FW": 47,
96
+ "I": 12,
97
+ "Na": 13,
98
+ "Nb": 14,
99
+ "Nc": 15,
100
+ "Ncd": 16,
101
+ "Nd": 17,
102
+ "Nep": 18,
103
+ "Neqa": 19,
104
+ "Neqb": 20,
105
+ "Nes": 21,
106
+ "Neu": 22,
107
+ "Nf": 23,
108
+ "Ng": 24,
109
+ "Nh": 25,
110
+ "Nv": 26,
111
+ "P": 27,
112
+ "PARENTHESISCATEGORY": 54,
113
+ "PAUSECATEGORY": 55,
114
+ "PERIODCATEGORY": 56,
115
+ "QUESTIONCATEGORY": 57,
116
+ "SEMICOLONCATEGORY": 58,
117
+ "SHI": 46,
118
+ "SPCHANGECATEGORY": 59,
119
+ "T": 28,
120
+ "VA": 29,
121
+ "VAC": 30,
122
+ "VB": 31,
123
+ "VC": 32,
124
+ "VCL": 33,
125
+ "VD": 34,
126
+ "VE": 36,
127
+ "VF": 35,
128
+ "VG": 37,
129
+ "VH": 38,
130
+ "VHC": 39,
131
+ "VI": 40,
132
+ "VJ": 41,
133
+ "VK": 42,
134
+ "VL": 43,
135
+ "V_2": 44
136
+ },
137
+ "layer_norm_eps": 1e-12,
138
+ "max_position_embeddings": 512,
139
+ "model_type": "bert",
140
+ "num_attention_heads": 12,
141
+ "num_hidden_layers": 12,
142
+ "pad_token_id": 0,
143
+ "pooler_fc_size": 768,
144
+ "pooler_num_attention_heads": 12,
145
+ "pooler_num_fc_layers": 3,
146
+ "pooler_size_per_head": 128,
147
+ "pooler_type": "first_token_transform",
148
+ "type_vocab_size": 2,
149
+ "vocab_size": 21128
150
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b614c94a6a88e58c4126902c78f8b5e1c482f4c17dd762b0e0ad5683259ee61
3
+ size 406981303
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": false, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-chinese"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff