emfomy committed on
Commit
1727d9e
•
1 Parent(s): 0d33176

Upload model files.

Browse files
Files changed (6) hide show
  1. README.md +22 -0
  2. config.json +153 -0
  3. pytorch_model.bin +3 -0
  4. special_tokens_map.json +1 -0
  5. tokenizer_config.json +1 -0
  6. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - zh
4
+ thumbnail: https://ckip.iis.sinica.edu.tw/files/ckip_logo.png
5
+ tags:
6
+ - pytorch
7
+ - token-classification
8
+ - albert
9
+ - zh
10
+ license: gpl-3.0
11
+ datasets:
12
+ metrics:
13
+ ---
14
+
15
+ # CKIP ALBERT Tiny Chinese — Part-of-Speech Tagging
16
+
17
+ ## Contributors
18
+
19
+ * [Mu Yang](https://muyang.pro) at [CKIP](https://ckip.iis.sinica.edu.tw) (Author & Maintainer)
20
+
21
+ ## Attention
22
+ Please use `BertTokenizer` instead of `AutoTokenizer`!!!
config.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "AlbertForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "bos_token_id": 2,
7
+ "classifier_dropout_prob": 0.1,
8
+ "down_scale_factor": 1,
9
+ "embedding_size": 128,
10
+ "eos_token_id": 3,
11
+ "gap_size": 0,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.0,
14
+ "hidden_size": 312,
15
+ "id2label": {
16
+ "0": "A",
17
+ "1": "Caa",
18
+ "2": "Cab",
19
+ "3": "Cba",
20
+ "4": "Cbb",
21
+ "5": "D",
22
+ "6": "Da",
23
+ "7": "Dfa",
24
+ "8": "Dfb",
25
+ "9": "Di",
26
+ "10": "Dk",
27
+ "11": "DM",
28
+ "12": "I",
29
+ "13": "Na",
30
+ "14": "Nb",
31
+ "15": "Nc",
32
+ "16": "Ncd",
33
+ "17": "Nd",
34
+ "18": "Nep",
35
+ "19": "Neqa",
36
+ "20": "Neqb",
37
+ "21": "Nes",
38
+ "22": "Neu",
39
+ "23": "Nf",
40
+ "24": "Ng",
41
+ "25": "Nh",
42
+ "26": "Nv",
43
+ "27": "P",
44
+ "28": "T",
45
+ "29": "VA",
46
+ "30": "VAC",
47
+ "31": "VB",
48
+ "32": "VC",
49
+ "33": "VCL",
50
+ "34": "VD",
51
+ "35": "VF",
52
+ "36": "VE",
53
+ "37": "VG",
54
+ "38": "VH",
55
+ "39": "VHC",
56
+ "40": "VI",
57
+ "41": "VJ",
58
+ "42": "VK",
59
+ "43": "VL",
60
+ "44": "V_2",
61
+ "45": "DE",
62
+ "46": "SHI",
63
+ "47": "FW",
64
+ "48": "COLONCATEGORY",
65
+ "49": "COMMACATEGORY",
66
+ "50": "DASHCATEGORY",
67
+ "51": "DOTCATEGORY",
68
+ "52": "ETCCATEGORY",
69
+ "53": "EXCLAMATIONCATEGORY",
70
+ "54": "PARENTHESISCATEGORY",
71
+ "55": "PAUSECATEGORY",
72
+ "56": "PERIODCATEGORY",
73
+ "57": "QUESTIONCATEGORY",
74
+ "58": "SEMICOLONCATEGORY",
75
+ "59": "SPCHANGECATEGORY"
76
+ },
77
+ "initializer_range": 0.02,
78
+ "inner_group_num": 1,
79
+ "intermediate_size": 1248,
80
+ "label2id": {
81
+ "A": 0,
82
+ "COLONCATEGORY": 48,
83
+ "COMMACATEGORY": 49,
84
+ "Caa": 1,
85
+ "Cab": 2,
86
+ "Cba": 3,
87
+ "Cbb": 4,
88
+ "D": 5,
89
+ "DASHCATEGORY": 50,
90
+ "DE": 45,
91
+ "DM": 11,
92
+ "DOTCATEGORY": 51,
93
+ "Da": 6,
94
+ "Dfa": 7,
95
+ "Dfb": 8,
96
+ "Di": 9,
97
+ "Dk": 10,
98
+ "ETCCATEGORY": 52,
99
+ "EXCLAMATIONCATEGORY": 53,
100
+ "FW": 47,
101
+ "I": 12,
102
+ "Na": 13,
103
+ "Nb": 14,
104
+ "Nc": 15,
105
+ "Ncd": 16,
106
+ "Nd": 17,
107
+ "Nep": 18,
108
+ "Neqa": 19,
109
+ "Neqb": 20,
110
+ "Nes": 21,
111
+ "Neu": 22,
112
+ "Nf": 23,
113
+ "Ng": 24,
114
+ "Nh": 25,
115
+ "Nv": 26,
116
+ "P": 27,
117
+ "PARENTHESISCATEGORY": 54,
118
+ "PAUSECATEGORY": 55,
119
+ "PERIODCATEGORY": 56,
120
+ "QUESTIONCATEGORY": 57,
121
+ "SEMICOLONCATEGORY": 58,
122
+ "SHI": 46,
123
+ "SPCHANGECATEGORY": 59,
124
+ "T": 28,
125
+ "VA": 29,
126
+ "VAC": 30,
127
+ "VB": 31,
128
+ "VC": 32,
129
+ "VCL": 33,
130
+ "VD": 34,
131
+ "VE": 36,
132
+ "VF": 35,
133
+ "VG": 37,
134
+ "VH": 38,
135
+ "VHC": 39,
136
+ "VI": 40,
137
+ "VJ": 41,
138
+ "VK": 42,
139
+ "VL": 43,
140
+ "V_2": 44
141
+ },
142
+ "layer_norm_eps": 1e-12,
143
+ "max_position_embeddings": 512,
144
+ "model_type": "albert",
145
+ "net_structure_type": 0,
146
+ "num_attention_heads": 12,
147
+ "num_hidden_groups": 1,
148
+ "num_hidden_layers": 4,
149
+ "num_memory_blocks": 0,
150
+ "pad_token_id": 0,
151
+ "type_vocab_size": 2,
152
+ "vocab_size": 21128
153
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad2531da210e17fef06b14ad9b5b056ccdcf4bd8952cdb13142d094a67f9c2c8
3
+ size 16021593
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": false, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-chinese"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff