wenhuan committed on
Commit
8316d26
1 Parent(s): f5182a5

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "AGN": 25000,
3
+ "ATA": 25002,
4
+ "ATG": 25011,
5
+ "ATN": 25034,
6
+ "CCG": 25016,
7
+ "CCN": 25006,
8
+ "CGN": 25021,
9
+ "CTA": 25003,
10
+ "CTG": 25008,
11
+ "CTN": 25022,
12
+ "GAN": 25027,
13
+ "GCG": 25032,
14
+ "GCN": 25014,
15
+ "GGN": 25017,
16
+ "GTA": 25030,
17
+ "GTG": 25033,
18
+ "GTN": 25029,
19
+ "NAA": 25023,
20
+ "NAC": 25010,
21
+ "NAG": 25019,
22
+ "NAT": 25025,
23
+ "NCA": 25020,
24
+ "NCG": 25005,
25
+ "NCT": 25035,
26
+ "NGA": 25009,
27
+ "NGC": 25024,
28
+ "NGG": 25026,
29
+ "NGT": 25036,
30
+ "NTA": 25028,
31
+ "NTC": 25012,
32
+ "NTG": 25004,
33
+ "NTT": 25018,
34
+ "TAN": 25013,
35
+ "TCG": 25015,
36
+ "TCN": 25031,
37
+ "TGN": 25038,
38
+ "TTA": 25001,
39
+ "TTG": 25037,
40
+ "TTN": 25007
41
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": false,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "/mnt/volume/project/5mC/pretrainedModel/distilbert/tokenizer/distilbert_seq_tax_trained",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "DistilBertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff