victornica committed on
Commit
1af1b02
1 Parent(s): 554f442

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +30 -0
  2. tokenizer_config.json +38 -0
  3. vocab_file.txt +96 -0
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "clean_up_tokenization_spaces": true,
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "model_max_length": 72,
21
+ "pad_token": {
22
+ "__type": "AddedToken",
23
+ "content": "<|endoftext|>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false
28
+ },
29
+ "tokenizer_class": "CharacterTokenizer",
30
+ "unk_token": {
31
+ "__type": "AddedToken",
32
+ "content": "<|endoftext|>",
33
+ "lstrip": false,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ }
38
+ }
vocab_file.txt ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <|endoftext|>
2
+ [#Branch1]
3
+ [#Branch2]
4
+ [#C-1]
5
+ [#C]
6
+ [#N+1]
7
+ [#N]
8
+ [#O+1]
9
+ [=B]
10
+ [=Branch1]
11
+ [=Branch2]
12
+ [=C-1]
13
+ [=C]
14
+ [=N+1]
15
+ [=N-1]
16
+ [=NH1+1]
17
+ [=NH2+1]
18
+ [=N]
19
+ [=O+1]
20
+ [=OH1+1]
21
+ [=O]
22
+ [=PH1]
23
+ [=P]
24
+ [=Ring1]
25
+ [=Ring2]
26
+ [=S+1]
27
+ [=SH1]
28
+ [=S]
29
+ [=Se+1]
30
+ [=Se]
31
+ [=Si]
32
+ [B-1]
33
+ [BH0]
34
+ [BH1-1]
35
+ [BH2-1]
36
+ [BH3-1]
37
+ [B]
38
+ [Br+2]
39
+ [Br-1]
40
+ [Br]
41
+ [Branch1]
42
+ [Branch2]
43
+ [C+1]
44
+ [C-1]
45
+ [CH1+1]
46
+ [CH1-1]
47
+ [CH1]
48
+ [CH2+1]
49
+ [CH2]
50
+ [C]
51
+ [Cl+1]
52
+ [Cl+2]
53
+ [Cl+3]
54
+ [Cl-1]
55
+ [Cl]
56
+ [F+1]
57
+ [F-1]
58
+ [F]
59
+ [H]
60
+ [I+1]
61
+ [I+2]
62
+ [I+3]
63
+ [I]
64
+ [N+1]
65
+ [N-1]
66
+ [NH0]
67
+ [NH1+1]
68
+ [NH1-1]
69
+ [NH1]
70
+ [NH2+1]
71
+ [NH3+1]
72
+ [N]
73
+ [O+1]
74
+ [O-1]
75
+ [OH0]
76
+ [O]
77
+ [P+1]
78
+ [PH1]
79
+ [PH2+1]
80
+ [P]
81
+ [Ring1]
82
+ [Ring2]
83
+ [S+1]
84
+ [S-1]
85
+ [SH1]
86
+ [S]
87
+ [Se+1]
88
+ [Se-1]
89
+ [SeH1]
90
+ [SeH2]
91
+ [Se]
92
+ [Si-1]
93
+ [SiH1-1]
94
+ [SiH1]
95
+ [SiH2]
96
+ [Si]