ZJU-Fangyin committed
Commit
19ceedd
1 Parent(s): a92e79d

Upload 6 files

Files changed (6)
  1. added_tokens.json +183 -0
  2. config.json +44 -0
  3. merges.txt +0 -0
  4. special_tokens_map.json +51 -0
  5. tokenizer_config.json +64 -0
  6. vocab.json +6 -0
added_tokens.json ADDED
@@ -0,0 +1,183 @@
+ {
+ "<mask>": 184,
+ "[#Branch1]": 12,
+ "[#Branch2]": 178,
+ "[#C-1]": 181,
+ "[#C]": 165,
+ "[#N+1]": 120,
+ "[#N]": 23,
+ "[#P]": 87,
+ "[#Ring1]": 128,
+ "[#S]": 40,
+ "[-/Ring1]": 129,
+ "[-/Ring2]": 156,
+ "[-\\Ring1]": 164,
+ "[-\\Ring2]": 34,
+ "[/123I]": 7,
+ "[/B]": 125,
+ "[/Br]": 149,
+ "[/C-1]": 68,
+ "[/C@@H1]": 13,
+ "[/C@@]": 14,
+ "[/C@H1]": 177,
+ "[/C@]": 31,
+ "[/CH0]": 126,
+ "[/CH1-1]": 88,
+ "[/CH1]": 10,
+ "[/C]": 118,
+ "[/Cl]": 155,
+ "[/F]": 6,
+ "[/I]": 83,
+ "[/N+1]": 152,
+ "[/NH0]": 55,
+ "[/NH1]": 24,
+ "[/N]": 67,
+ "[/O-1]": 171,
+ "[/OH0]": 174,
+ "[/O]": 69,
+ "[/P@@]": 16,
+ "[/P]": 79,
+ "[/S+1]": 47,
+ "[/S@@+1]": 21,
+ "[/S@@]": 160,
+ "[/S@]": 77,
+ "[/S]": 112,
+ "[/Si]": 38,
+ "[11CH3]": 107,
+ "[123I]": 124,
+ "[124I]": 136,
+ "[125I]": 39,
+ "[127I]": 36,
+ "[17F]": 143,
+ "[18F]": 97,
+ "[18OH1]": 76,
+ "[3H]": 137,
+ "[=B]": 25,
+ "[=Branch1]": 33,
+ "[=Branch2]": 100,
+ "[=CH0]": 78,
+ "[=C]": 35,
+ "[=N+1]": 144,
+ "[=N-1]": 161,
+ "[=NH0]": 27,
+ "[=N]": 105,
+ "[=O+1]": 96,
+ "[=O]": 30,
+ "[=P+1]": 98,
+ "[=P@@H1]": 32,
+ "[=P@@]": 53,
+ "[=P@H1]": 147,
+ "[=P@]": 157,
+ "[=PH1]": 132,
+ "[=P]": 86,
+ "[=Ring1]": 163,
+ "[=Ring2]": 52,
+ "[=S+1]": 133,
+ "[=S@+1]": 167,
+ "[=S@@+1]": 42,
+ "[=S@@H1]": 106,
+ "[=S@@]": 5,
+ "[=S@]": 44,
+ "[=SH1]": 117,
+ "[=S]": 73,
+ "[B-1]": 122,
+ "[B@-1]": 179,
+ "[B@@-1]": 17,
+ "[B@@H1-1]": 56,
+ "[B@H1-1]": 121,
+ "[BH1-1]": 54,
+ "[BH2-1]": 91,
+ "[BH3-1]": 75,
+ "[B]": 108,
+ "[Br+1]": 166,
+ "[Br]": 62,
+ "[Branch1]": 104,
+ "[Branch2]": 140,
+ "[C+1]": 65,
+ "[C-1]": 8,
+ "[C@@H1]": 15,
+ "[C@@]": 173,
+ "[C@H1]": 29,
+ "[C@]": 9,
+ "[CH0]": 28,
+ "[CH1+1]": 41,
+ "[CH1-1]": 4,
+ "[CH1]": 115,
+ "[CH2-1]": 110,
+ "[CH2]": 146,
+ "[C]": 139,
+ "[Cl]": 11,
+ "[F+1]": 50,
+ "[F]": 66,
+ "[I]": 63,
+ "[N+1]": 150,
+ "[N-1]": 84,
+ "[N@+1]": 111,
+ "[N@@+1]": 131,
+ "[N@@H1+1]": 162,
+ "[NH0]": 148,
+ "[NH1]": 130,
+ "[N]": 19,
+ "[O+1]": 85,
+ "[O-1]": 58,
+ "[OH0]": 71,
+ "[O]": 20,
+ "[P+1]": 82,
+ "[P@+1]": 158,
+ "[P@@+1]": 94,
+ "[P@@H1]": 170,
+ "[P@@]": 59,
+ "[P@H1]": 81,
+ "[P@]": 175,
+ "[PH1]": 134,
+ "[PH2]": 51,
+ "[P]": 80,
+ "[Ring1]": 70,
+ "[Ring2]": 90,
+ "[S+1]": 61,
+ "[S@+1]": 72,
+ "[S@@+1]": 57,
+ "[S@@H1]": 45,
+ "[S@@]": 154,
+ "[S@]": 93,
+ "[SH0]": 182,
+ "[SH1]": 123,
+ "[SH2]": 101,
+ "[SH3]": 102,
+ "[S]": 60,
+ "[Si]": 119,
+ "[Sn+2]": 183,
+ "[Sn+3]": 135,
+ "[SnH1]": 109,
+ "[SnH2]": 99,
+ "[Sn]": 114,
+ "[\\123I]": 89,
+ "[\\B-1]": 64,
+ "[\\B]": 159,
+ "[\\Br]": 142,
+ "[\\C-1]": 127,
+ "[\\C@@H1]": 22,
+ "[\\C@@]": 46,
+ "[\\C@H1]": 49,
+ "[\\C@]": 145,
+ "[\\CH0]": 176,
+ "[\\CH1-1]": 153,
+ "[\\C]": 43,
+ "[\\Cl]": 37,
+ "[\\F]": 26,
+ "[\\I]": 172,
+ "[\\N+1]": 92,
+ "[\\NH1]": 103,
+ "[\\N]": 138,
+ "[\\O-1]": 141,
+ "[\\O]": 18,
+ "[\\P@@]": 113,
+ "[\\P]": 74,
+ "[\\S+1]": 95,
+ "[\\S@@+1]": 169,
+ "[\\S@@]": 180,
+ "[\\S@]": 116,
+ "[\\SH1]": 151,
+ "[\\S]": 48,
+ "[\\Si]": 168
+ }
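
The keys above are SELFIES symbols with IDs 4-184, extending the four base tokens in vocab.json to the 185-entry vocabulary declared in config.json. A minimal coverage-check sketch (not part of the commit), assuming the `selfies` package and that these files sit in the working directory:

```python
# Sketch: verify that a molecule's SELFIES symbols are all covered by
# added_tokens.json. The test molecule is an arbitrary choice.
import json

import selfies as sf  # pip install selfies

with open("added_tokens.json") as f:
    added = json.load(f)

smiles = "CC(=O)Oc1ccccc1C(=O)O"  # aspirin, chosen only for illustration
symbols = list(sf.split_selfies(sf.encoder(smiles)))
missing = [s for s in symbols if s not in added]
print("uncovered symbols:", missing or "none")
```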
config.json ADDED
@@ -0,0 +1,44 @@
+ {
+ "activation_dropout": 0.0,
+ "activation_function": "gelu",
+ "architectures": [
+ "BartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_position_embeddings": 1024,
+ "model_type": "bart",
+ "num_hidden_layers": 12,
+ "pad_token_id": 1,
+ "scale_embedding": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.21.2",
+ "use_cache": true,
+ "vocab_size": 185
+ }
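
This config describes a BART encoder-decoder with BART-large dimensions (d_model 1024, 12 encoder and 12 decoder layers, 16 attention heads, FFN 4096) over the 185-token vocabulary. A sketch of instantiating the architecture from this file alone; since no weights are included in this commit, the resulting model is randomly initialized:

```python
# Sketch: build the model shape described by config.json.
from transformers import BartConfig, BartForConditionalGeneration

config = BartConfig.from_json_file("config.json")
model = BartForConditionalGeneration(config)          # random init, no weights here
print(model.config.vocab_size)                        # 185
print(f"{sum(p.numel() for p in model.parameters()):,} parameters")
```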
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
+ {
+ "add_prefix_space": false,
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "errors": "replace",
+ "mask_token": {
+ "__type": "AddedToken",
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "model_max_length": 1024,
+ "name_or_path": "facebook/bart-base",
+ "pad_token": {
+ "__type": "AddedToken",
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "special_tokens_map_file": null,
+ "tokenizer_class": "BartTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
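
With vocab.json, merges.txt, added_tokens.json, special_tokens_map.json, and this file in one directory, the tokenizer loads as a standard BartTokenizer, and the SELFIES symbols come in as added tokens. A minimal sketch, assuming the files are in the working directory:

```python
# Sketch: load the tokenizer defined by these files and spot-check an ID.
from transformers import BartTokenizer

tokenizer = BartTokenizer.from_pretrained(".")    # directory with these files
print(len(tokenizer))                             # 185
print(tokenizer.convert_tokens_to_ids("[C]"))     # 139, per added_tokens.json
```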
vocab.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "</s>": 2,
+ "<pad>": 1,
+ "<s>": 0,
+ "<unk>": 3
+ }
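
Taken together, the six files define the tokenizer and the model shape, but no weights. A hypothetical end-to-end sketch, assuming a trained checkpoint is also present in the repo and that the model maps SELFIES to SELFIES (the intended task is not shown in this diff, so treat the generate() call as illustrative only):

```python
# Sketch: tokenize a SELFIES string and run generation, assuming weights exist.
from transformers import BartForConditionalGeneration, BartTokenizer

repo = "."  # or the Hub repo ID this commit belongs to
tokenizer = BartTokenizer.from_pretrained(repo)
model = BartForConditionalGeneration.from_pretrained(repo)  # requires weight files

inputs = tokenizer("[C][C][O]", return_tensors="pt")  # ethanol in SELFIES
out = model.generate(**inputs, max_length=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```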