nozzi committed on
Commit
7c436d9
1 Parent(s): ecf7e87

2023.09.26

Browse files
added_tokens.json ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[BOS]": 32000,
3
+ "[EOS]": 32001,
4
+ "[UNK0]": 32002,
5
+ "[UNK1]": 32003,
6
+ "[UNK2]": 32004,
7
+ "[UNK3]": 32005,
8
+ "[UNK4]": 32006,
9
+ "[UNK5]": 32007,
10
+ "[UNK6]": 32008,
11
+ "[UNK7]": 32009,
12
+ "[UNK8]": 32010,
13
+ "[UNK9]": 32011,
14
+ "[unused0]": 32012,
15
+ "[unused100]": 32112,
16
+ "[unused101]": 32113,
17
+ "[unused102]": 32114,
18
+ "[unused103]": 32115,
19
+ "[unused104]": 32116,
20
+ "[unused105]": 32117,
21
+ "[unused106]": 32118,
22
+ "[unused107]": 32119,
23
+ "[unused108]": 32120,
24
+ "[unused109]": 32121,
25
+ "[unused10]": 32022,
26
+ "[unused110]": 32122,
27
+ "[unused111]": 32123,
28
+ "[unused112]": 32124,
29
+ "[unused113]": 32125,
30
+ "[unused114]": 32126,
31
+ "[unused115]": 32127,
32
+ "[unused116]": 32128,
33
+ "[unused117]": 32129,
34
+ "[unused118]": 32130,
35
+ "[unused119]": 32131,
36
+ "[unused11]": 32023,
37
+ "[unused120]": 32132,
38
+ "[unused121]": 32133,
39
+ "[unused122]": 32134,
40
+ "[unused123]": 32135,
41
+ "[unused124]": 32136,
42
+ "[unused125]": 32137,
43
+ "[unused126]": 32138,
44
+ "[unused127]": 32139,
45
+ "[unused128]": 32140,
46
+ "[unused129]": 32141,
47
+ "[unused12]": 32024,
48
+ "[unused130]": 32142,
49
+ "[unused131]": 32143,
50
+ "[unused132]": 32144,
51
+ "[unused133]": 32145,
52
+ "[unused134]": 32146,
53
+ "[unused135]": 32147,
54
+ "[unused136]": 32148,
55
+ "[unused137]": 32149,
56
+ "[unused138]": 32150,
57
+ "[unused139]": 32151,
58
+ "[unused13]": 32025,
59
+ "[unused140]": 32152,
60
+ "[unused141]": 32153,
61
+ "[unused142]": 32154,
62
+ "[unused143]": 32155,
63
+ "[unused144]": 32156,
64
+ "[unused145]": 32157,
65
+ "[unused146]": 32158,
66
+ "[unused147]": 32159,
67
+ "[unused148]": 32160,
68
+ "[unused149]": 32161,
69
+ "[unused14]": 32026,
70
+ "[unused150]": 32162,
71
+ "[unused151]": 32163,
72
+ "[unused152]": 32164,
73
+ "[unused153]": 32165,
74
+ "[unused154]": 32166,
75
+ "[unused155]": 32167,
76
+ "[unused156]": 32168,
77
+ "[unused157]": 32169,
78
+ "[unused158]": 32170,
79
+ "[unused159]": 32171,
80
+ "[unused15]": 32027,
81
+ "[unused160]": 32172,
82
+ "[unused161]": 32173,
83
+ "[unused162]": 32174,
84
+ "[unused163]": 32175,
85
+ "[unused164]": 32176,
86
+ "[unused165]": 32177,
87
+ "[unused166]": 32178,
88
+ "[unused167]": 32179,
89
+ "[unused168]": 32180,
90
+ "[unused169]": 32181,
91
+ "[unused16]": 32028,
92
+ "[unused170]": 32182,
93
+ "[unused171]": 32183,
94
+ "[unused172]": 32184,
95
+ "[unused173]": 32185,
96
+ "[unused174]": 32186,
97
+ "[unused175]": 32187,
98
+ "[unused176]": 32188,
99
+ "[unused177]": 32189,
100
+ "[unused178]": 32190,
101
+ "[unused179]": 32191,
102
+ "[unused17]": 32029,
103
+ "[unused180]": 32192,
104
+ "[unused181]": 32193,
105
+ "[unused182]": 32194,
106
+ "[unused183]": 32195,
107
+ "[unused184]": 32196,
108
+ "[unused185]": 32197,
109
+ "[unused186]": 32198,
110
+ "[unused187]": 32199,
111
+ "[unused188]": 32200,
112
+ "[unused189]": 32201,
113
+ "[unused18]": 32030,
114
+ "[unused190]": 32202,
115
+ "[unused191]": 32203,
116
+ "[unused192]": 32204,
117
+ "[unused193]": 32205,
118
+ "[unused194]": 32206,
119
+ "[unused195]": 32207,
120
+ "[unused196]": 32208,
121
+ "[unused197]": 32209,
122
+ "[unused198]": 32210,
123
+ "[unused199]": 32211,
124
+ "[unused19]": 32031,
125
+ "[unused1]": 32013,
126
+ "[unused20]": 32032,
127
+ "[unused21]": 32033,
128
+ "[unused22]": 32034,
129
+ "[unused23]": 32035,
130
+ "[unused24]": 32036,
131
+ "[unused25]": 32037,
132
+ "[unused26]": 32038,
133
+ "[unused27]": 32039,
134
+ "[unused28]": 32040,
135
+ "[unused29]": 32041,
136
+ "[unused2]": 32014,
137
+ "[unused30]": 32042,
138
+ "[unused31]": 32043,
139
+ "[unused32]": 32044,
140
+ "[unused33]": 32045,
141
+ "[unused34]": 32046,
142
+ "[unused35]": 32047,
143
+ "[unused36]": 32048,
144
+ "[unused37]": 32049,
145
+ "[unused38]": 32050,
146
+ "[unused39]": 32051,
147
+ "[unused3]": 32015,
148
+ "[unused40]": 32052,
149
+ "[unused41]": 32053,
150
+ "[unused42]": 32054,
151
+ "[unused43]": 32055,
152
+ "[unused44]": 32056,
153
+ "[unused45]": 32057,
154
+ "[unused46]": 32058,
155
+ "[unused47]": 32059,
156
+ "[unused48]": 32060,
157
+ "[unused49]": 32061,
158
+ "[unused4]": 32016,
159
+ "[unused50]": 32062,
160
+ "[unused51]": 32063,
161
+ "[unused52]": 32064,
162
+ "[unused53]": 32065,
163
+ "[unused54]": 32066,
164
+ "[unused55]": 32067,
165
+ "[unused56]": 32068,
166
+ "[unused57]": 32069,
167
+ "[unused58]": 32070,
168
+ "[unused59]": 32071,
169
+ "[unused5]": 32017,
170
+ "[unused60]": 32072,
171
+ "[unused61]": 32073,
172
+ "[unused62]": 32074,
173
+ "[unused63]": 32075,
174
+ "[unused64]": 32076,
175
+ "[unused65]": 32077,
176
+ "[unused66]": 32078,
177
+ "[unused67]": 32079,
178
+ "[unused68]": 32080,
179
+ "[unused69]": 32081,
180
+ "[unused6]": 32018,
181
+ "[unused70]": 32082,
182
+ "[unused71]": 32083,
183
+ "[unused72]": 32084,
184
+ "[unused73]": 32085,
185
+ "[unused74]": 32086,
186
+ "[unused75]": 32087,
187
+ "[unused76]": 32088,
188
+ "[unused77]": 32089,
189
+ "[unused78]": 32090,
190
+ "[unused79]": 32091,
191
+ "[unused7]": 32019,
192
+ "[unused80]": 32092,
193
+ "[unused81]": 32093,
194
+ "[unused82]": 32094,
195
+ "[unused83]": 32095,
196
+ "[unused84]": 32096,
197
+ "[unused85]": 32097,
198
+ "[unused86]": 32098,
199
+ "[unused87]": 32099,
200
+ "[unused88]": 32100,
201
+ "[unused89]": 32101,
202
+ "[unused8]": 32020,
203
+ "[unused90]": 32102,
204
+ "[unused91]": 32103,
205
+ "[unused92]": 32104,
206
+ "[unused93]": 32105,
207
+ "[unused94]": 32106,
208
+ "[unused95]": 32107,
209
+ "[unused96]": 32108,
210
+ "[unused97]": 32109,
211
+ "[unused98]": 32110,
212
+ "[unused99]": 32111,
213
+ "[unused9]": 32021
214
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[BOS]",
4
+ "[EOS]",
5
+ "[UNK0]",
6
+ "[UNK1]",
7
+ "[UNK2]",
8
+ "[UNK3]",
9
+ "[UNK4]",
10
+ "[UNK5]",
11
+ "[UNK6]",
12
+ "[UNK7]",
13
+ "[UNK8]",
14
+ "[UNK9]",
15
+ "[unused0]",
16
+ "[unused1]",
17
+ "[unused2]",
18
+ "[unused3]",
19
+ "[unused4]",
20
+ "[unused5]",
21
+ "[unused6]",
22
+ "[unused7]",
23
+ "[unused8]",
24
+ "[unused9]",
25
+ "[unused10]",
26
+ "[unused11]",
27
+ "[unused12]",
28
+ "[unused13]",
29
+ "[unused14]",
30
+ "[unused15]",
31
+ "[unused16]",
32
+ "[unused17]",
33
+ "[unused18]",
34
+ "[unused19]",
35
+ "[unused20]",
36
+ "[unused21]",
37
+ "[unused22]",
38
+ "[unused23]",
39
+ "[unused24]",
40
+ "[unused25]",
41
+ "[unused26]",
42
+ "[unused27]",
43
+ "[unused28]",
44
+ "[unused29]",
45
+ "[unused30]",
46
+ "[unused31]",
47
+ "[unused32]",
48
+ "[unused33]",
49
+ "[unused34]",
50
+ "[unused35]",
51
+ "[unused36]",
52
+ "[unused37]",
53
+ "[unused38]",
54
+ "[unused39]",
55
+ "[unused40]",
56
+ "[unused41]",
57
+ "[unused42]",
58
+ "[unused43]",
59
+ "[unused44]",
60
+ "[unused45]",
61
+ "[unused46]",
62
+ "[unused47]",
63
+ "[unused48]",
64
+ "[unused49]",
65
+ "[unused50]",
66
+ "[unused51]",
67
+ "[unused52]",
68
+ "[unused53]",
69
+ "[unused54]",
70
+ "[unused55]",
71
+ "[unused56]",
72
+ "[unused57]",
73
+ "[unused58]",
74
+ "[unused59]",
75
+ "[unused60]",
76
+ "[unused61]",
77
+ "[unused62]",
78
+ "[unused63]",
79
+ "[unused64]",
80
+ "[unused65]",
81
+ "[unused66]",
82
+ "[unused67]",
83
+ "[unused68]",
84
+ "[unused69]",
85
+ "[unused70]",
86
+ "[unused71]",
87
+ "[unused72]",
88
+ "[unused73]",
89
+ "[unused74]",
90
+ "[unused75]",
91
+ "[unused76]",
92
+ "[unused77]",
93
+ "[unused78]",
94
+ "[unused79]",
95
+ "[unused80]",
96
+ "[unused81]",
97
+ "[unused82]",
98
+ "[unused83]",
99
+ "[unused84]",
100
+ "[unused85]",
101
+ "[unused86]",
102
+ "[unused87]",
103
+ "[unused88]",
104
+ "[unused89]",
105
+ "[unused90]",
106
+ "[unused91]",
107
+ "[unused92]",
108
+ "[unused93]",
109
+ "[unused94]",
110
+ "[unused95]",
111
+ "[unused96]",
112
+ "[unused97]",
113
+ "[unused98]",
114
+ "[unused99]",
115
+ "[unused100]",
116
+ "[unused101]",
117
+ "[unused102]",
118
+ "[unused103]",
119
+ "[unused104]",
120
+ "[unused105]",
121
+ "[unused106]",
122
+ "[unused107]",
123
+ "[unused108]",
124
+ "[unused109]",
125
+ "[unused110]",
126
+ "[unused111]",
127
+ "[unused112]",
128
+ "[unused113]",
129
+ "[unused114]",
130
+ "[unused115]",
131
+ "[unused116]",
132
+ "[unused117]",
133
+ "[unused118]",
134
+ "[unused119]",
135
+ "[unused120]",
136
+ "[unused121]",
137
+ "[unused122]",
138
+ "[unused123]",
139
+ "[unused124]",
140
+ "[unused125]",
141
+ "[unused126]",
142
+ "[unused127]",
143
+ "[unused128]",
144
+ "[unused129]",
145
+ "[unused130]",
146
+ "[unused131]",
147
+ "[unused132]",
148
+ "[unused133]",
149
+ "[unused134]",
150
+ "[unused135]",
151
+ "[unused136]",
152
+ "[unused137]",
153
+ "[unused138]",
154
+ "[unused139]",
155
+ "[unused140]",
156
+ "[unused141]",
157
+ "[unused142]",
158
+ "[unused143]",
159
+ "[unused144]",
160
+ "[unused145]",
161
+ "[unused146]",
162
+ "[unused147]",
163
+ "[unused148]",
164
+ "[unused149]",
165
+ "[unused150]",
166
+ "[unused151]",
167
+ "[unused152]",
168
+ "[unused153]",
169
+ "[unused154]",
170
+ "[unused155]",
171
+ "[unused156]",
172
+ "[unused157]",
173
+ "[unused158]",
174
+ "[unused159]",
175
+ "[unused160]",
176
+ "[unused161]",
177
+ "[unused162]",
178
+ "[unused163]",
179
+ "[unused164]",
180
+ "[unused165]",
181
+ "[unused166]",
182
+ "[unused167]",
183
+ "[unused168]",
184
+ "[unused169]",
185
+ "[unused170]",
186
+ "[unused171]",
187
+ "[unused172]",
188
+ "[unused173]",
189
+ "[unused174]",
190
+ "[unused175]",
191
+ "[unused176]",
192
+ "[unused177]",
193
+ "[unused178]",
194
+ "[unused179]",
195
+ "[unused180]",
196
+ "[unused181]",
197
+ "[unused182]",
198
+ "[unused183]",
199
+ "[unused184]",
200
+ "[unused185]",
201
+ "[unused186]",
202
+ "[unused187]",
203
+ "[unused188]",
204
+ "[unused189]",
205
+ "[unused190]",
206
+ "[unused191]",
207
+ "[unused192]",
208
+ "[unused193]",
209
+ "[unused194]",
210
+ "[unused195]",
211
+ "[unused196]",
212
+ "[unused197]",
213
+ "[unused198]",
214
+ "[unused199]"
215
+ ],
216
+ "cls_token": "[CLS]",
217
+ "mask_token": "[MASK]",
218
+ "pad_token": "[PAD]",
219
+ "sep_token": "[SEP]",
220
+ "unk_token": "[UNK]"
221
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": true,
5
+ "lowercase": false,
6
+ "mask_token": "[MASK]",
7
+ "name_or_path": "tokenizer_model",
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "special_tokens_map_file": null,
12
+ "strip_accents": false,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "BertTokenizer",
15
+ "unk_token": "[UNK]"
16
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff