ohallstrom committed on
Commit
0ae7ba1
1 Parent(s): f9430da

Add tokenizer files

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +371 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +9 -0
special_tokens_map.json ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ ">>TITLE<<",
4
+ ">>ABSTRACT<<",
5
+ ">>INTRODUCTION<<",
6
+ ">>SUMMARY<<",
7
+ ">>COMMENT<<",
8
+ ">>ANSWER<<",
9
+ ">>QUESTION<<",
10
+ ">>DOMAIN<<",
11
+ ">>PREFIX<<",
12
+ ">>SUFFIX<<",
13
+ ">>MIDDLE<<",
14
+ ">>[S]<<",
15
+ ">>[X]<<",
16
+ ">>[R]<<",
17
+ ">>S0<<",
18
+ ">>S1<<",
19
+ ">>S2<<",
20
+ ">>S3<<",
21
+ ">>S4<<",
22
+ ">>S5<<",
23
+ ">>S6<<",
24
+ ">>S7<<",
25
+ ">>S8<<",
26
+ ">>S9<<",
27
+ ">>S10<<",
28
+ ">>S11<<",
29
+ ">>S12<<",
30
+ ">>S13<<",
31
+ ">>S14<<",
32
+ ">>S15<<",
33
+ ">>S16<<",
34
+ ">>S17<<",
35
+ ">>S18<<",
36
+ ">>S19<<",
37
+ ">>S20<<",
38
+ ">>S21<<",
39
+ ">>S22<<",
40
+ ">>S23<<",
41
+ ">>S24<<",
42
+ ">>S25<<",
43
+ ">>S26<<",
44
+ ">>S27<<",
45
+ ">>S28<<",
46
+ ">>S29<<",
47
+ ">>S30<<",
48
+ ">>S31<<",
49
+ ">>S32<<",
50
+ ">>S33<<",
51
+ ">>S34<<",
52
+ ">>S35<<",
53
+ ">>S36<<",
54
+ ">>S37<<",
55
+ ">>S38<<",
56
+ ">>S39<<",
57
+ ">>S40<<",
58
+ ">>S41<<",
59
+ ">>S42<<",
60
+ ">>S43<<",
61
+ ">>S44<<",
62
+ ">>S45<<",
63
+ ">>S46<<",
64
+ ">>S47<<",
65
+ ">>S48<<",
66
+ ">>S49<<",
67
+ ">>S50<<",
68
+ ">>S51<<",
69
+ ">>S52<<",
70
+ ">>S53<<",
71
+ ">>S54<<",
72
+ ">>S55<<",
73
+ ">>S56<<",
74
+ ">>S57<<",
75
+ ">>S58<<",
76
+ ">>S59<<",
77
+ ">>S60<<",
78
+ ">>S61<<",
79
+ ">>S62<<",
80
+ ">>S63<<",
81
+ ">>S64<<",
82
+ ">>S65<<",
83
+ ">>S66<<",
84
+ ">>S67<<",
85
+ ">>S68<<",
86
+ ">>S69<<",
87
+ ">>S70<<",
88
+ ">>S71<<",
89
+ ">>S72<<",
90
+ ">>S73<<",
91
+ ">>S74<<",
92
+ ">>S75<<",
93
+ ">>S76<<",
94
+ ">>S77<<",
95
+ ">>S78<<",
96
+ ">>S79<<",
97
+ ">>S80<<",
98
+ ">>S81<<",
99
+ ">>S82<<",
100
+ ">>S83<<",
101
+ ">>S84<<",
102
+ ">>S85<<",
103
+ ">>S86<<",
104
+ ">>S87<<",
105
+ ">>S88<<",
106
+ ">>S89<<",
107
+ ">>S90<<",
108
+ ">>S91<<",
109
+ ">>S92<<",
110
+ ">>S93<<",
111
+ ">>S94<<",
112
+ ">>S95<<",
113
+ ">>S96<<",
114
+ ">>S97<<",
115
+ ">>S98<<",
116
+ ">>S99<<",
117
+ ">>S100<<",
118
+ ">>S101<<",
119
+ ">>S102<<",
120
+ ">>S103<<",
121
+ ">>S104<<",
122
+ ">>S105<<",
123
+ ">>S106<<",
124
+ ">>S107<<",
125
+ ">>S108<<",
126
+ ">>S109<<",
127
+ ">>S110<<",
128
+ ">>S111<<",
129
+ ">>S112<<",
130
+ ">>S113<<",
131
+ ">>S114<<",
132
+ ">>S115<<",
133
+ ">>S116<<",
134
+ ">>S117<<",
135
+ ">>S118<<",
136
+ ">>S119<<",
137
+ ">>S120<<",
138
+ ">>S121<<",
139
+ ">>S122<<",
140
+ ">>S123<<",
141
+ ">>S124<<",
142
+ ">>S125<<",
143
+ ">>S126<<",
144
+ ">>S127<<",
145
+ ">>S128<<",
146
+ ">>S129<<",
147
+ ">>S130<<",
148
+ ">>S131<<",
149
+ ">>S132<<",
150
+ ">>S133<<",
151
+ ">>S134<<",
152
+ ">>S135<<",
153
+ ">>S136<<",
154
+ ">>S137<<",
155
+ ">>S138<<",
156
+ ">>S139<<",
157
+ ">>S140<<",
158
+ ">>S141<<",
159
+ ">>S142<<",
160
+ ">>S143<<",
161
+ ">>S144<<",
162
+ ">>S145<<",
163
+ ">>S146<<",
164
+ ">>S147<<",
165
+ ">>S148<<",
166
+ ">>S149<<",
167
+ ">>S150<<",
168
+ ">>S151<<",
169
+ ">>S152<<",
170
+ ">>S153<<",
171
+ ">>S154<<",
172
+ ">>S155<<",
173
+ ">>S156<<",
174
+ ">>S157<<",
175
+ ">>S158<<",
176
+ ">>S159<<",
177
+ ">>S160<<",
178
+ ">>S161<<",
179
+ ">>S162<<",
180
+ ">>S163<<",
181
+ ">>S164<<",
182
+ ">>S165<<",
183
+ ">>S166<<",
184
+ ">>S167<<",
185
+ ">>S168<<",
186
+ ">>S169<<",
187
+ ">>S170<<",
188
+ ">>S171<<",
189
+ ">>S172<<",
190
+ ">>S173<<",
191
+ ">>S174<<",
192
+ ">>S175<<",
193
+ ">>S176<<",
194
+ ">>S177<<",
195
+ ">>S178<<",
196
+ ">>S179<<",
197
+ ">>S180<<",
198
+ ">>S181<<",
199
+ ">>S182<<",
200
+ ">>S183<<",
201
+ ">>S184<<",
202
+ ">>S185<<",
203
+ ">>S186<<",
204
+ ">>S187<<",
205
+ ">>S188<<",
206
+ ">>S189<<",
207
+ ">>S190<<",
208
+ ">>S191<<",
209
+ ">>S192<<",
210
+ ">>S193<<",
211
+ ">>S194<<",
212
+ ">>S195<<",
213
+ ">>S196<<",
214
+ ">>S197<<",
215
+ ">>S198<<",
216
+ ">>S199<<",
217
+ ">>S200<<",
218
+ ">>S201<<",
219
+ ">>S202<<",
220
+ ">>S203<<",
221
+ ">>S204<<",
222
+ ">>S205<<",
223
+ ">>S206<<",
224
+ ">>S207<<",
225
+ ">>S208<<",
226
+ ">>S209<<",
227
+ ">>S210<<",
228
+ ">>S211<<",
229
+ ">>S212<<",
230
+ ">>S213<<",
231
+ ">>S214<<",
232
+ ">>S215<<",
233
+ ">>S216<<",
234
+ ">>S217<<",
235
+ ">>S218<<",
236
+ ">>S219<<",
237
+ ">>S220<<",
238
+ ">>S221<<",
239
+ ">>S222<<",
240
+ ">>S223<<",
241
+ ">>S224<<",
242
+ ">>S225<<",
243
+ ">>S226<<",
244
+ ">>S227<<",
245
+ ">>S228<<",
246
+ ">>S229<<",
247
+ ">>S230<<",
248
+ ">>S231<<",
249
+ ">>S232<<",
250
+ ">>S233<<",
251
+ ">>S234<<",
252
+ ">>S235<<",
253
+ ">>S236<<",
254
+ ">>S237<<",
255
+ ">>S238<<",
256
+ ">>S239<<",
257
+ ">>S240<<",
258
+ ">>S241<<",
259
+ ">>S242<<",
260
+ ">>S243<<",
261
+ ">>S244<<",
262
+ ">>S245<<",
263
+ ">>S246<<",
264
+ ">>S247<<",
265
+ ">>S248<<",
266
+ ">>S249<<",
267
+ ">>S250<<",
268
+ ">>S251<<",
269
+ ">>S252<<",
270
+ ">>S253<<",
271
+ ">>S254<<",
272
+ ">>S255<<",
273
+ ">>S256<<",
274
+ ">>S257<<",
275
+ ">>S258<<",
276
+ ">>S259<<",
277
+ ">>S260<<",
278
+ ">>S261<<",
279
+ ">>S262<<",
280
+ ">>S263<<",
281
+ ">>S264<<",
282
+ ">>S265<<",
283
+ ">>S266<<",
284
+ ">>S267<<",
285
+ ">>S268<<",
286
+ ">>S269<<",
287
+ ">>S270<<",
288
+ ">>S271<<",
289
+ ">>S272<<",
290
+ ">>S273<<",
291
+ ">>S274<<",
292
+ ">>S275<<",
293
+ ">>S276<<",
294
+ ">>S277<<",
295
+ ">>S278<<",
296
+ ">>S279<<",
297
+ ">>S280<<",
298
+ ">>S281<<",
299
+ ">>S282<<",
300
+ ">>S283<<",
301
+ ">>S284<<",
302
+ ">>S285<<",
303
+ ">>S286<<",
304
+ ">>S287<<",
305
+ ">>S288<<",
306
+ ">>S289<<",
307
+ ">>S290<<",
308
+ ">>S291<<",
309
+ ">>S292<<",
310
+ ">>S293<<",
311
+ ">>S294<<",
312
+ ">>S295<<",
313
+ ">>S296<<",
314
+ ">>S297<<",
315
+ ">>S298<<",
316
+ ">>S299<<",
317
+ ">>S300<<",
318
+ ">>S301<<",
319
+ ">>S302<<",
320
+ ">>S303<<",
321
+ ">>S304<<",
322
+ ">>S305<<",
323
+ ">>S306<<",
324
+ ">>S307<<",
325
+ ">>S308<<",
326
+ ">>S309<<",
327
+ ">>S310<<",
328
+ ">>S311<<",
329
+ ">>S312<<",
330
+ ">>S313<<",
331
+ ">>S314<<",
332
+ ">>S315<<",
333
+ ">>S316<<",
334
+ ">>S317<<",
335
+ ">>S318<<",
336
+ ">>S319<<",
337
+ ">>S320<<",
338
+ ">>S321<<",
339
+ ">>S322<<",
340
+ ">>S323<<",
341
+ ">>S324<<",
342
+ ">>S325<<",
343
+ ">>S326<<",
344
+ ">>S327<<",
345
+ ">>S328<<",
346
+ ">>S329<<",
347
+ ">>S330<<",
348
+ ">>S331<<",
349
+ ">>S332<<",
350
+ ">>S333<<",
351
+ ">>S334<<",
352
+ ">>S335<<",
353
+ ">>S336<<",
354
+ ">>S337<<",
355
+ ">>S338<<",
356
+ ">>S339<<",
357
+ ">>S340<<",
358
+ ">>S341<<",
359
+ ">>S342<<",
360
+ ">>S343<<",
361
+ ">>S344<<",
362
+ ">>S345<<",
363
+ "<start_system>",
364
+ "<start_user>",
365
+ "<start_assistant>",
366
+ "<end_message>"
367
+ ],
368
+ "bos_token": "<|endoftext|>",
369
+ "eos_token": "<|endoftext|>",
370
+ "unk_token": "<|endoftext|>"
371
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "eos_token": "<|endoftext|>",
5
+ "model_max_length": 1024,
6
+ "special_tokens_map_file": null,
7
+ "tokenizer_class": "GPT2Tokenizer",
8
+ "unk_token": "<|endoftext|>"
9
+ }