guipenedo HF staff committed on
Commit
cabb954
1 Parent(s): c2ed7ce

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +370 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +8 -0
special_tokens_map.json ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ ">>TITLE<<",
5
+ ">>ABSTRACT<<",
6
+ ">>INTRODUCTION<<",
7
+ ">>SUMMARY<<",
8
+ ">>COMMENT<<",
9
+ ">>ANSWER<<",
10
+ ">>QUESTION<<",
11
+ ">>DOMAIN<<",
12
+ ">>PREFIX<<",
13
+ ">>SUFFIX<<",
14
+ ">>MIDDLE<<",
15
+ ">>[S]<<",
16
+ ">>[X]<<",
17
+ ">>[R]<<",
18
+ ">>S0<<",
19
+ ">>S1<<",
20
+ ">>S2<<",
21
+ ">>S3<<",
22
+ ">>S4<<",
23
+ ">>S5<<",
24
+ ">>S6<<",
25
+ ">>S7<<",
26
+ ">>S8<<",
27
+ ">>S9<<",
28
+ ">>S10<<",
29
+ ">>S11<<",
30
+ ">>S12<<",
31
+ ">>S13<<",
32
+ ">>S14<<",
33
+ ">>S15<<",
34
+ ">>S16<<",
35
+ ">>S17<<",
36
+ ">>S18<<",
37
+ ">>S19<<",
38
+ ">>S20<<",
39
+ ">>S21<<",
40
+ ">>S22<<",
41
+ ">>S23<<",
42
+ ">>S24<<",
43
+ ">>S25<<",
44
+ ">>S26<<",
45
+ ">>S27<<",
46
+ ">>S28<<",
47
+ ">>S29<<",
48
+ ">>S30<<",
49
+ ">>S31<<",
50
+ ">>S32<<",
51
+ ">>S33<<",
52
+ ">>S34<<",
53
+ ">>S35<<",
54
+ ">>S36<<",
55
+ ">>S37<<",
56
+ ">>S38<<",
57
+ ">>S39<<",
58
+ ">>S40<<",
59
+ ">>S41<<",
60
+ ">>S42<<",
61
+ ">>S43<<",
62
+ ">>S44<<",
63
+ ">>S45<<",
64
+ ">>S46<<",
65
+ ">>S47<<",
66
+ ">>S48<<",
67
+ ">>S49<<",
68
+ ">>S50<<",
69
+ ">>S51<<",
70
+ ">>S52<<",
71
+ ">>S53<<",
72
+ ">>S54<<",
73
+ ">>S55<<",
74
+ ">>S56<<",
75
+ ">>S57<<",
76
+ ">>S58<<",
77
+ ">>S59<<",
78
+ ">>S60<<",
79
+ ">>S61<<",
80
+ ">>S62<<",
81
+ ">>S63<<",
82
+ ">>S64<<",
83
+ ">>S65<<",
84
+ ">>S66<<",
85
+ ">>S67<<",
86
+ ">>S68<<",
87
+ ">>S69<<",
88
+ ">>S70<<",
89
+ ">>S71<<",
90
+ ">>S72<<",
91
+ ">>S73<<",
92
+ ">>S74<<",
93
+ ">>S75<<",
94
+ ">>S76<<",
95
+ ">>S77<<",
96
+ ">>S78<<",
97
+ ">>S79<<",
98
+ ">>S80<<",
99
+ ">>S81<<",
100
+ ">>S82<<",
101
+ ">>S83<<",
102
+ ">>S84<<",
103
+ ">>S85<<",
104
+ ">>S86<<",
105
+ ">>S87<<",
106
+ ">>S88<<",
107
+ ">>S89<<",
108
+ ">>S90<<",
109
+ ">>S91<<",
110
+ ">>S92<<",
111
+ ">>S93<<",
112
+ ">>S94<<",
113
+ ">>S95<<",
114
+ ">>S96<<",
115
+ ">>S97<<",
116
+ ">>S98<<",
117
+ ">>S99<<",
118
+ ">>S100<<",
119
+ ">>S101<<",
120
+ ">>S102<<",
121
+ ">>S103<<",
122
+ ">>S104<<",
123
+ ">>S105<<",
124
+ ">>S106<<",
125
+ ">>S107<<",
126
+ ">>S108<<",
127
+ ">>S109<<",
128
+ ">>S110<<",
129
+ ">>S111<<",
130
+ ">>S112<<",
131
+ ">>S113<<",
132
+ ">>S114<<",
133
+ ">>S115<<",
134
+ ">>S116<<",
135
+ ">>S117<<",
136
+ ">>S118<<",
137
+ ">>S119<<",
138
+ ">>S120<<",
139
+ ">>S121<<",
140
+ ">>S122<<",
141
+ ">>S123<<",
142
+ ">>S124<<",
143
+ ">>S125<<",
144
+ ">>S126<<",
145
+ ">>S127<<",
146
+ ">>S128<<",
147
+ ">>S129<<",
148
+ ">>S130<<",
149
+ ">>S131<<",
150
+ ">>S132<<",
151
+ ">>S133<<",
152
+ ">>S134<<",
153
+ ">>S135<<",
154
+ ">>S136<<",
155
+ ">>S137<<",
156
+ ">>S138<<",
157
+ ">>S139<<",
158
+ ">>S140<<",
159
+ ">>S141<<",
160
+ ">>S142<<",
161
+ ">>S143<<",
162
+ ">>S144<<",
163
+ ">>S145<<",
164
+ ">>S146<<",
165
+ ">>S147<<",
166
+ ">>S148<<",
167
+ ">>S149<<",
168
+ ">>S150<<",
169
+ ">>S151<<",
170
+ ">>S152<<",
171
+ ">>S153<<",
172
+ ">>S154<<",
173
+ ">>S155<<",
174
+ ">>S156<<",
175
+ ">>S157<<",
176
+ ">>S158<<",
177
+ ">>S159<<",
178
+ ">>S160<<",
179
+ ">>S161<<",
180
+ ">>S162<<",
181
+ ">>S163<<",
182
+ ">>S164<<",
183
+ ">>S165<<",
184
+ ">>S166<<",
185
+ ">>S167<<",
186
+ ">>S168<<",
187
+ ">>S169<<",
188
+ ">>S170<<",
189
+ ">>S171<<",
190
+ ">>S172<<",
191
+ ">>S173<<",
192
+ ">>S174<<",
193
+ ">>S175<<",
194
+ ">>S176<<",
195
+ ">>S177<<",
196
+ ">>S178<<",
197
+ ">>S179<<",
198
+ ">>S180<<",
199
+ ">>S181<<",
200
+ ">>S182<<",
201
+ ">>S183<<",
202
+ ">>S184<<",
203
+ ">>S185<<",
204
+ ">>S186<<",
205
+ ">>S187<<",
206
+ ">>S188<<",
207
+ ">>S189<<",
208
+ ">>S190<<",
209
+ ">>S191<<",
210
+ ">>S192<<",
211
+ ">>S193<<",
212
+ ">>S194<<",
213
+ ">>S195<<",
214
+ ">>S196<<",
215
+ ">>S197<<",
216
+ ">>S198<<",
217
+ ">>S199<<",
218
+ ">>S200<<",
219
+ ">>S201<<",
220
+ ">>S202<<",
221
+ ">>S203<<",
222
+ ">>S204<<",
223
+ ">>S205<<",
224
+ ">>S206<<",
225
+ ">>S207<<",
226
+ ">>S208<<",
227
+ ">>S209<<",
228
+ ">>S210<<",
229
+ ">>S211<<",
230
+ ">>S212<<",
231
+ ">>S213<<",
232
+ ">>S214<<",
233
+ ">>S215<<",
234
+ ">>S216<<",
235
+ ">>S217<<",
236
+ ">>S218<<",
237
+ ">>S219<<",
238
+ ">>S220<<",
239
+ ">>S221<<",
240
+ ">>S222<<",
241
+ ">>S223<<",
242
+ ">>S224<<",
243
+ ">>S225<<",
244
+ ">>S226<<",
245
+ ">>S227<<",
246
+ ">>S228<<",
247
+ ">>S229<<",
248
+ ">>S230<<",
249
+ ">>S231<<",
250
+ ">>S232<<",
251
+ ">>S233<<",
252
+ ">>S234<<",
253
+ ">>S235<<",
254
+ ">>S236<<",
255
+ ">>S237<<",
256
+ ">>S238<<",
257
+ ">>S239<<",
258
+ ">>S240<<",
259
+ ">>S241<<",
260
+ ">>S242<<",
261
+ ">>S243<<",
262
+ ">>S244<<",
263
+ ">>S245<<",
264
+ ">>S246<<",
265
+ ">>S247<<",
266
+ ">>S248<<",
267
+ ">>S249<<",
268
+ ">>S250<<",
269
+ ">>S251<<",
270
+ ">>S252<<",
271
+ ">>S253<<",
272
+ ">>S254<<",
273
+ ">>S255<<",
274
+ ">>S256<<",
275
+ ">>S257<<",
276
+ ">>S258<<",
277
+ ">>S259<<",
278
+ ">>S260<<",
279
+ ">>S261<<",
280
+ ">>S262<<",
281
+ ">>S263<<",
282
+ ">>S264<<",
283
+ ">>S265<<",
284
+ ">>S266<<",
285
+ ">>S267<<",
286
+ ">>S268<<",
287
+ ">>S269<<",
288
+ ">>S270<<",
289
+ ">>S271<<",
290
+ ">>S272<<",
291
+ ">>S273<<",
292
+ ">>S274<<",
293
+ ">>S275<<",
294
+ ">>S276<<",
295
+ ">>S277<<",
296
+ ">>S278<<",
297
+ ">>S279<<",
298
+ ">>S280<<",
299
+ ">>S281<<",
300
+ ">>S282<<",
301
+ ">>S283<<",
302
+ ">>S284<<",
303
+ ">>S285<<",
304
+ ">>S286<<",
305
+ ">>S287<<",
306
+ ">>S288<<",
307
+ ">>S289<<",
308
+ ">>S290<<",
309
+ ">>S291<<",
310
+ ">>S292<<",
311
+ ">>S293<<",
312
+ ">>S294<<",
313
+ ">>S295<<",
314
+ ">>S296<<",
315
+ ">>S297<<",
316
+ ">>S298<<",
317
+ ">>S299<<",
318
+ ">>S300<<",
319
+ ">>S301<<",
320
+ ">>S302<<",
321
+ ">>S303<<",
322
+ ">>S304<<",
323
+ ">>S305<<",
324
+ ">>S306<<",
325
+ ">>S307<<",
326
+ ">>S308<<",
327
+ ">>S309<<",
328
+ ">>S310<<",
329
+ ">>S311<<",
330
+ ">>S312<<",
331
+ ">>S313<<",
332
+ ">>S314<<",
333
+ ">>S315<<",
334
+ ">>S316<<",
335
+ ">>S317<<",
336
+ ">>S318<<",
337
+ ">>S319<<",
338
+ ">>S320<<",
339
+ ">>S321<<",
340
+ ">>S322<<",
341
+ ">>S323<<",
342
+ ">>S324<<",
343
+ ">>S325<<",
344
+ ">>S326<<",
345
+ ">>S327<<",
346
+ ">>S328<<",
347
+ ">>S329<<",
348
+ ">>S330<<",
349
+ ">>S331<<",
350
+ ">>S332<<",
351
+ ">>S333<<",
352
+ ">>S334<<",
353
+ ">>S335<<",
354
+ ">>S336<<",
355
+ ">>S337<<",
356
+ ">>S338<<",
357
+ ">>S339<<",
358
+ ">>S340<<",
359
+ ">>S341<<",
360
+ ">>S342<<",
361
+ ">>S343<<",
362
+ ">>S344<<",
363
+ ">>S345<<",
364
+ ">>S346<<",
365
+ ">>S347<<",
366
+ ">>S348<<",
367
+ ">>S349<<"
368
+ ],
369
+ "eos_token": "<|endoftext|>"
370
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "eos_token": "<|endoftext|>",
4
+ "model_max_length": 2048,
5
+ "name_or_path": "/home/gui/Desktop/lighton-dev/mdr/assets/tokenizer/dd_ul2",
6
+ "special_tokens_map_file": null,
7
+ "tokenizer_class": "PreTrainedTokenizerFast"
8
+ }