EddieChen372 committed on
Commit
61565ce
1 Parent(s): 4046b8a

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "CLASS_0": 30922,
3
+ "CLASS_1": 30923,
4
+ "CLASS_10": 30932,
5
+ "CLASS_11": 30933,
6
+ "CLASS_12": 30934,
7
+ "CLASS_13": 30935,
8
+ "CLASS_14": 30936,
9
+ "CLASS_15": 30937,
10
+ "CLASS_16": 30938,
11
+ "CLASS_17": 30939,
12
+ "CLASS_18": 30940,
13
+ "CLASS_19": 30941,
14
+ "CLASS_2": 30924,
15
+ "CLASS_20": 30942,
16
+ "CLASS_21": 30943,
17
+ "CLASS_22": 30944,
18
+ "CLASS_23": 30945,
19
+ "CLASS_24": 30946,
20
+ "CLASS_25": 30947,
21
+ "CLASS_26": 30948,
22
+ "CLASS_27": 30949,
23
+ "CLASS_28": 30950,
24
+ "CLASS_29": 30951,
25
+ "CLASS_3": 30925,
26
+ "CLASS_30": 30952,
27
+ "CLASS_31": 30953,
28
+ "CLASS_32": 30954,
29
+ "CLASS_33": 30955,
30
+ "CLASS_34": 30956,
31
+ "CLASS_35": 30957,
32
+ "CLASS_36": 30958,
33
+ "CLASS_37": 30959,
34
+ "CLASS_38": 30960,
35
+ "CLASS_39": 30961,
36
+ "CLASS_4": 30926,
37
+ "CLASS_40": 30962,
38
+ "CLASS_41": 30963,
39
+ "CLASS_42": 30964,
40
+ "CLASS_43": 30965,
41
+ "CLASS_44": 30966,
42
+ "CLASS_45": 30967,
43
+ "CLASS_46": 30968,
44
+ "CLASS_47": 30969,
45
+ "CLASS_48": 30970,
46
+ "CLASS_49": 30971,
47
+ "CLASS_5": 30927,
48
+ "CLASS_50": 30972,
49
+ "CLASS_51": 30973,
50
+ "CLASS_52": 30974,
51
+ "CLASS_53": 30975,
52
+ "CLASS_54": 30976,
53
+ "CLASS_55": 30977,
54
+ "CLASS_56": 30978,
55
+ "CLASS_57": 30979,
56
+ "CLASS_58": 30980,
57
+ "CLASS_59": 30981,
58
+ "CLASS_6": 30928,
59
+ "CLASS_60": 30982,
60
+ "CLASS_61": 30983,
61
+ "CLASS_62": 30984,
62
+ "CLASS_63": 30985,
63
+ "CLASS_64": 30986,
64
+ "CLASS_65": 30987,
65
+ "CLASS_66": 30988,
66
+ "CLASS_67": 30989,
67
+ "CLASS_68": 30990,
68
+ "CLASS_69": 30991,
69
+ "CLASS_7": 30929,
70
+ "CLASS_70": 30992,
71
+ "CLASS_71": 30993,
72
+ "CLASS_72": 30994,
73
+ "CLASS_73": 30995,
74
+ "CLASS_74": 30996,
75
+ "CLASS_75": 30997,
76
+ "CLASS_76": 30998,
77
+ "CLASS_77": 30999,
78
+ "CLASS_78": 31000,
79
+ "CLASS_79": 31001,
80
+ "CLASS_8": 30930,
81
+ "CLASS_80": 31002,
82
+ "CLASS_81": 31003,
83
+ "CLASS_82": 31004,
84
+ "CLASS_83": 31005,
85
+ "CLASS_84": 31006,
86
+ "CLASS_85": 31007,
87
+ "CLASS_86": 31008,
88
+ "CLASS_87": 31009,
89
+ "CLASS_88": 31010,
90
+ "CLASS_89": 31011,
91
+ "CLASS_9": 30931,
92
+ "CLASS_90": 31012,
93
+ "CLASS_91": 31013,
94
+ "CLASS_92": 31014,
95
+ "CLASS_93": 31015,
96
+ "CLASS_94": 31016,
97
+ "CLASS_95": 31017,
98
+ "CLASS_96": 31018,
99
+ "CLASS_97": 31019,
100
+ "CLASS_98": 31020,
101
+ "CLASS_99": 31021,
102
+ "FUNC_0": 30822,
103
+ "FUNC_1": 30823,
104
+ "FUNC_10": 30832,
105
+ "FUNC_11": 30833,
106
+ "FUNC_12": 30834,
107
+ "FUNC_13": 30835,
108
+ "FUNC_14": 30836,
109
+ "FUNC_15": 30837,
110
+ "FUNC_16": 30838,
111
+ "FUNC_17": 30839,
112
+ "FUNC_18": 30840,
113
+ "FUNC_19": 30841,
114
+ "FUNC_2": 30824,
115
+ "FUNC_20": 30842,
116
+ "FUNC_21": 30843,
117
+ "FUNC_22": 30844,
118
+ "FUNC_23": 30845,
119
+ "FUNC_24": 30846,
120
+ "FUNC_25": 30847,
121
+ "FUNC_26": 30848,
122
+ "FUNC_27": 30849,
123
+ "FUNC_28": 30850,
124
+ "FUNC_29": 30851,
125
+ "FUNC_3": 30825,
126
+ "FUNC_30": 30852,
127
+ "FUNC_31": 30853,
128
+ "FUNC_32": 30854,
129
+ "FUNC_33": 30855,
130
+ "FUNC_34": 30856,
131
+ "FUNC_35": 30857,
132
+ "FUNC_36": 30858,
133
+ "FUNC_37": 30859,
134
+ "FUNC_38": 30860,
135
+ "FUNC_39": 30861,
136
+ "FUNC_4": 30826,
137
+ "FUNC_40": 30862,
138
+ "FUNC_41": 30863,
139
+ "FUNC_42": 30864,
140
+ "FUNC_43": 30865,
141
+ "FUNC_44": 30866,
142
+ "FUNC_45": 30867,
143
+ "FUNC_46": 30868,
144
+ "FUNC_47": 30869,
145
+ "FUNC_48": 30870,
146
+ "FUNC_49": 30871,
147
+ "FUNC_5": 30827,
148
+ "FUNC_50": 30872,
149
+ "FUNC_51": 30873,
150
+ "FUNC_52": 30874,
151
+ "FUNC_53": 30875,
152
+ "FUNC_54": 30876,
153
+ "FUNC_55": 30877,
154
+ "FUNC_56": 30878,
155
+ "FUNC_57": 30879,
156
+ "FUNC_58": 30880,
157
+ "FUNC_59": 30881,
158
+ "FUNC_6": 30828,
159
+ "FUNC_60": 30882,
160
+ "FUNC_61": 30883,
161
+ "FUNC_62": 30884,
162
+ "FUNC_63": 30885,
163
+ "FUNC_64": 30886,
164
+ "FUNC_65": 30887,
165
+ "FUNC_66": 30888,
166
+ "FUNC_67": 30889,
167
+ "FUNC_68": 30890,
168
+ "FUNC_69": 30891,
169
+ "FUNC_7": 30829,
170
+ "FUNC_70": 30892,
171
+ "FUNC_71": 30893,
172
+ "FUNC_72": 30894,
173
+ "FUNC_73": 30895,
174
+ "FUNC_74": 30896,
175
+ "FUNC_75": 30897,
176
+ "FUNC_76": 30898,
177
+ "FUNC_77": 30899,
178
+ "FUNC_78": 30900,
179
+ "FUNC_79": 30901,
180
+ "FUNC_8": 30830,
181
+ "FUNC_80": 30902,
182
+ "FUNC_81": 30903,
183
+ "FUNC_82": 30904,
184
+ "FUNC_83": 30905,
185
+ "FUNC_84": 30906,
186
+ "FUNC_85": 30907,
187
+ "FUNC_86": 30908,
188
+ "FUNC_87": 30909,
189
+ "FUNC_88": 30910,
190
+ "FUNC_89": 30911,
191
+ "FUNC_9": 30831,
192
+ "FUNC_90": 30912,
193
+ "FUNC_91": 30913,
194
+ "FUNC_92": 30914,
195
+ "FUNC_93": 30915,
196
+ "FUNC_94": 30916,
197
+ "FUNC_95": 30917,
198
+ "FUNC_96": 30918,
199
+ "FUNC_97": 30919,
200
+ "FUNC_98": 30920,
201
+ "FUNC_99": 30921,
202
+ "VAR_0": 30522,
203
+ "VAR_1": 30523,
204
+ "VAR_10": 30532,
205
+ "VAR_100": 30622,
206
+ "VAR_101": 30623,
207
+ "VAR_102": 30624,
208
+ "VAR_103": 30625,
209
+ "VAR_104": 30626,
210
+ "VAR_105": 30627,
211
+ "VAR_106": 30628,
212
+ "VAR_107": 30629,
213
+ "VAR_108": 30630,
214
+ "VAR_109": 30631,
215
+ "VAR_11": 30533,
216
+ "VAR_110": 30632,
217
+ "VAR_111": 30633,
218
+ "VAR_112": 30634,
219
+ "VAR_113": 30635,
220
+ "VAR_114": 30636,
221
+ "VAR_115": 30637,
222
+ "VAR_116": 30638,
223
+ "VAR_117": 30639,
224
+ "VAR_118": 30640,
225
+ "VAR_119": 30641,
226
+ "VAR_12": 30534,
227
+ "VAR_120": 30642,
228
+ "VAR_121": 30643,
229
+ "VAR_122": 30644,
230
+ "VAR_123": 30645,
231
+ "VAR_124": 30646,
232
+ "VAR_125": 30647,
233
+ "VAR_126": 30648,
234
+ "VAR_127": 30649,
235
+ "VAR_128": 30650,
236
+ "VAR_129": 30651,
237
+ "VAR_13": 30535,
238
+ "VAR_130": 30652,
239
+ "VAR_131": 30653,
240
+ "VAR_132": 30654,
241
+ "VAR_133": 30655,
242
+ "VAR_134": 30656,
243
+ "VAR_135": 30657,
244
+ "VAR_136": 30658,
245
+ "VAR_137": 30659,
246
+ "VAR_138": 30660,
247
+ "VAR_139": 30661,
248
+ "VAR_14": 30536,
249
+ "VAR_140": 30662,
250
+ "VAR_141": 30663,
251
+ "VAR_142": 30664,
252
+ "VAR_143": 30665,
253
+ "VAR_144": 30666,
254
+ "VAR_145": 30667,
255
+ "VAR_146": 30668,
256
+ "VAR_147": 30669,
257
+ "VAR_148": 30670,
258
+ "VAR_149": 30671,
259
+ "VAR_15": 30537,
260
+ "VAR_150": 30672,
261
+ "VAR_151": 30673,
262
+ "VAR_152": 30674,
263
+ "VAR_153": 30675,
264
+ "VAR_154": 30676,
265
+ "VAR_155": 30677,
266
+ "VAR_156": 30678,
267
+ "VAR_157": 30679,
268
+ "VAR_158": 30680,
269
+ "VAR_159": 30681,
270
+ "VAR_16": 30538,
271
+ "VAR_160": 30682,
272
+ "VAR_161": 30683,
273
+ "VAR_162": 30684,
274
+ "VAR_163": 30685,
275
+ "VAR_164": 30686,
276
+ "VAR_165": 30687,
277
+ "VAR_166": 30688,
278
+ "VAR_167": 30689,
279
+ "VAR_168": 30690,
280
+ "VAR_169": 30691,
281
+ "VAR_17": 30539,
282
+ "VAR_170": 30692,
283
+ "VAR_171": 30693,
284
+ "VAR_172": 30694,
285
+ "VAR_173": 30695,
286
+ "VAR_174": 30696,
287
+ "VAR_175": 30697,
288
+ "VAR_176": 30698,
289
+ "VAR_177": 30699,
290
+ "VAR_178": 30700,
291
+ "VAR_179": 30701,
292
+ "VAR_18": 30540,
293
+ "VAR_180": 30702,
294
+ "VAR_181": 30703,
295
+ "VAR_182": 30704,
296
+ "VAR_183": 30705,
297
+ "VAR_184": 30706,
298
+ "VAR_185": 30707,
299
+ "VAR_186": 30708,
300
+ "VAR_187": 30709,
301
+ "VAR_188": 30710,
302
+ "VAR_189": 30711,
303
+ "VAR_19": 30541,
304
+ "VAR_190": 30712,
305
+ "VAR_191": 30713,
306
+ "VAR_192": 30714,
307
+ "VAR_193": 30715,
308
+ "VAR_194": 30716,
309
+ "VAR_195": 30717,
310
+ "VAR_196": 30718,
311
+ "VAR_197": 30719,
312
+ "VAR_198": 30720,
313
+ "VAR_199": 30721,
314
+ "VAR_2": 30524,
315
+ "VAR_20": 30542,
316
+ "VAR_200": 30722,
317
+ "VAR_201": 30723,
318
+ "VAR_202": 30724,
319
+ "VAR_203": 30725,
320
+ "VAR_204": 30726,
321
+ "VAR_205": 30727,
322
+ "VAR_206": 30728,
323
+ "VAR_207": 30729,
324
+ "VAR_208": 30730,
325
+ "VAR_209": 30731,
326
+ "VAR_21": 30543,
327
+ "VAR_210": 30732,
328
+ "VAR_211": 30733,
329
+ "VAR_212": 30734,
330
+ "VAR_213": 30735,
331
+ "VAR_214": 30736,
332
+ "VAR_215": 30737,
333
+ "VAR_216": 30738,
334
+ "VAR_217": 30739,
335
+ "VAR_218": 30740,
336
+ "VAR_219": 30741,
337
+ "VAR_22": 30544,
338
+ "VAR_220": 30742,
339
+ "VAR_221": 30743,
340
+ "VAR_222": 30744,
341
+ "VAR_223": 30745,
342
+ "VAR_224": 30746,
343
+ "VAR_225": 30747,
344
+ "VAR_226": 30748,
345
+ "VAR_227": 30749,
346
+ "VAR_228": 30750,
347
+ "VAR_229": 30751,
348
+ "VAR_23": 30545,
349
+ "VAR_230": 30752,
350
+ "VAR_231": 30753,
351
+ "VAR_232": 30754,
352
+ "VAR_233": 30755,
353
+ "VAR_234": 30756,
354
+ "VAR_235": 30757,
355
+ "VAR_236": 30758,
356
+ "VAR_237": 30759,
357
+ "VAR_238": 30760,
358
+ "VAR_239": 30761,
359
+ "VAR_24": 30546,
360
+ "VAR_240": 30762,
361
+ "VAR_241": 30763,
362
+ "VAR_242": 30764,
363
+ "VAR_243": 30765,
364
+ "VAR_244": 30766,
365
+ "VAR_245": 30767,
366
+ "VAR_246": 30768,
367
+ "VAR_247": 30769,
368
+ "VAR_248": 30770,
369
+ "VAR_249": 30771,
370
+ "VAR_25": 30547,
371
+ "VAR_250": 30772,
372
+ "VAR_251": 30773,
373
+ "VAR_252": 30774,
374
+ "VAR_253": 30775,
375
+ "VAR_254": 30776,
376
+ "VAR_255": 30777,
377
+ "VAR_256": 30778,
378
+ "VAR_257": 30779,
379
+ "VAR_258": 30780,
380
+ "VAR_259": 30781,
381
+ "VAR_26": 30548,
382
+ "VAR_260": 30782,
383
+ "VAR_261": 30783,
384
+ "VAR_262": 30784,
385
+ "VAR_263": 30785,
386
+ "VAR_264": 30786,
387
+ "VAR_265": 30787,
388
+ "VAR_266": 30788,
389
+ "VAR_267": 30789,
390
+ "VAR_268": 30790,
391
+ "VAR_269": 30791,
392
+ "VAR_27": 30549,
393
+ "VAR_270": 30792,
394
+ "VAR_271": 30793,
395
+ "VAR_272": 30794,
396
+ "VAR_273": 30795,
397
+ "VAR_274": 30796,
398
+ "VAR_275": 30797,
399
+ "VAR_276": 30798,
400
+ "VAR_277": 30799,
401
+ "VAR_278": 30800,
402
+ "VAR_279": 30801,
403
+ "VAR_28": 30550,
404
+ "VAR_280": 30802,
405
+ "VAR_281": 30803,
406
+ "VAR_282": 30804,
407
+ "VAR_283": 30805,
408
+ "VAR_284": 30806,
409
+ "VAR_285": 30807,
410
+ "VAR_286": 30808,
411
+ "VAR_287": 30809,
412
+ "VAR_288": 30810,
413
+ "VAR_289": 30811,
414
+ "VAR_29": 30551,
415
+ "VAR_290": 30812,
416
+ "VAR_291": 30813,
417
+ "VAR_292": 30814,
418
+ "VAR_293": 30815,
419
+ "VAR_294": 30816,
420
+ "VAR_295": 30817,
421
+ "VAR_296": 30818,
422
+ "VAR_297": 30819,
423
+ "VAR_298": 30820,
424
+ "VAR_299": 30821,
425
+ "VAR_3": 30525,
426
+ "VAR_30": 30552,
427
+ "VAR_31": 30553,
428
+ "VAR_32": 30554,
429
+ "VAR_33": 30555,
430
+ "VAR_34": 30556,
431
+ "VAR_35": 30557,
432
+ "VAR_36": 30558,
433
+ "VAR_37": 30559,
434
+ "VAR_38": 30560,
435
+ "VAR_39": 30561,
436
+ "VAR_4": 30526,
437
+ "VAR_40": 30562,
438
+ "VAR_41": 30563,
439
+ "VAR_42": 30564,
440
+ "VAR_43": 30565,
441
+ "VAR_44": 30566,
442
+ "VAR_45": 30567,
443
+ "VAR_46": 30568,
444
+ "VAR_47": 30569,
445
+ "VAR_48": 30570,
446
+ "VAR_49": 30571,
447
+ "VAR_5": 30527,
448
+ "VAR_50": 30572,
449
+ "VAR_51": 30573,
450
+ "VAR_52": 30574,
451
+ "VAR_53": 30575,
452
+ "VAR_54": 30576,
453
+ "VAR_55": 30577,
454
+ "VAR_56": 30578,
455
+ "VAR_57": 30579,
456
+ "VAR_58": 30580,
457
+ "VAR_59": 30581,
458
+ "VAR_6": 30528,
459
+ "VAR_60": 30582,
460
+ "VAR_61": 30583,
461
+ "VAR_62": 30584,
462
+ "VAR_63": 30585,
463
+ "VAR_64": 30586,
464
+ "VAR_65": 30587,
465
+ "VAR_66": 30588,
466
+ "VAR_67": 30589,
467
+ "VAR_68": 30590,
468
+ "VAR_69": 30591,
469
+ "VAR_7": 30529,
470
+ "VAR_70": 30592,
471
+ "VAR_71": 30593,
472
+ "VAR_72": 30594,
473
+ "VAR_73": 30595,
474
+ "VAR_74": 30596,
475
+ "VAR_75": 30597,
476
+ "VAR_76": 30598,
477
+ "VAR_77": 30599,
478
+ "VAR_78": 30600,
479
+ "VAR_79": 30601,
480
+ "VAR_8": 30530,
481
+ "VAR_80": 30602,
482
+ "VAR_81": 30603,
483
+ "VAR_82": 30604,
484
+ "VAR_83": 30605,
485
+ "VAR_84": 30606,
486
+ "VAR_85": 30607,
487
+ "VAR_86": 30608,
488
+ "VAR_87": 30609,
489
+ "VAR_88": 30610,
490
+ "VAR_89": 30611,
491
+ "VAR_9": 30531,
492
+ "VAR_90": 30612,
493
+ "VAR_91": 30613,
494
+ "VAR_92": 30614,
495
+ "VAR_93": 30615,
496
+ "VAR_94": 30616,
497
+ "VAR_95": 30617,
498
+ "VAR_96": 30618,
499
+ "VAR_97": 30619,
500
+ "VAR_98": 30620,
501
+ "VAR_99": 30621
502
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "name_or_path": "/root/data/thesis/data/mpnet_GAT_open_redirect_v3_masked",
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "special_tokens_map_file": "/root/.cache/huggingface/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/7dbbc90392e2f80f3d3c277d6e90027e55de9125/special_tokens_map.json",
12
+ "strip_accents": null,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "BertTokenizer",
15
+ "unk_token": "[UNK]"
16
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff