{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "BertNormalizer",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": null,
"lowercase": true
},
"pre_tokenizer": {
"type": "BertPreTokenizer"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [
2
],
"tokens": [
"[CLS]"
]
},
"[SEP]": {
"id": "[SEP]",
"ids": [
3
],
"tokens": [
"[SEP]"
]
}
}
},
"decoder": {
"type": "WordPiece",
"prefix": "##",
"cleanup": true
},
"model": {
"type": "WordPiece",
"unk_token": "[UNK]",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"[PAD]": 0,
"[UNK]": 1,
"[CLS]": 2,
"[SEP]": 3,
"[MASK]": 4,
"!": 5,
"\"": 6,
"#": 7,
"$": 8,
"%": 9,
"&": 10,
"'": 11,
"(": 12,
")": 13,
"*": 14,
"+": 15,
",": 16,
"-": 17,
".": 18,
"/": 19,
"0": 20,
"1": 21,
"2": 22,
"3": 23,
"4": 24,
"5": 25,
"6": 26,
"7": 27,
"8": 28,
"9": 29,
":": 30,
";": 31,
"<": 32,
"=": 33,
">": 34,
"?": 35,
"@": 36,
"A": 37,
"B": 38,
"C": 39,
"D": 40,
"E": 41,
"F": 42,
"G": 43,
"H": 44,
"I": 45,
"J": 46,
"K": 47,
"L": 48,
"M": 49,
"N": 50,
"O": 51,
"P": 52,
"Q": 53,
"R": 54,
"S": 55,
"T": 56,
"U": 57,
"V": 58,
"W": 59,
"X": 60,
"Y": 61,
"Z": 62,
"[": 63,
"\\": 64,
"]": 65,
"^": 66,
"_": 67,
"`": 68,
"b": 70,
"c": 71,
"d": 72,
"e": 73,
"f": 74,
"g": 75,
"h": 76,
"j": 78,
"k": 79,
"l": 80,
"m": 81,
"n": 82,
"o": 83,
"p": 84,
"q": 85,
"r": 86,
"s": 87,
"t": 88,
"u": 89,
"v": 90,
"w": 91,
"x": 92,
"y": 93,
"z": 94,
"{": 95,
"|": 96,
"}": 97,
"~": 98,
"the": 99,
"be": 100,
"to": 101,
"of": 102,
"and": 103,
"a": 104,
"in": 105,
"that": 106,
"have": 107,
"i": 108,
"it": 109,
"for": 110,
"not": 111,
"on": 112,
"with": 113,
"he": 114,
"as": 115,
"you": 116,
"do": 117,
"at": 118,
"this": 119,
"but": 120,
"his": 121,
"by": 122,
"from": 123,
"they": 124,
"we": 125,
"say": 126,
"her": 127,
"she": 128,
"or": 129,
"an": 130,
"will": 131,
"my": 132,
"one": 133,
"all": 134,
"would": 135,
"there": 136,
"their": 137,
"what": 138,
"so": 139,
"up": 140,
"out": 141,
"if": 142,
"about": 143,
"who": 144,
"get": 145,
"which": 146,
"go": 147,
"me": 148,
"when": 149,
"make": 150,
"can": 151,
"like": 152,
"time": 153,
"no": 154,
"just": 155,
"him": 156,
"know": 157,
"take": 158,
"people": 159,
"into": 160,
"year": 161,
"your": 162,
"good": 163,
"some": 164,
"could": 165,
"them": 166,
"see": 167,
"other": 168,
"than": 169,
"then": 170,
"now": 171,
"look": 172,
"only": 173,
"come": 174,
"its": 175,
"over": 176,
"think": 177,
"also": 178,
"back": 179,
"after": 180,
"use": 181,
"two": 182,
"how": 183,
"our": 184,
"work": 185,
"first": 186,
"well": 187,
"way": 188,
"even": 189,
"new": 190,
"want": 191,
"because": 192,
"any": 193,
"these": 194,
"give": 195,
"day": 196,
"most": 197,
"us ": 198
}
}
}
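
Loading this file with the Hugging Face `tokenizers` library shows the configured pipeline end to end: the BertNormalizer lowercases input ("lowercase": true), the BertPreTokenizer splits on whitespace and punctuation, the WordPiece model looks each piece up in the vocab, and TemplateProcessing wraps the result in [CLS]/[SEP] with the type_ids declared above. A minimal sketch, assuming the file is saved locally as tokenizer.json; the input sentences are arbitrary examples chosen from words in the vocab, not part of the file:

    from tokenizers import Tokenizer

    tok = Tokenizer.from_file("tokenizer.json")

    # Single sequence: the "single" template emits [CLS] A [SEP], all type_id 0.
    enc = tok.encode("They know the people")
    print(enc.tokens)    # ['[CLS]', 'they', 'know', 'the', 'people', '[SEP]']
    print(enc.type_ids)  # [0, 0, 0, 0, 0, 0]

    # Sentence pair: the "pair" template emits [CLS] A [SEP] B [SEP]; the B
    # segment and its trailing [SEP] carry type_id 1.
    pair = tok.encode("good day", "new year")
    print(pair.tokens)    # ['[CLS]', 'good', 'day', '[SEP]', 'new', 'year', '[SEP]']
    print(pair.type_ids)  # [0, 0, 0, 0, 1, 1, 1]

    # The WordPiece decoder strips "##" prefixes and, with cleanup enabled,
    # normalizes spacing; special tokens are skipped by default.
    print(tok.decode(pair.ids))  # good day new year

Two properties of this particular vocab are worth noting. It contains no "##"-prefixed continuation pieces, so although the model is WordPiece, any word outside the 100-word list (other than a single printable ASCII character) cannot be split and maps wholesale to [UNK]. And ids 69 and 77 never appear: character-level entries for "a" and "i" would duplicate the word keys "a" (104) and "i" (108), which a JSON object cannot represent.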