alarm_prediction_tokenizer3 / tokenizer.json
hamzagorgulu's picture
Upload tokenizer
4a49b27
raw
history blame
11.2 kB
{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 40,
"strategy": "LongestFirst",
"stride": 0
},
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": false,
"use_regex": true
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": "",
"end_of_word_suffix": "",
"fuse_unk": false,
"vocab": {
"<|endoftext|>": 0,
"!": 1,
"\"": 2,
"#": 3,
"$": 4,
"%": 5,
"&": 6,
"'": 7,
"(": 8,
")": 9,
"*": 10,
"+": 11,
",": 12,
"-": 13,
".": 14,
"/": 15,
"0": 16,
"1": 17,
"2": 18,
"3": 19,
"4": 20,
"5": 21,
"6": 22,
"7": 23,
"8": 24,
"9": 25,
":": 26,
";": 27,
"<": 28,
"=": 29,
">": 30,
"?": 31,
"@": 32,
"A": 33,
"B": 34,
"C": 35,
"D": 36,
"E": 37,
"F": 38,
"G": 39,
"H": 40,
"I": 41,
"J": 42,
"K": 43,
"L": 44,
"M": 45,
"N": 46,
"O": 47,
"P": 48,
"Q": 49,
"R": 50,
"S": 51,
"T": 52,
"U": 53,
"V": 54,
"W": 55,
"X": 56,
"Y": 57,
"Z": 58,
"[": 59,
"\\": 60,
"]": 61,
"^": 62,
"_": 63,
"`": 64,
"a": 65,
"b": 66,
"c": 67,
"d": 68,
"e": 69,
"f": 70,
"g": 71,
"h": 72,
"i": 73,
"j": 74,
"k": 75,
"l": 76,
"m": 77,
"n": 78,
"o": 79,
"p": 80,
"q": 81,
"r": 82,
"s": 83,
"t": 84,
"u": 85,
"v": 86,
"w": 87,
"x": 88,
"y": 89,
"z": 90,
"{": 91,
"|": 92,
"}": 93,
"~": 94,
"¡": 95,
"¢": 96,
"£": 97,
"¤": 98,
"¥": 99,
"¦": 100,
"§": 101,
"¨": 102,
"©": 103,
"ª": 104,
"«": 105,
"¬": 106,
"®": 107,
"¯": 108,
"°": 109,
"±": 110,
"²": 111,
"³": 112,
"´": 113,
"µ": 114,
"¶": 115,
"·": 116,
"¸": 117,
"¹": 118,
"º": 119,
"»": 120,
"¼": 121,
"½": 122,
"¾": 123,
"¿": 124,
"À": 125,
"Á": 126,
"Â": 127,
"Ã": 128,
"Ä": 129,
"Å": 130,
"Æ": 131,
"Ç": 132,
"È": 133,
"É": 134,
"Ê": 135,
"Ë": 136,
"Ì": 137,
"Í": 138,
"Î": 139,
"Ï": 140,
"Ð": 141,
"Ñ": 142,
"Ò": 143,
"Ó": 144,
"Ô": 145,
"Õ": 146,
"Ö": 147,
"×": 148,
"Ø": 149,
"Ù": 150,
"Ú": 151,
"Û": 152,
"Ü": 153,
"Ý": 154,
"Þ": 155,
"ß": 156,
"à": 157,
"á": 158,
"â": 159,
"ã": 160,
"ä": 161,
"å": 162,
"æ": 163,
"ç": 164,
"è": 165,
"é": 166,
"ê": 167,
"ë": 168,
"ì": 169,
"í": 170,
"î": 171,
"ï": 172,
"ð": 173,
"ñ": 174,
"ò": 175,
"ó": 176,
"ô": 177,
"õ": 178,
"ö": 179,
"÷": 180,
"ø": 181,
"ù": 182,
"ú": 183,
"û": 184,
"ü": 185,
"ý": 186,
"þ": 187,
"ÿ": 188,
"Ā": 189,
"ā": 190,
"Ă": 191,
"ă": 192,
"Ą": 193,
"ą": 194,
"Ć": 195,
"ć": 196,
"Ĉ": 197,
"ĉ": 198,
"Ċ": 199,
"ċ": 200,
"Č": 201,
"č": 202,
"Ď": 203,
"ď": 204,
"Đ": 205,
"đ": 206,
"Ē": 207,
"ē": 208,
"Ĕ": 209,
"ĕ": 210,
"Ė": 211,
"ė": 212,
"Ę": 213,
"ę": 214,
"Ě": 215,
"ě": 216,
"Ĝ": 217,
"ĝ": 218,
"Ğ": 219,
"ğ": 220,
"Ġ": 221,
"ġ": 222,
"Ģ": 223,
"ģ": 224,
"Ĥ": 225,
"ĥ": 226,
"Ħ": 227,
"ħ": 228,
"Ĩ": 229,
"ĩ": 230,
"Ī": 231,
"ī": 232,
"Ĭ": 233,
"ĭ": 234,
"Į": 235,
"į": 236,
"İ": 237,
"ı": 238,
"Ĳ": 239,
"ĳ": 240,
"Ĵ": 241,
"ĵ": 242,
"Ķ": 243,
"ķ": 244,
"ĸ": 245,
"Ĺ": 246,
"ĺ": 247,
"Ļ": 248,
"ļ": 249,
"Ľ": 250,
"ľ": 251,
"Ŀ": 252,
"ŀ": 253,
"Ł": 254,
"ł": 255,
"Ń": 256,
"Ġ4": 257,
"00": 258,
"Ġ47": 259,
"AN": 260,
"ANN": 261,
"AL": 262,
"Ġ48": 263,
"TI": 264,
"PAL": 265,
"007": 266,
"003": 267,
"03": 268,
"LI": 269,
"TAL": 270,
"FLI": 271,
"AB": 272,
"002": 273,
"032": 274,
"15": 275,
"01": 276,
"1503": 277,
"AI": 278,
"70": 279,
"870": 280,
"014": 281,
"UA": 282,
"039": 283,
"PI": 284,
"010": 285,
"CD": 286,
"08": 287,
"608": 288,
"SI": 289,
"02": 290,
"900": 291,
"802": 292,
"USI": 293,
"LIC": 294,
"FI": 295,
"FS": 296,
"FSL": 297,
"019": 298,
"151": 299,
"001": 300,
"1512": 301,
"47": 302,
"FIC": 303,
"XL": 304,
"18": 305,
"XA": 306,
"SIO": 307,
"ĠSIO": 308,
"034": 309,
"48": 310,
"150": 311,
"152": 312,
"50": 313,
"850": 314,
"155": 315,
"1555": 316,
"ABC": 317,
"04": 318,
"013": 319,
"PIC": 320,
"153": 321,
"015": 322,
"1535": 323,
"17": 324,
"1527": 325,
"AH": 326,
"HA": 327,
"TIC": 328,
"1505": 329,
"016": 330,
"AIC": 331,
"1001": 332,
"DI": 333,
"PDI": 334,
"1522": 335,
"027": 336,
"043": 337,
"FD": 338,
"037": 339,
"005": 340,
"AAH": 341,
"012": 342,
"1507": 343,
"009": 344,
"TAH": 345,
"171": 346,
"1506": 347,
"042": 348,
"418": 349,
"004": 350,
"176": 351,
"AAHH": 352,
"1710": 353,
"1502": 354,
"41": 355,
"1504": 356,
"036": 357,
"1520": 358,
"1769": 359,
"CE": 360,
"MS": 361,
"SO": 362,
"CEMS": 363,
"AM": 364,
"CC": 365,
"MCC": 366,
"PS": 367,
"PMCC": 368,
"011": 369,
"AMPS": 370,
"2003": 371,
"1518": 372,
"HH": 373,
"022": 374,
"041": 375,
"1551": 376,
"1515": 377,
"414": 378,
"FFIC": 379,
"1509": 380,
"416": 381,
"1501": 382,
"1533": 383,
"1701": 384,
"BAL": 385,
"021": 386,
"CI": 387,
"HCI": 388,
"006": 389,
"AMPSA": 390,
"1521": 391,
"AMPSC": 392,
"BA": 393,
"2021": 394,
"FAL": 395,
"1510": 396,
"1513": 397,
"AMPSB": 398,
"FALL": 399,
"1003": 400,
"170": 401,
"1005": 402,
"TX": 403,
"156": 404,
"159": 405,
"1705": 406,
"TXI": 407,
"1516": 408,
"1534": 409,
"1760": 410,
"1508": 411,
"1523": 412,
"1529": 413,
"FF": 414,
"1511": 415,
"1514": 416,
"1567": 417,
"1594": 418,
"FFX": 419,
"035": 420,
"020": 421,
"12": 422,
"40": 423,
"1540": 424,
"008": 425,
"024": 426,
"1711": 427,
"24": 428,
"618": 429,
"808": 430,
"1525": 431,
"046": 432,
"1724": 433,
"1563": 434,
"1592": 435,
"16": 436,
"202": 437,
"31": 438,
"34": 439,
"1734": 440,
"1631": 441,
"2025": 442,
"038": 443
},
"merges": [
"Ġ 4",
"0 0",
"Ġ4 7",
"A N",
"AN N",
"A L",
"Ġ4 8",
"T I",
"P AL",
"00 7",
"00 3",
"0 3",
"L I",
"T AL",
"F LI",
"A B",
"00 2",
"03 2",
"1 5",
"0 1",
"15 03",
"A I",
"7 0",
"8 70",
"01 4",
"U A",
"03 9",
"P I",
"01 0",
"C D",
"0 8",
"6 08",
"S I",
"0 2",
"9 00",
"8 02",
"U SI",
"LI C",
"F I",
"F S",
"FS L",
"01 9",
"15 1",
"00 1",
"151 2",
"4 7",
"FI C",
"X L",
"1 8",
"X A",
"SI O",
"Ġ SIO",
"03 4",
"4 8",
"15 0",
"15 2",
"5 0",
"8 50",
"15 5",
"155 5",
"AB C",
"0 4",
"01 3",
"PI C",
"15 3",
"0 15",
"153 5",
"1 7",
"152 7",
"A H",
"H A",
"TI C",
"150 5",
"01 6",
"AI C",
"1 001",
"D I",
"P DI",
"152 2",
"02 7",
"04 3",
"F D",
"03 7",
"00 5",
"A AH",
"01 2",
"150 7",
"00 9",
"T AH",
"17 1",
"150 6",
"04 2",
"4 18",
"00 4",
"17 6",
"AAH H",
"171 0",
"15 02",
"4 1",
"150 4",
"03 6",
"152 0",
"176 9",
"C E",
"M S",
"S O",
"CE MS",
"A M",
"C C",
"M CC",
"P S",
"P MCC",
"01 1",
"AM PS",
"2 003",
"151 8",
"H H",
"02 2",
"04 1",
"155 1",
"15 15",
"41 4",
"F FIC",
"150 9",
"41 6",
"15 01",
"153 3",
"17 01",
"B AL",
"02 1",
"C I",
"H CI",
"00 6",
"AMPS A",
"152 1",
"AMPS C",
"B A",
"2 021",
"F AL",
"151 0",
"151 3",
"AMPS B",
"FAL L",
"1 003",
"1 70",
"1 005",
"T X",
"15 6",
"15 9",
"170 5",
"TX I",
"151 6",
"153 4",
"176 0",
"15 08",
"152 3",
"152 9",
"F F",
"151 1",
"151 4",
"156 7",
"159 4",
"FF X",
"03 5",
"02 0",
"1 2",
"4 0",
"15 40",
"00 8",
"02 4",
"171 1",
"2 4",
"6 18",
"8 08",
"152 5",
"04 6",
"17 24",
"156 3",
"159 2",
"1 6",
"2 02",
"3 1",
"3 4",
"17 34",
"16 31",
"202 5",
"03 8"
]
}
}