grosenthal's picture
Upload tokenizer
d64b580
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "BertNormalizer",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": null,
"lowercase": true
},
"pre_tokenizer": {
"type": "BertPreTokenizer"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [
2
],
"tokens": [
"[CLS]"
]
},
"[SEP]": {
"id": "[SEP]",
"ids": [
3
],
"tokens": [
"[SEP]"
]
}
}
},
"decoder": {
"type": "WordPiece",
"prefix": "##",
"cleanup": true
},
"model": {
"type": "WordPiece",
"unk_token": "[UNK]",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"[PAD]": 0,
"[UNK]": 1,
"[CLS]": 2,
"[SEP]": 3,
"[MASK]": 4,
"(": 5,
")": 6,
",": 7,
"0": 8,
"1": 9,
"2": 10,
"3": 11,
"4": 12,
"5": 13,
"6": 14,
"7": 15,
"8": 16,
"9": 17,
"[": 18,
"]": 19,
"a": 20,
"b": 21,
"c": 22,
"d": 23,
"e": 24,
"f": 25,
"g": 26,
"h": 27,
"i": 28,
"j": 29,
"l": 30,
"m": 31,
"n": 32,
"o": 33,
"p": 34,
"q": 35,
"r": 36,
"s": 37,
"t": 38,
"u": 39,
"v": 40,
"x": 41,
"y": 42,
"##3": 43,
"##1": 44,
"##p": 45,
"##r": 46,
"##e": 47,
"##s": 48,
"##a": 49,
"##c": 50,
"##t": 51,
"##i": 52,
"##v": 53,
"##m": 54,
"##2": 55,
"##n": 56,
"##d": 57,
"##f": 58,
"##l": 59,
"##b": 60,
"##u": 61,
"##o": 62,
"##6": 63,
"##0": 64,
"##g": 65,
"##x": 66,
"##7": 67,
"##y": 68,
"##j": 69,
"##q": 70,
"##4": 71,
"##5": 72,
"##9": 73,
"##8": 74,
"##ac": 75,
"##iv": 76,
"##ive": 77,
"##31": 78,
"##pa": 79,
"##on": 80,
"##re": 81,
"ad": 82,
"##os": 83,
"##om": 84,
"##sm": 85,
"##act": 86,
"##pos": 87,
"##sf": 88,
"##nom": 89,
"##acc": 90,
"##active": 91,
"##in": 92,
"##11": 93,
"##at": 94,
"##pre": 95,
"##pres": 96,
"##en": 97,
"##er": 98,
"pr": 99,
"##gen": 100,
"##presactive": 101,
"##ind": 102,
"##dat": 103,
"adj": 104,
"pron": 105,
"con": 106,
"n2": 107,
"vpa": 108,
"##pp": 109,
"##ss": 110,
"vpar": 111,
"##ppl": 112,
"conj": 113,
"n31": 114,
"##per": 115,
"##vpos": 116,
"advpos": 117,
"v31": 118,
"##um": 119,
"##pass": 120,
"##perf": 121,
"##passive": 122,
"##accsf": 123,
"##ind3": 124,
"n21": 125,
"in": 126,
"##fu": 127,
"##bl": 128,
"n11": 129,
"pre": 130,
"##sn": 131,
"##im": 132,
"##oc": 133,
"##passiveppl": 134,
"##pabl": 135,
"vpar31": 136,
"prepabl": 137,
"##32": 138,
"##sc": 139,
"##it": 140,
"##nomsf": 141,
"##datsm": 142,
"qu": 143,
"##pm": 144,
"##2s": 145,
"##li": 146,
"##nomsm": 147,
"adj11": 148,
"v31presactive": 149,
"##perfpassiveppl": 150,
"##ind3p": 151,
"##loc": 152,
"##or": 153,
"##61": 154,
"##fut": 155,
"n3": 156,
"##10": 157,
"##si": 158,
"##ar": 159,
"##ll": 160,
"##accsm": 161,
"##gensm": 162,
"pron10": 163,
"##perfactive": 164,
"et": 165,
"ex": 166,
"num": 167,
"vi": 168,
"##1presactive": 169,
"##pf": 170,
"##p2s": 171,
"##sx": 172,
"##nomsn": 173,
"##accsc": 174,
"##gensf": 175,
"##presactiveppl": 176,
"adj32": 177,
"n31accsf": 178,
"##ind3s": 179,
"##imp2s": 180,
"an": 181,
"ca": 182,
"cum": 183,
"ill": 184,
"pu": 185,
"##pe": 186,
"##px": 187,
"##st": 188,
"##te": 189,
"##41": 190,
"pron61": 191,
"n21datsm": 192,
"n33": 193,
"di": 194,
"se": 195,
"v11": 196,
"##su": 197,
"##al": 198,
"##tu": 199,
"##il": 200,
"##lv": 201,
"##ur": 202,
"##xx": 203,
"##qu": 204,
"##gensn": 205,
"##datpx": 206,
"adj1": 207,
"pron41": 208,
"n22": 209,
"vpar11": 210,
"n31loc": 211,
"v31perfactive": 212,
"n21accsm": 213,
"n11accsf": 214,
"n11nomsf": 215,
"n11gensf": 216,
"##futactive": 217,
"expe": 218,
"##accscpos": 219,
"au": 220,
"ali": 221,
"mor": 222,
"n9": 223,
"non": 224,
"si": 225,
"so": 226,
"un": 227,
"v2": 228,
"v61": 229,
"##pi": 230,
"##rtu": 231,
"##el": 232,
"##ct": 233,
"##ir": 234,
"##di": 235,
"##bil": 236,
"##un": 237,
"##8xx": 238,
"##nompm": 239,
"##accpf": 240,
"##inf": 241,
"##prespassive": 242,
"##ind2s": 243,
"##datsx": 244,
"n21gensm": 245,
"##itat": 246,
"##datsmor": 247,
"que": 248,
"##nomsmpos": 249,
"##nomsmperfpassiveppl": 250,
"v31presactiveind3p": 251,
"##futpassiveppl": 252,
"##sibil": 253,
"virtu": 254,
"##1presactiveimp2s": 255,
"adj32accscpos": 256,
"##sub": 257,
"expet": 258,
"n98xx": 259,
"v21presactiveimp2s": 260,
"##datsmord": 261,
"virtut": 262,
"n98xxm": 263,
"cl": 264,
"co": 265,
"cre": 266,
"de": 267,
"dom": 268,
"en": 269,
"ef": 270,
"lact": 271,
"ne": 272,
"n32": 273,
"sa": 274,
"su": 275,
"tre": 276,
"ter": 277,
"tur": 278,
"va": 279,
"v7": 280,
"v5": 281,
"##3p": 282,
"##34": 283,
"##1s": 284,
"##pc": 285,
"##ru": 286,
"##ef": 287,
"##eli": 288,
"##ear": 289,
"##sim": 290,
"##av": 291,
"##ab": 292,
"##abl": 293,
"##com": 294,
"##ter": 295,
"##is": 296,
"##voc": 297,
"##20": 298,
"##ly": 299,
"##0x": 300,
"##ge": 301,
"##gn": 302,
"##4nomsn": 303,
"##5nomsmpos": 304,
"##ivir": 305,
"##nomsc": 306,
"##nompc": 307,
"##accpm": 308,
"##inos": 309,
"##inar": 310,
"##ati": 311,
"##eni": 312,
"pro": 313,
"##gensx": 314,
"##datsn": 315,
"pron31": 316,
"vpar34": 317,
"n31nomsf": 318,
"n31nomsm": 319,
"n31gensf": 320,
"##accsfpos": 321,
"inv": 322,
"##fug": 323,
"##fusi": 324,
"vpar31datsm": 325,
"vpar31datpx": 326,
"vpar31accpf": 327,
"vpar31nomsmperfpassiveppl": 328,
"adj11gensm": 329,
"adj11nomsn": 330,
"v31presactiveind": 331,
"v31presactiveimp2s": 332,
"n34nomsn": 333,
"pron10nompm": 334,
"numer": 335,
"num20": 336,
"visibil": 337,
"##1presactiveind3p": 338,
"adj32nomsc": 339,
"anim": 340,
"ante": 341,
"cael": 342,
"pull": 343,
"pulv": 344,
"##str": 345,
"pron61datsx": 346,
"n33nomsf": 347,
"n33loc": 348,
"dix": 349,
"sed": 350,
"v11prespassive": 351,
"##que": 352,
"adj13": 353,
"adj15nomsmpos": 354,
"pron41gensx": 355,
"n22gensn": 356,
"n22datsn": 357,
"n31locpm": 358,
"mort": 359,
"sign": 360,
"solv": 361,
"##und": 362,
"##inf0x": 363,
"##sub3p": 364,
"domin": 365,
"enim": 366,
"n32gensn": 367,
"terr": 368,
"v51presactiveind3p": 369,
"##elic": 370,
"##early": 371,
"##comp": 372,
"##vocsm": 373,
"##nompcpresactiveppl": 374,
"vpar31datpxpresactiveppl": 375,
"vpar31accpfperfpassiveppl": 376,
"adj11gensmpos": 377,
"num20datsmord": 378,
"adj32nomscpos": 379,
"pulvinar": 380,
"3r": 381,
"ar": 382,
"ae": 383,
"at": 384,
"ab": 385,
"bivir": 386,
"cu": 387,
"cen": 388,
"car": 389,
"cur": 390,
"cir": 391,
"cru": 392,
"da": 393,
"dum": 394,
"er": 395,
"es": 396,
"el": 397,
"eg": 398,
"ess": 399,
"equ": 400,
"fi": 401,
"fu": 402,
"fac": 403,
"fund": 404,
"felic": 405,
"ho": 406,
"ip": 407,
"ir": 408,
"im": 409,
"iu": 410,
"le": 411,
"lon": 412,
"lat": 413,
"lum": 414,
"loc": 415,
"ma": 416,
"mo": 417,
"min": 418,
"ni": 419,
"nat": 420,
"nun": 421,
"nav": 422,
"of": 423,
"ob": 424,
"oper": 425,
"par": 426,
"rab": 427,
"rati": 428,
"spe": 429,
"sal": 430,
"sur": 431,
"tu": 432,
"to": 433,
"ur": 434,
"ve": 435,
"vac": 436,
"viv": 437,
"voc": 438,
"v32": 439,
"vit": 440,
"v41": 441,
"vel": 442,
"veni": 443,
"xxx": 444,
"##3presactive": 445,
"##1p": 446,
"##12": 447,
"##1nom": 448,
"##1abl": 449,
"##pt": 450,
"##pon": 451,
"##pacc": 452,
"##et": 453,
"##em": 454,
"##ed": 455,
"##eo": 456,
"##ex": 457,
"##ect": 458,
"##sv": 459,
"##spos": 460,
"##spi": 461,
"##an": 462,
"##ass": 463,
"##aef": 464,
"##age": 465,
"##ci": 466,
"##cum": 467,
"##car": 468,
"##cund": 469,
"##tr": 470,
"##til": 471,
"##ic": 472,
"##isibil": 473,
"##mat": 474,
"##mef": 475,
"##2p": 476,
"##2nomsm": 477,
"##21nom": 478,
"##nct": 479,
"##nis": 480,
"##dx": 481,
"##delic": 482,
"##dex": 483,
"##fen": 484,
"##fect": 485,
"##fan": 486,
"##lum": 487,
"##ul": 488,
"##usv": 489,
"##62nomsm": 490,
"##gi": 491,
"##gy": 492,
"##gli": 493,
"##ginos": 494,
"##xage": 495,
"##51abl": 496,
"##ivi": 497,
"##31accscpos": 498,
"##patr": 499,
"##req": 500,
"adtu": 501,
"##smcar": 502,
"##posinf": 503,
"##sfearly": 504,
"##nompf": 505,
"##inx": 506,
"##11abl": 507,
"##press": 508,
"##presind3s": 509,
"praef": 510,
"##genpm": 511,
"##genpf": 512,
"##genpx": 513,
"##genter": 514,
"##presactiveind3s": 515,
"##ind1s": 516,
"##datsf": 517,
"adj31accscpos": 518,
"pron62nomsm": 519,
"pron51abl": 520,
"consu": 521,
"##sser": 522,
"vpar32": 523,
"vpar21nom": 524,
"n31accsm": 525,
"v31futactive": 526,
"v31prespassive": 527,
"v31presind3s": 528,
"##ump": 529,
"##passiveind3p": 530,
"##accsffutpassiveppl": 531,
"n21loc": 532,
"n21vocsm": 533,
"n21datsf": 534,
"inter": 535,
"infusi": 536,
"index": 537,
"infan": 538,
"##fus": 539,
"prepacc": 540,
"vpar31gensm": 541,
"vpar31vocsm": 542,
"vpar31nompcpresactiveppl": 543,
"##scul": 544,
"##iter": 545,
"##itusv": 546,
"##nomsfpos": 547,
"##liginos": 548,
"adj11accsc": 549,
"adj11nomsmpos": 550,
"adj11accsfpos": 551,
"adj11genpm": 552,
"adj11nomsfpos": 553,
"v31presactiveind3s": 554,
"v31presactiveinf0x": 555,
"##lociter": 556,
"##oritat": 557,
"##futpassiveind3p": 558,
"##sili": 559,
"##gensmfutpassiveppl": 560,
"pron10accsf": 561,
"pron10nomsf": 562,
"pron10accsm": 563,
"pron10nomsn": 564,
"pron10nompf": 565,
"##perfactiveind3s": 566,
"##perfactiveind2s": 567,
"##perfactivesub3p": 568,
"exiv": 569,
"exsili": 570,
"num12": 571,
"num11abl": 572,
"videlic": 573,
"##1presactiveind3s": 574,
"##sxpresactiveppl": 575,
"##accscpresactiveppl": 576,
"adj32genpx": 577,
"ann": 578,
"castr": 579,
"caed": 580,
"caliginos": 581,
"put": 582,
"##stin": 583,
"##tem": 584,
"##tere": 585,
"##tesim": 586,
"pron61accsf": 587,
"pron61datpx": 588,
"pron61accpm": 589,
"n33genpf": 590,
"dige": 591,
"dispos": 592,
"secund": 593,
"sexage": 594,
"v11futactive": 595,
"v11perfactiveind2s": 596,
"##alitat": 597,
"pron41accsm": 598,
"pron41datpx": 599,
"vpar11accpf": 600,
"vpar11accpm": 601,
"vpar11gensmfutpassiveppl": 602,
"vpar11accscpresactiveppl": 603,
"n31locsm": 604,
"n31locsfearly": 605,
"v31perfactiveind3p": 606,
"v31perfactiveind3s": 607,
"v31perfactiveind2s": 608,
"v31perfactivesub3p": 609,
"##futactiveppl": 610,
"##futactiveind1s": 611,
"expetere": 612,
"auct": 613,
"aufug": 614,
"autem": 615,
"aliqu": 616,
"sol": 617,
"undi": 618,
"univir": 619,
"v61presactiveind3s": 620,
"v61perfactiveind3s": 621,
"v61futactiveind1s": 622,
"##pid": 623,
"##die": 624,
"##dic": 625,
"##ind2searly": 626,
"##datsxposinf": 627,
"##sub1s": 628,
"expeto": 629,
"expetivi": 630,
"expetitusv": 631,
"cleo": 632,
"class": 633,
"colum": 634,
"codic": 635,
"cred": 636,
"creat": 637,
"effug": 638,
"effusi": 639,
"nec": 640,
"negli": 641,
"sapi": 642,
"sanct": 643,
"suspi": 644,
"sustin": 645,
"tremef": 646,
"trepid": 647,
"turp": 648,
"turb": 649,
"vas": 650,
"vascul": 651,
"v73presactive": 652,
"v71presactiveind3s": 653,
"##atio": 654,
"profect": 655,
"pron31nomsn": 656,
"pron31nompm": 657,
"vpar34nomsmperfpassiveppl": 658,
"vpar34nompcpresactiveppl": 659,
"inveni": 660,
"invisibil": 661,
"vpar31datsmperfpassiveppl": 662,
"vpar31datsmfutpassiveppl": 663,
"adj11nomsnpos": 664,
"adj11nomsncomp": 665,
"v31presactiveind1p": 666,
"v31presactiveind2p": 667,
"antepon": 668,
"##strinx": 669,
"n33locsf": 670,
"n33locpf": 671,
"v11prespassiveinf0x": 672,
"v11prespassivesub1s": 673,
"adj13accsfpos": 674,
"adj13datsxposinf": 675,
"mortalitat": 676,
"dominic": 677,
"3rd": 678,
"armat": 679,
"aegy": 680,
"atque": 681,
"cui": 682,
"centesim": 683,
"carnis": 684,
"curs": 685,
"circum": 686,
"cruc": 687,
"daem": 688,
"erump": 689,
"elab": 690,
"equit": 691,
"fili": 692,
"fug": 693,
"faci": 694,
"felicitat": 695,
"hodie": 696,
"ips": 697,
"irru": 698,
"impress": 699,
"iust": 700,
"lesser": 701,
"longi": 702,
"latit": 703,
"luminos": 704,
"mai": 705,
"mod": 706,
"minor": 707,
"nisi": 708,
"natal": 709,
"nunc": 710,
"offen": 711,
"obstrinx": 712,
"operatio": 713,
"parit": 714,
"ration": 715,
"speci": 716,
"surg": 717,
"tot": 718,
"urb": 719,
"velociter": 720,
"vacu": 721,
"v32perfactivesub3p": 722,
"v41futpassiveind3p": 723,
"xxxdx": 724,
"adtul": 725,
"##smcard": 726,
"praefer": 727,
"pron51ablsc": 728,
"consul": 729,
"vpar32accsffutpassiveppl": 730,
"vpar21nomsxpresactiveppl": 731,
"v31futactiveind3p": 732,
"v31prespassiveind3p": 733,
"n21locpm": 734,
"infusion": 735,
"infantil": 736,
"vpar31gensmperfpassiveppl": 737,
"vpar31vocsmperfpassiveppl": 738,
"adj11accsccomp": 739,
"adj11genpmpos": 740,
"num12datsmord": 741,
"num11ablsmcard": 742,
"videlicet": 743,
"adj32genpxpos": 744,
"digest": 745,
"disposit": 746,
"sexagesim": 747,
"v11futactiveimp2s": 748,
"vpar11accpffutactiveppl": 749,
"vpar11accpmperfpassiveppl": 750,
"auctoritat": 751,
"undique": 752,
"cleopatr": 753,
"classis": 754,
"columb": 755,
"effugav": 756,
"effusio": 757,
"negligenter": 758,
"suspic": 759,
"tremefact": 760,
"v73presactiveind2searly": 761,
"profecto": 762,
"adj13datsxposinfreq": 763,
"aegypt": 764,
"circumfus": 765,
"daemon": 766,
"offend": 767,
"special": 768
}
}
}