sage-m2m100-1.2B / tokenizer_config.json
ai-forever's picture
Upload tokenizer
87eb5bd verified
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14241": {
"content": "__af__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14242": {
"content": "__am__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14243": {
"content": "__ar__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14244": {
"content": "__ast__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14245": {
"content": "__az__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14246": {
"content": "__ba__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14247": {
"content": "__be__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14248": {
"content": "__bg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14249": {
"content": "__bn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14250": {
"content": "__br__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14251": {
"content": "__bs__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14252": {
"content": "__ca__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14253": {
"content": "__ceb__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14254": {
"content": "__cs__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14255": {
"content": "__cy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14256": {
"content": "__da__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14257": {
"content": "__de__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14258": {
"content": "__el__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14259": {
"content": "__en__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14260": {
"content": "__es__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14261": {
"content": "__et__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14262": {
"content": "__fa__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14263": {
"content": "__ff__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14264": {
"content": "__fi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14265": {
"content": "__fr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14266": {
"content": "__fy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14267": {
"content": "__ga__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14268": {
"content": "__gd__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14269": {
"content": "__gl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14270": {
"content": "__gu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14271": {
"content": "__ha__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14272": {
"content": "__he__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14273": {
"content": "__hi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14274": {
"content": "__hr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14275": {
"content": "__ht__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14276": {
"content": "__hu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14277": {
"content": "__hy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14278": {
"content": "__id__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14279": {
"content": "__ig__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14280": {
"content": "__ilo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14281": {
"content": "__is__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14282": {
"content": "__it__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14283": {
"content": "__ja__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14284": {
"content": "__jv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14285": {
"content": "__ka__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14286": {
"content": "__kk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14287": {
"content": "__km__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14288": {
"content": "__kn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14289": {
"content": "__ko__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14290": {
"content": "__lb__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14291": {
"content": "__lg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14292": {
"content": "__ln__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14293": {
"content": "__lo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14294": {
"content": "__lt__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14295": {
"content": "__lv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14296": {
"content": "__mg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14297": {
"content": "__mk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14298": {
"content": "__ml__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14299": {
"content": "__mn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14300": {
"content": "__mr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14301": {
"content": "__ms__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14302": {
"content": "__my__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14303": {
"content": "__ne__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14304": {
"content": "__nl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14305": {
"content": "__no__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14306": {
"content": "__ns__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14307": {
"content": "__oc__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14308": {
"content": "__or__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14309": {
"content": "__pa__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14310": {
"content": "__pl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14311": {
"content": "__ps__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14312": {
"content": "__pt__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14313": {
"content": "__ro__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14314": {
"content": "__ru__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14315": {
"content": "__sd__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14316": {
"content": "__si__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14317": {
"content": "__sk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14318": {
"content": "__sl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14319": {
"content": "__so__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14320": {
"content": "__sq__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14321": {
"content": "__sr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14322": {
"content": "__ss__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14323": {
"content": "__su__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14324": {
"content": "__sv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14325": {
"content": "__sw__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14326": {
"content": "__ta__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14327": {
"content": "__th__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14328": {
"content": "__tl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14329": {
"content": "__tn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14330": {
"content": "__tr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14331": {
"content": "__uk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14332": {
"content": "__ur__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14333": {
"content": "__uz__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14334": {
"content": "__vi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14335": {
"content": "__wo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14336": {
"content": "__xh__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14337": {
"content": "__yi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14338": {
"content": "__yo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14339": {
"content": "__zh__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14340": {
"content": "__zu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"__af__",
"__am__",
"__ar__",
"__ast__",
"__az__",
"__ba__",
"__be__",
"__bg__",
"__bn__",
"__br__",
"__bs__",
"__ca__",
"__ceb__",
"__cs__",
"__cy__",
"__da__",
"__de__",
"__el__",
"__en__",
"__es__",
"__et__",
"__fa__",
"__ff__",
"__fi__",
"__fr__",
"__fy__",
"__ga__",
"__gd__",
"__gl__",
"__gu__",
"__ha__",
"__he__",
"__hi__",
"__hr__",
"__ht__",
"__hu__",
"__hy__",
"__id__",
"__ig__",
"__ilo__",
"__is__",
"__it__",
"__ja__",
"__jv__",
"__ka__",
"__kk__",
"__km__",
"__kn__",
"__ko__",
"__lb__",
"__lg__",
"__ln__",
"__lo__",
"__lt__",
"__lv__",
"__mg__",
"__mk__",
"__ml__",
"__mn__",
"__mr__",
"__ms__",
"__my__",
"__ne__",
"__nl__",
"__no__",
"__ns__",
"__oc__",
"__or__",
"__pa__",
"__pl__",
"__ps__",
"__pt__",
"__ro__",
"__ru__",
"__sd__",
"__si__",
"__sk__",
"__sl__",
"__so__",
"__sq__",
"__sr__",
"__ss__",
"__su__",
"__sv__",
"__sw__",
"__ta__",
"__th__",
"__tl__",
"__tn__",
"__tr__",
"__uk__",
"__ur__",
"__uz__",
"__vi__",
"__wo__",
"__xh__",
"__yi__",
"__yo__",
"__zh__",
"__zu__"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": true,
"eos_token": "</s>",
"extra_ids": 0,
"language_codes": "m2m100",
"model_max_length": 1000000000000000019884624838656,
"num_madeup_words": 8,
"pad_token": "<pad>",
"sep_token": "</s>",
"sp_model_kwargs": {},
"src_lang": "ru",
"tgt_lang": "ru",
"tokenizer_class": "M2M100Tokenizer",
"unk_token": "<unk>"
}