|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "</s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "<pad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"4": { |
|
"content": "<special0>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"5": { |
|
"content": "<special1>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"6": { |
|
"content": "<special2>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"7": { |
|
"content": "<special3>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"8": { |
|
"content": "<special4>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"9": { |
|
"content": "<special5>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"10": { |
|
"content": "<special6>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"11": { |
|
"content": "<special7>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"12": { |
|
"content": "<special8>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"13": { |
|
"content": "<special9>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"<special0>", |
|
"<special1>", |
|
"<special2>", |
|
"<special3>", |
|
"<special4>", |
|
"<special5>", |
|
"<special6>", |
|
"<special7>", |
|
"<special8>", |
|
"<special9>" |
|
], |
|
"bos_token": "<s>", |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "</s>", |
|
"do_lowercase_and_remove_accent": false, |
|
"id2lang": { |
|
"0": "af", |
|
"1": "als", |
|
"10": "be", |
|
"11": "bg", |
|
"12": "bn", |
|
"13": "br", |
|
"14": "bs", |
|
"15": "ca", |
|
"16": "ceb", |
|
"17": "ckb", |
|
"18": "cs", |
|
"19": "cy", |
|
"2": "am", |
|
"20": "da", |
|
"21": "de", |
|
"22": "el", |
|
"23": "en", |
|
"24": "eo", |
|
"25": "es", |
|
"26": "et", |
|
"27": "eu", |
|
"28": "fa", |
|
"29": "fi", |
|
"3": "an", |
|
"30": "fr", |
|
"31": "fy", |
|
"32": "ga", |
|
"33": "gan", |
|
"34": "gl", |
|
"35": "gu", |
|
"36": "he", |
|
"37": "hi", |
|
"38": "hr", |
|
"39": "hu", |
|
"4": "ang", |
|
"40": "hy", |
|
"41": "ia", |
|
"42": "id", |
|
"43": "is", |
|
"44": "it", |
|
"45": "ja", |
|
"46": "jv", |
|
"47": "ka", |
|
"48": "kk", |
|
"49": "kn", |
|
"5": "ar", |
|
"50": "ko", |
|
"51": "ku", |
|
"52": "la", |
|
"53": "lb", |
|
"54": "lt", |
|
"55": "lv", |
|
"56": "mk", |
|
"57": "ml", |
|
"58": "mn", |
|
"59": "mr", |
|
"6": "arz", |
|
"60": "ms", |
|
"61": "my", |
|
"62": "nds", |
|
"63": "ne", |
|
"64": "nl", |
|
"65": "nn", |
|
"66": "no", |
|
"67": "oc", |
|
"68": "pl", |
|
"69": "pt", |
|
"7": "ast", |
|
"70": "ro", |
|
"71": "ru", |
|
"72": "scn", |
|
"73": "sco", |
|
"74": "sh", |
|
"75": "si", |
|
"76": "simple", |
|
"77": "sk", |
|
"78": "sl", |
|
"79": "sq", |
|
"8": "az", |
|
"80": "sr", |
|
"81": "sv", |
|
"82": "sw", |
|
"83": "ta", |
|
"84": "te", |
|
"85": "th", |
|
"86": "tl", |
|
"87": "tr", |
|
"88": "tt", |
|
"89": "uk", |
|
"9": "bar", |
|
"90": "ur", |
|
"91": "uz", |
|
"92": "vi", |
|
"93": "war", |
|
"94": "wuu", |
|
"95": "yi", |
|
"96": "zh", |
|
"97": "zh_classical", |
|
"98": "zh_min_nan", |
|
"99": "zh_yue" |
|
}, |
|
"lang2id": { |
|
"af": 0, |
|
"als": 1, |
|
"am": 2, |
|
"an": 3, |
|
"ang": 4, |
|
"ar": 5, |
|
"arz": 6, |
|
"ast": 7, |
|
"az": 8, |
|
"bar": 9, |
|
"be": 10, |
|
"bg": 11, |
|
"bn": 12, |
|
"br": 13, |
|
"bs": 14, |
|
"ca": 15, |
|
"ceb": 16, |
|
"ckb": 17, |
|
"cs": 18, |
|
"cy": 19, |
|
"da": 20, |
|
"de": 21, |
|
"el": 22, |
|
"en": 23, |
|
"eo": 24, |
|
"es": 25, |
|
"et": 26, |
|
"eu": 27, |
|
"fa": 28, |
|
"fi": 29, |
|
"fr": 30, |
|
"fy": 31, |
|
"ga": 32, |
|
"gan": 33, |
|
"gl": 34, |
|
"gu": 35, |
|
"he": 36, |
|
"hi": 37, |
|
"hr": 38, |
|
"hu": 39, |
|
"hy": 40, |
|
"ia": 41, |
|
"id": 42, |
|
"is": 43, |
|
"it": 44, |
|
"ja": 45, |
|
"jv": 46, |
|
"ka": 47, |
|
"kk": 48, |
|
"kn": 49, |
|
"ko": 50, |
|
"ku": 51, |
|
"la": 52, |
|
"lb": 53, |
|
"lt": 54, |
|
"lv": 55, |
|
"mk": 56, |
|
"ml": 57, |
|
"mn": 58, |
|
"mr": 59, |
|
"ms": 60, |
|
"my": 61, |
|
"nds": 62, |
|
"ne": 63, |
|
"nl": 64, |
|
"nn": 65, |
|
"no": 66, |
|
"oc": 67, |
|
"pl": 68, |
|
"pt": 69, |
|
"ro": 70, |
|
"ru": 71, |
|
"scn": 72, |
|
"sco": 73, |
|
"sh": 74, |
|
"si": 75, |
|
"simple": 76, |
|
"sk": 77, |
|
"sl": 78, |
|
"sq": 79, |
|
"sr": 80, |
|
"sv": 81, |
|
"sw": 82, |
|
"ta": 83, |
|
"te": 84, |
|
"th": 85, |
|
"tl": 86, |
|
"tr": 87, |
|
"tt": 88, |
|
"uk": 89, |
|
"ur": 90, |
|
"uz": 91, |
|
"vi": 92, |
|
"war": 93, |
|
"wuu": 94, |
|
"yi": 95, |
|
"zh": 96, |
|
"zh_classical": 97, |
|
"zh_min_nan": 98, |
|
"zh_yue": 99 |
|
}, |
|
"mask_token": "<special1>", |
|
"model_max_length": 512, |
|
"pad_token": "<pad>", |
|
"sep_token": "</s>", |
|
"tokenizer_class": "XLMTokenizer", |
|
"unk_token": "<unk>" |
|
} |
|
|