|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<pad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "<s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "</s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256001": { |
|
"content": "__ace__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256002": { |
|
"content": "__ace_Latn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256003": { |
|
"content": "__acm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256004": { |
|
"content": "__acq__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256005": { |
|
"content": "__aeb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256006": { |
|
"content": "__afr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256007": { |
|
"content": "__ajp__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256008": { |
|
"content": "__aka__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256009": { |
|
"content": "__amh__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256010": { |
|
"content": "__apc__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256011": { |
|
"content": "__arb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256012": { |
|
"content": "__ars__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256013": { |
|
"content": "__ary__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256014": { |
|
"content": "__arz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256015": { |
|
"content": "__asm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256016": { |
|
"content": "__ast__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256017": { |
|
"content": "__awa__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256018": { |
|
"content": "__ayr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256019": { |
|
"content": "__azb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256020": { |
|
"content": "__azj__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256021": { |
|
"content": "__bak__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256022": { |
|
"content": "__bam__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256023": { |
|
"content": "__ban__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256024": { |
|
"content": "__bel__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256025": { |
|
"content": "__bem__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256026": { |
|
"content": "__ben__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256027": { |
|
"content": "__bho__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256028": { |
|
"content": "__bjn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256029": { |
|
"content": "__bjn_Latn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256030": { |
|
"content": "__bod__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256031": { |
|
"content": "__bos__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256032": { |
|
"content": "__bug__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256033": { |
|
"content": "__bul__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256034": { |
|
"content": "__cat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256035": { |
|
"content": "__ceb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256036": { |
|
"content": "__ces__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256037": { |
|
"content": "__cjk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256038": { |
|
"content": "__ckb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256039": { |
|
"content": "__crh__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256040": { |
|
"content": "__cym__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256041": { |
|
"content": "__dan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256042": { |
|
"content": "__deu__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256043": { |
|
"content": "__dik__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256044": { |
|
"content": "__dyu__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256045": { |
|
"content": "__dzo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256046": { |
|
"content": "__ell__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256047": { |
|
"content": "__eng__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256048": { |
|
"content": "__epo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256049": { |
|
"content": "__est__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256050": { |
|
"content": "__eus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256051": { |
|
"content": "__ewe__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256052": { |
|
"content": "__fao__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256053": { |
|
"content": "__pes__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256054": { |
|
"content": "__fij__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256055": { |
|
"content": "__fin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256056": { |
|
"content": "__fon__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256057": { |
|
"content": "__fra__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256058": { |
|
"content": "__fur__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256059": { |
|
"content": "__fuv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256060": { |
|
"content": "__gla__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256061": { |
|
"content": "__gle__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256062": { |
|
"content": "__glg__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256063": { |
|
"content": "__grn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256064": { |
|
"content": "__guj__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256065": { |
|
"content": "__hat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256066": { |
|
"content": "__hau__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256067": { |
|
"content": "__heb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256068": { |
|
"content": "__hin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256069": { |
|
"content": "__hne__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256070": { |
|
"content": "__hrv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256071": { |
|
"content": "__hun__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256072": { |
|
"content": "__hye__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256073": { |
|
"content": "__ibo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256074": { |
|
"content": "__ilo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256075": { |
|
"content": "__ind__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256076": { |
|
"content": "__isl__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256077": { |
|
"content": "__ita__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256078": { |
|
"content": "__jav__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256079": { |
|
"content": "__jpn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256080": { |
|
"content": "__kab__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256081": { |
|
"content": "__kac__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256082": { |
|
"content": "__kam__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256083": { |
|
"content": "__kan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256084": { |
|
"content": "__kas__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256085": { |
|
"content": "__kas_Deva__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256086": { |
|
"content": "__kat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256087": { |
|
"content": "__knc__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256088": { |
|
"content": "__knc_Latn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256089": { |
|
"content": "__kaz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256090": { |
|
"content": "__kbp__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256091": { |
|
"content": "__kea__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256092": { |
|
"content": "__khm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256093": { |
|
"content": "__kik__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256094": { |
|
"content": "__kin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256095": { |
|
"content": "__kir__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256096": { |
|
"content": "__kmb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256097": { |
|
"content": "__kon__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256098": { |
|
"content": "__kor__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256099": { |
|
"content": "__kmr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256100": { |
|
"content": "__lao__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256101": { |
|
"content": "__lvs__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256102": { |
|
"content": "__lij__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256103": { |
|
"content": "__lim__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256104": { |
|
"content": "__lin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256105": { |
|
"content": "__lit__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256106": { |
|
"content": "__lmo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256107": { |
|
"content": "__ltg__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256108": { |
|
"content": "__ltz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256109": { |
|
"content": "__lua__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256110": { |
|
"content": "__lug__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256111": { |
|
"content": "__luo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256112": { |
|
"content": "__lus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256113": { |
|
"content": "__mag__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256114": { |
|
"content": "__mai__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256115": { |
|
"content": "__mal__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256116": { |
|
"content": "__mar__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256117": { |
|
"content": "__min__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256118": { |
|
"content": "__mkd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256119": { |
|
"content": "__plt__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256120": { |
|
"content": "__mlt__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256121": { |
|
"content": "__mni__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256122": { |
|
"content": "__khk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256123": { |
|
"content": "__mos__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256124": { |
|
"content": "__mri__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256125": { |
|
"content": "__zsm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256126": { |
|
"content": "__mya__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256127": { |
|
"content": "__nld__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256128": { |
|
"content": "__nno__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256129": { |
|
"content": "__nob__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256130": { |
|
"content": "__npi__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256131": { |
|
"content": "__nso__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256132": { |
|
"content": "__nus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256133": { |
|
"content": "__nya__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256134": { |
|
"content": "__oci__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256135": { |
|
"content": "__gaz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256136": { |
|
"content": "__ory__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256137": { |
|
"content": "__pag__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256138": { |
|
"content": "__pan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256139": { |
|
"content": "__pap__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256140": { |
|
"content": "__pol__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256141": { |
|
"content": "__por__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256142": { |
|
"content": "__prs__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256143": { |
|
"content": "__pbt__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256144": { |
|
"content": "__quy__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256145": { |
|
"content": "__ron__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256146": { |
|
"content": "__run__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256147": { |
|
"content": "__rus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256148": { |
|
"content": "__sag__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256149": { |
|
"content": "__san__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256150": { |
|
"content": "__sat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256151": { |
|
"content": "__scn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256152": { |
|
"content": "__shn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256153": { |
|
"content": "__sin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256154": { |
|
"content": "__slk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256155": { |
|
"content": "__slv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256156": { |
|
"content": "__smo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256157": { |
|
"content": "__sna__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256158": { |
|
"content": "__snd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256159": { |
|
"content": "__som__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256160": { |
|
"content": "__sot__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256161": { |
|
"content": "__spa__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256162": { |
|
"content": "__als__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256163": { |
|
"content": "__srd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256164": { |
|
"content": "__srp__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256165": { |
|
"content": "__ssw__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256166": { |
|
"content": "__sun__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256167": { |
|
"content": "__swe__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256168": { |
|
"content": "__swh__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256169": { |
|
"content": "__szl__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256170": { |
|
"content": "__tam__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256171": { |
|
"content": "__tat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256172": { |
|
"content": "__tel__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256173": { |
|
"content": "__tgk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256174": { |
|
"content": "__tgl__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256175": { |
|
"content": "__tha__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256176": { |
|
"content": "__tir__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256177": { |
|
"content": "__taq__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256178": { |
|
"content": "__taq_Tfng__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256179": { |
|
"content": "__tpi__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256180": { |
|
"content": "__tsn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256181": { |
|
"content": "__tso__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256182": { |
|
"content": "__tuk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256183": { |
|
"content": "__tum__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256184": { |
|
"content": "__tur__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256185": { |
|
"content": "__twi__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256186": { |
|
"content": "__tzm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256187": { |
|
"content": "__uig__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256188": { |
|
"content": "__ukr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256189": { |
|
"content": "__umb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256190": { |
|
"content": "__urd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256191": { |
|
"content": "__uzn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256192": { |
|
"content": "__vec__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256193": { |
|
"content": "__vie__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256194": { |
|
"content": "__war__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256195": { |
|
"content": "__wol__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256196": { |
|
"content": "__xho__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256197": { |
|
"content": "__ydd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256198": { |
|
"content": "__yor__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256199": { |
|
"content": "__yue__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256200": { |
|
"content": "__cmn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256201": { |
|
"content": "__cmn_Hant__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256202": { |
|
"content": "__zul__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256203": { |
|
"content": "<MINED_DATA>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256204": { |
|
"content": "<MMT_BT_DATA>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256205": { |
|
"content": "<SMT_BT_DATA>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"__ace__", |
|
"__ace_Latn__", |
|
"__acm__", |
|
"__acq__", |
|
"__aeb__", |
|
"__afr__", |
|
"__ajp__", |
|
"__aka__", |
|
"__amh__", |
|
"__apc__", |
|
"__arb__", |
|
"__ars__", |
|
"__ary__", |
|
"__arz__", |
|
"__asm__", |
|
"__ast__", |
|
"__awa__", |
|
"__ayr__", |
|
"__azb__", |
|
"__azj__", |
|
"__bak__", |
|
"__bam__", |
|
"__ban__", |
|
"__bel__", |
|
"__bem__", |
|
"__ben__", |
|
"__bho__", |
|
"__bjn__", |
|
"__bjn_Latn__", |
|
"__bod__", |
|
"__bos__", |
|
"__bug__", |
|
"__bul__", |
|
"__cat__", |
|
"__ceb__", |
|
"__ces__", |
|
"__cjk__", |
|
"__ckb__", |
|
"__crh__", |
|
"__cym__", |
|
"__dan__", |
|
"__deu__", |
|
"__dik__", |
|
"__dyu__", |
|
"__dzo__", |
|
"__ell__", |
|
"__eng__", |
|
"__epo__", |
|
"__est__", |
|
"__eus__", |
|
"__ewe__", |
|
"__fao__", |
|
"__pes__", |
|
"__fij__", |
|
"__fin__", |
|
"__fon__", |
|
"__fra__", |
|
"__fur__", |
|
"__fuv__", |
|
"__gla__", |
|
"__gle__", |
|
"__glg__", |
|
"__grn__", |
|
"__guj__", |
|
"__hat__", |
|
"__hau__", |
|
"__heb__", |
|
"__hin__", |
|
"__hne__", |
|
"__hrv__", |
|
"__hun__", |
|
"__hye__", |
|
"__ibo__", |
|
"__ilo__", |
|
"__ind__", |
|
"__isl__", |
|
"__ita__", |
|
"__jav__", |
|
"__jpn__", |
|
"__kab__", |
|
"__kac__", |
|
"__kam__", |
|
"__kan__", |
|
"__kas__", |
|
"__kas_Deva__", |
|
"__kat__", |
|
"__knc__", |
|
"__knc_Latn__", |
|
"__kaz__", |
|
"__kbp__", |
|
"__kea__", |
|
"__khm__", |
|
"__kik__", |
|
"__kin__", |
|
"__kir__", |
|
"__kmb__", |
|
"__kon__", |
|
"__kor__", |
|
"__kmr__", |
|
"__lao__", |
|
"__lvs__", |
|
"__lij__", |
|
"__lim__", |
|
"__lin__", |
|
"__lit__", |
|
"__lmo__", |
|
"__ltg__", |
|
"__ltz__", |
|
"__lua__", |
|
"__lug__", |
|
"__luo__", |
|
"__lus__", |
|
"__mag__", |
|
"__mai__", |
|
"__mal__", |
|
"__mar__", |
|
"__min__", |
|
"__mkd__", |
|
"__plt__", |
|
"__mlt__", |
|
"__mni__", |
|
"__khk__", |
|
"__mos__", |
|
"__mri__", |
|
"__zsm__", |
|
"__mya__", |
|
"__nld__", |
|
"__nno__", |
|
"__nob__", |
|
"__npi__", |
|
"__nso__", |
|
"__nus__", |
|
"__nya__", |
|
"__oci__", |
|
"__gaz__", |
|
"__ory__", |
|
"__pag__", |
|
"__pan__", |
|
"__pap__", |
|
"__pol__", |
|
"__por__", |
|
"__prs__", |
|
"__pbt__", |
|
"__quy__", |
|
"__ron__", |
|
"__run__", |
|
"__rus__", |
|
"__sag__", |
|
"__san__", |
|
"__sat__", |
|
"__scn__", |
|
"__shn__", |
|
"__sin__", |
|
"__slk__", |
|
"__slv__", |
|
"__smo__", |
|
"__sna__", |
|
"__snd__", |
|
"__som__", |
|
"__sot__", |
|
"__spa__", |
|
"__als__", |
|
"__srd__", |
|
"__srp__", |
|
"__ssw__", |
|
"__sun__", |
|
"__swe__", |
|
"__swh__", |
|
"__szl__", |
|
"__tam__", |
|
"__tat__", |
|
"__tel__", |
|
"__tgk__", |
|
"__tgl__", |
|
"__tha__", |
|
"__tir__", |
|
"__taq__", |
|
"__taq_Tfng__", |
|
"__tpi__", |
|
"__tsn__", |
|
"__tso__", |
|
"__tuk__", |
|
"__tum__", |
|
"__tur__", |
|
"__twi__", |
|
"__tzm__", |
|
"__uig__", |
|
"__ukr__", |
|
"__umb__", |
|
"__urd__", |
|
"__uzn__", |
|
"__vec__", |
|
"__vie__", |
|
"__war__", |
|
"__wol__", |
|
"__xho__", |
|
"__ydd__", |
|
"__yor__", |
|
"__yue__", |
|
"__cmn__", |
|
"__cmn_Hant__", |
|
"__zul__", |
|
"<MINED_DATA>", |
|
"<MMT_BT_DATA>", |
|
"<SMT_BT_DATA>" |
|
], |
|
"bos_token": "<s>", |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "<s>", |
|
"eos_token": "</s>", |
|
"language_code": [ |
|
"ace", |
|
"ace_Latn", |
|
"acm", |
|
"acq", |
|
"aeb", |
|
"afr", |
|
"ajp", |
|
"aka", |
|
"amh", |
|
"apc", |
|
"arb", |
|
"ars", |
|
"ary", |
|
"arz", |
|
"asm", |
|
"ast", |
|
"awa", |
|
"ayr", |
|
"azb", |
|
"azj", |
|
"bak", |
|
"bam", |
|
"ban", |
|
"bel", |
|
"bem", |
|
"ben", |
|
"bho", |
|
"bjn", |
|
"bjn_Latn", |
|
"bod", |
|
"bos", |
|
"bug", |
|
"bul", |
|
"cat", |
|
"ceb", |
|
"ces", |
|
"cjk", |
|
"ckb", |
|
"crh", |
|
"cym", |
|
"dan", |
|
"deu", |
|
"dik", |
|
"dyu", |
|
"dzo", |
|
"ell", |
|
"eng", |
|
"epo", |
|
"est", |
|
"eus", |
|
"ewe", |
|
"fao", |
|
"pes", |
|
"fij", |
|
"fin", |
|
"fon", |
|
"fra", |
|
"fur", |
|
"fuv", |
|
"gla", |
|
"gle", |
|
"glg", |
|
"grn", |
|
"guj", |
|
"hat", |
|
"hau", |
|
"heb", |
|
"hin", |
|
"hne", |
|
"hrv", |
|
"hun", |
|
"hye", |
|
"ibo", |
|
"ilo", |
|
"ind", |
|
"isl", |
|
"ita", |
|
"jav", |
|
"jpn", |
|
"kab", |
|
"kac", |
|
"kam", |
|
"kan", |
|
"kas", |
|
"kas_Deva", |
|
"kat", |
|
"knc", |
|
"knc_Latn", |
|
"kaz", |
|
"kbp", |
|
"kea", |
|
"khm", |
|
"kik", |
|
"kin", |
|
"kir", |
|
"kmb", |
|
"kon", |
|
"kor", |
|
"kmr", |
|
"lao", |
|
"lvs", |
|
"lij", |
|
"lim", |
|
"lin", |
|
"lit", |
|
"lmo", |
|
"ltg", |
|
"ltz", |
|
"lua", |
|
"lug", |
|
"luo", |
|
"lus", |
|
"mag", |
|
"mai", |
|
"mal", |
|
"mar", |
|
"min", |
|
"mkd", |
|
"plt", |
|
"mlt", |
|
"mni", |
|
"khk", |
|
"mos", |
|
"mri", |
|
"zsm", |
|
"mya", |
|
"nld", |
|
"nno", |
|
"nob", |
|
"npi", |
|
"nso", |
|
"nus", |
|
"nya", |
|
"oci", |
|
"gaz", |
|
"ory", |
|
"pag", |
|
"pan", |
|
"pap", |
|
"pol", |
|
"por", |
|
"prs", |
|
"pbt", |
|
"quy", |
|
"ron", |
|
"run", |
|
"rus", |
|
"sag", |
|
"san", |
|
"sat", |
|
"scn", |
|
"shn", |
|
"sin", |
|
"slk", |
|
"slv", |
|
"smo", |
|
"sna", |
|
"snd", |
|
"som", |
|
"sot", |
|
"spa", |
|
"als", |
|
"srd", |
|
"srp", |
|
"ssw", |
|
"sun", |
|
"swe", |
|
"swh", |
|
"szl", |
|
"tam", |
|
"tat", |
|
"tel", |
|
"tgk", |
|
"tgl", |
|
"tha", |
|
"tir", |
|
"taq", |
|
"taq_Tfng", |
|
"tpi", |
|
"tsn", |
|
"tso", |
|
"tuk", |
|
"tum", |
|
"tur", |
|
"twi", |
|
"tzm", |
|
"uig", |
|
"ukr", |
|
"umb", |
|
"urd", |
|
"uzn", |
|
"vec", |
|
"vie", |
|
"war", |
|
"wol", |
|
"xho", |
|
"ydd", |
|
"yor", |
|
"yue", |
|
"cmn", |
|
"cmn_Hant", |
|
"zul" |
|
], |
|
"model_max_length": 1000000000000000019884624838656, |
|
"pad_token": "<pad>", |
|
"processor_class": "SeamlessM4TProcessor", |
|
"sep_token": "</s>", |
|
"sp_model_kwargs": {}, |
|
"src_lang": "__eng__", |
|
"tgt_lang": "__fra__", |
|
"tokenizer_class": "SeamlessM4TTokenizer", |
|
"tokenizer_file": null, |
|
"unk_token": "<unk>" |
|
} |
|
|