tokenizer-arena / stats /character_stats.json
eson's picture
fix lru_cache and fix qwen_vocab
9d1b27e
{
"FacebookAI/xlm-roberta-base": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/FacebookAI/xlm-roberta-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">xlm-roberta-base</a>",
"organization": "Facebook",
"vocab_size": 250002,
"num(digit)": 2728,
"len(digit)": "1,3,9",
"num(space)": 1,
"len(space)": "1,1,1",
"num(ar)": 14644,
"len(ar)": "1,4,16",
"num(zh)": 18457,
"len(zh)": "1,2,16",
"num(ja)": 20572,
"len(ja)": "1,2,16",
"num(ja-kana)": 3434,
"len(ja-kana)": "1,3,12",
"num(ko)": 5373,
"len(ko)": "1,2,8"
},
"clue/roberta_chinese_clue_tiny": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/clue/roberta_chinese_clue_tiny\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">roberta-chinese-clue</a>",
"organization": "CLUE",
"vocab_size": 8021,
"num(digit)": 230,
"len(digit)": "1,4,10",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 30,
"len(ar)": "1,2,3",
"num(zh)": 5689,
"len(zh)": "1,1,1",
"num(ja)": 5691,
"len(ja)": "1,1,3",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 0,
"len(ko)": "-"
},
"dbmdz/bert-base-german-uncased": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/dbmdz/bert-base-german-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-german-uncased</a>",
"organization": "dbmdz",
"vocab_size": 31102,
"num(digit)": 1733,
"len(digit)": "1,4,12",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 0,
"len(zh)": "-",
"num(ja)": 0,
"len(ja)": "-",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 0,
"len(ko)": "-"
},
"google-bert/bert-base-cased": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-cased</a>",
"organization": "Google",
"vocab_size": 28996,
"num(digit)": 926,
"len(digit)": "1,4,11",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 94,
"len(ar)": "1,3,4",
"num(zh)": 226,
"len(zh)": "1,2,3",
"num(ja)": 390,
"len(ja)": "1,2,3",
"num(ja-kana)": 164,
"len(ja-kana)": "1,2,3",
"num(ko)": 10,
"len(ko)": "1,2,3"
},
"google-bert/bert-base-chinese": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-chinese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-chinese</a>",
"organization": "Google",
"vocab_size": 21128,
"num(digit)": 1451,
"len(digit)": "1,3,12",
"num(space)": 2,
"len(space)": "1,2,3",
"num(ar)": 30,
"len(ar)": "1,2,3",
"num(zh)": 14642,
"len(zh)": "1,2,3",
"num(ja)": 15197,
"len(ja)": "1,3,15",
"num(ja-kana)": 553,
"len(ja-kana)": "1,3,15",
"num(ko)": 0,
"len(ko)": "-"
},
"google-bert/bert-base-german-cased": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-german-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-german-cased</a>",
"organization": "Google",
"vocab_size": 30000,
"num(digit)": 4065,
"len(digit)": "1,11,22",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 0,
"len(zh)": "-",
"num(ja)": 0,
"len(ja)": "-",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 0,
"len(ko)": "-"
},
"google-bert/bert-base-multilingual-cased": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-multilingual-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-multilingual-cased</a>",
"organization": "Google",
"vocab_size": 119547,
"num(digit)": 2583,
"len(digit)": "1,3,13",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 4873,
"len(ar)": "1,5,14",
"num(zh)": 13542,
"len(zh)": "1,2,3",
"num(ja)": 14880,
"len(ja)": "1,3,10",
"num(ja-kana)": 1336,
"len(ja-kana)": "1,4,10",
"num(ko)": 3271,
"len(ko)": "1,3,6"
},
"google-bert/bert-base-multilingual-uncased": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-multilingual-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-multilingual-uncased</a>",
"organization": "Google",
"vocab_size": 105879,
"num(digit)": 2510,
"len(digit)": "1,3,13",
"num(space)": 2,
"len(space)": "1,2,3",
"num(ar)": 4530,
"len(ar)": "1,5,13",
"num(zh)": 16658,
"len(zh)": "1,2,3",
"num(ja)": 17858,
"len(ja)": "1,3,10",
"num(ja-kana)": 1188,
"len(ja-kana)": "1,4,10",
"num(ko)": 0,
"len(ko)": "-"
},
"google-bert/bert-base-uncased": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-uncased</a>",
"organization": "Google",
"vocab_size": 30522,
"num(digit)": 2056,
"len(digit)": "1,4,11",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 88,
"len(ar)": "1,3,5",
"num(zh)": 488,
"len(zh)": "1,2,3",
"num(ja)": 676,
"len(ja)": "1,2,3",
"num(ja-kana)": 188,
"len(ja-kana)": "1,2,3",
"num(ko)": 0,
"len(ko)": "-"
},
"google/mobilebert-uncased": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/mobilebert-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">mobilebert-uncased</a>",
"organization": "Google",
"vocab_size": 30522,
"num(digit)": 2056,
"len(digit)": "1,4,11",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 88,
"len(ar)": "1,3,5",
"num(zh)": 488,
"len(zh)": "1,2,3",
"num(ja)": 676,
"len(ja)": "1,2,3",
"num(ja-kana)": 188,
"len(ja-kana)": "1,2,3",
"num(ko)": 0,
"len(ko)": "-"
},
"tohoku-nlp/bert-base-japanese": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tohoku-nlp/bert-base-japanese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-japanese</a>",
"organization": "Tohoku",
"vocab_size": 32000,
"num(digit)": 669,
"len(digit)": "1,3,5",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 10,
"len(ar)": "1,3,3",
"num(zh)": 18792,
"len(zh)": "1,2,11",
"num(ja)": 28367,
"len(ja)": "1,2,13",
"num(ja-kana)": 12359,
"len(ja-kana)": "1,4,13",
"num(ko)": 0,
"len(ko)": "-"
},
"gpt-4": {
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4</a>",
"organization": "OpenAI",
"vocab_size": 100277,
"num(digit)": 1110,
"len(digit)": "1,3,3",
"num(space)": 47472,
"len(space)": "1,7,128",
"num(ar)": 113,
"len(ar)": "1,2,10",
"num(zh)": 868,
"len(zh)": "1,1,7",
"num(ja)": 1035,
"len(ja)": "1,1,7",
"num(ja-kana)": 169,
"len(ja-kana)": "1,1,7",
"num(ko)": 299,
"len(ko)": "1,2,4"
},
"llama3": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>",
"organization": "Meta",
"vocab_size": 128256,
"num(digit)": 1110,
"len(digit)": "1,3,3",
"num(space)": 60860,
"len(space)": "1,6,128",
"num(ar)": 3810,
"len(ar)": "1,4,11",
"num(zh)": 4424,
"len(zh)": "1,1,7",
"num(ja)": 5387,
"len(ja)": "1,2,8",
"num(ja-kana)": 1086,
"len(ja-kana)": "1,2,8",
"num(ko)": 2281,
"len(ko)": "1,2,6"
},
"google-t5/t5-large": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-t5/t5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">t5</a>",
"organization": "Google",
"vocab_size": 32100,
"num(digit)": 1133,
"len(digit)": "1,3,13",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 0,
"len(zh)": "-",
"num(ja)": 0,
"len(ja)": "-",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 0,
"len(ko)": "-"
},
"google/byt5-small": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/byt5-small\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">byt5-small</a>",
"organization": "Google",
"vocab_size": 384,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 10,
"len(space)": "1,1,1",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 0,
"len(zh)": "-",
"num(ja)": 0,
"len(ja)": "-",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 0,
"len(ko)": "-"
},
"google/mt5-large": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/mt5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">mt5-large</a>",
"organization": "Google",
"vocab_size": 250100,
"num(digit)": 16829,
"len(digit)": "1,4,16",
"num(space)": 1,
"len(space)": "1,1,1",
"num(ar)": 7459,
"len(ar)": "1,3,16",
"num(zh)": 21489,
"len(zh)": "1,2,16",
"num(ja)": 27078,
"len(ja)": "1,2,16",
"num(ja-kana)": 9160,
"len(ja-kana)": "1,3,14",
"num(ko)": 4041,
"len(ko)": "1,1,10"
},
"lmsys/fastchat-t5-3b-v1.0": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/lmsys/fastchat-t5-3b-v1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">fastchat-t5-3b-v1.0</a>",
"organization": "LMSYS",
"vocab_size": 32110,
"num(digit)": 1033,
"len(digit)": "1,3,8",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 0,
"len(zh)": "-",
"num(ja)": 0,
"len(ja)": "-",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 0,
"len(ko)": "-"
},
"paust/pko-t5-large": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/paust/pko-t5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">pko-t5-large</a>",
"organization": "PAUST",
"vocab_size": 50358,
"num(digit)": 51,
"len(digit)": "1,2,3",
"num(space)": 10,
"len(space)": "1,1,1",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 0,
"len(zh)": "-",
"num(ja)": 0,
"len(ja)": "-",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 49050,
"len(ko)": "1,2,16"
},
"bloom": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/bigscience/bloom\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bloom</a>",
"organization": "BigScience",
"vocab_size": 250680,
"num(digit)": 6629,
"len(digit)": "1,4,50",
"num(space)": 140180,
"len(space)": "1,6,600",
"num(ar)": 20854,
"len(ar)": "1,5,16",
"num(zh)": 30603,
"len(zh)": "1,2,23",
"num(ja)": 30816,
"len(ja)": "1,2,23",
"num(ja-kana)": 214,
"len(ja-kana)": "1,1,3",
"num(ko)": 338,
"len(ko)": "1,1,3"
},
"llama": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/huggyllama/llama-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama</a>",
"organization": "Meta",
"vocab_size": 32000,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 700,
"len(zh)": "1,1,1",
"num(ja)": 837,
"len(ja)": "1,1,1",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"ClueAI/ChatYuan-large-v2": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClueAI/ChatYuan-large-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">ChatYuan-large-v2</a>",
"organization": "CLUE",
"vocab_size": 32128,
"num(digit)": 740,
"len(digit)": "1,3,9",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 2,
"len(ar)": "1,1,1",
"num(zh)": 29591,
"len(zh)": "1,2,16",
"num(ja)": 29736,
"len(ja)": "1,2,16",
"num(ja-kana)": 145,
"len(ja-kana)": "1,1,2",
"num(ko)": 0,
"len(ko)": "-"
},
"Meta/llama3": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>",
"organization": "Meta",
"vocab_size": 128256,
"num(digit)": 1110,
"len(digit)": "1,3,3",
"num(space)": 60860,
"len(space)": "1,6,128",
"num(ar)": 3810,
"len(ar)": "1,4,11",
"num(zh)": 4424,
"len(zh)": "1,1,7",
"num(ja)": 5387,
"len(ja)": "1,2,8",
"num(ja-kana)": 1086,
"len(ja-kana)": "1,2,8",
"num(ko)": 2281,
"len(ko)": "1,2,6"
},
"openai/gpt-4": {
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4</a>",
"organization": "OpenAI",
"vocab_size": 100277,
"num(digit)": 1110,
"len(digit)": "1,3,3",
"num(space)": 47472,
"len(space)": "1,7,128",
"num(ar)": 113,
"len(ar)": "1,2,10",
"num(zh)": 868,
"len(zh)": "1,1,7",
"num(ja)": 1035,
"len(ja)": "1,1,7",
"num(ja-kana)": 169,
"len(ja-kana)": "1,1,7",
"num(ko)": 299,
"len(ko)": "1,2,4"
},
"gradientai/Llama-3-8B-Instruct-Gradient-1048k": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>",
"organization": "Meta",
"vocab_size": 128256,
"num(digit)": 1110,
"len(digit)": "1,3,3",
"num(space)": 60860,
"len(space)": "1,6,128",
"num(ar)": 3810,
"len(ar)": "1,4,11",
"num(zh)": 4424,
"len(zh)": "1,1,7",
"num(ja)": 5387,
"len(ja)": "1,2,8",
"num(ja-kana)": 1086,
"len(ja-kana)": "1,2,8",
"num(ko)": 2281,
"len(ko)": "1,2,6"
},
"bigscience/bloom": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/bigscience/bloom\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bloom</a>",
"organization": "BigScience",
"vocab_size": 250680,
"num(digit)": 6629,
"len(digit)": "1,4,50",
"num(space)": 140180,
"len(space)": "1,6,600",
"num(ar)": 20854,
"len(ar)": "1,5,16",
"num(zh)": 30603,
"len(zh)": "1,2,23",
"num(ja)": 30816,
"len(ja)": "1,2,23",
"num(ja-kana)": 214,
"len(ja-kana)": "1,1,3",
"num(ko)": 338,
"len(ko)": "1,1,3"
},
"huggyllama/llama-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/huggyllama/llama-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama</a>",
"organization": "Meta",
"vocab_size": 32000,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 700,
"len(zh)": "1,1,1",
"num(ja)": 837,
"len(ja)": "1,1,1",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"baichuan-inc/Baichuan-7B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/baichuan-inc/Baichuan-7B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">baichuan</a>",
"organization": "Baichuan",
"vocab_size": 64000,
"num(digit)": 335,
"len(digit)": "1,14,14",
"num(space)": 13,
"len(space)": "1,1,1",
"num(ar)": 299,
"len(ar)": "1,1,2",
"num(zh)": 27676,
"len(zh)": "1,1,9",
"num(ja)": 28522,
"len(ja)": "1,1,9",
"num(ja-kana)": 178,
"len(ja-kana)": "1,1,1",
"num(ko)": 1591,
"len(ko)": "1,1,1"
},
"01-ai/Yi-34B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-34B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-34B</a>",
"organization": "Yi",
"vocab_size": 64000,
"num(digit)": 200,
"len(digit)": "1,13,15",
"num(space)": 24274,
"len(space)": "1,7,16",
"num(ar)": 18,
"len(ar)": "1,1,4",
"num(zh)": 21356,
"len(zh)": "1,2,12",
"num(ja)": 21407,
"len(ja)": "1,2,12",
"num(ja-kana)": 51,
"len(ja-kana)": "1,1,2",
"num(ko)": 28,
"len(ko)": "1,1,2"
},
"01-ai/Yi-6B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-6B</a>",
"organization": "Yi",
"vocab_size": 64000,
"num(digit)": 200,
"len(digit)": "1,13,15",
"num(space)": 24274,
"len(space)": "1,7,16",
"num(ar)": 18,
"len(ar)": "1,1,4",
"num(zh)": 21356,
"len(zh)": "1,2,12",
"num(ja)": 21407,
"len(ja)": "1,2,12",
"num(ja-kana)": 51,
"len(ja-kana)": "1,1,2",
"num(ko)": 28,
"len(ko)": "1,1,2"
},
"01-ai/Yi-VL-34B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-VL-34B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-VL-34B</a>",
"organization": "Yi",
"vocab_size": 64000,
"num(digit)": 200,
"len(digit)": "1,13,15",
"num(space)": 43,
"len(space)": "1,2,15",
"num(ar)": 18,
"len(ar)": "1,1,4",
"num(zh)": 21356,
"len(zh)": "1,2,12",
"num(ja)": 21407,
"len(ja)": "1,2,12",
"num(ja-kana)": 51,
"len(ja-kana)": "1,1,2",
"num(ko)": 28,
"len(ko)": "1,1,2"
},
"ClassCat/gpt2-base-french": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClassCat/gpt2-base-french\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-french</a>",
"organization": "ClassCat",
"vocab_size": 50000,
"num(digit)": 1833,
"len(digit)": "1,4,5",
"num(space)": 31889,
"len(space)": "1,7,32",
"num(ar)": 41,
"len(ar)": "1,1,4",
"num(zh)": 27,
"len(zh)": "1,1,1",
"num(ja)": 46,
"len(ja)": "1,1,2",
"num(ja-kana)": 19,
"len(ja-kana)": "1,1,2",
"num(ko)": 0,
"len(ko)": "-"
},
"ClassCat/gpt2-base-spanish": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClassCat/gpt2-base-spanish\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-spanish</a>",
"organization": "ClassCat",
"vocab_size": 50000,
"num(digit)": 1492,
"len(digit)": "1,4,9",
"num(space)": 34496,
"len(space)": "1,8,32",
"num(ar)": 36,
"len(ar)": "1,1,4",
"num(zh)": 13,
"len(zh)": "1,1,1",
"num(ja)": 36,
"len(ja)": "1,1,2",
"num(ja-kana)": 23,
"len(ja-kana)": "1,1,2",
"num(ko)": 0,
"len(ko)": "-"
},
"ClueAI/PromptCLUE-base": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClueAI/PromptCLUE-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">PromptCLUE-base</a>",
"organization": "CLUE",
"vocab_size": 32128,
"num(digit)": 740,
"len(digit)": "1,3,9",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 2,
"len(ar)": "1,1,1",
"num(zh)": 29591,
"len(zh)": "1,2,16",
"num(ja)": 29736,
"len(ja)": "1,2,16",
"num(ja-kana)": 145,
"len(ja-kana)": "1,1,2",
"num(ko)": 0,
"len(ko)": "-"
},
"CohereForAI/aya-101": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/CohereForAI/aya-101\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">aya-101</a>",
"organization": "Cohere For AI",
"vocab_size": 250100,
"num(digit)": 16829,
"len(digit)": "1,4,16",
"num(space)": 1,
"len(space)": "1,1,1",
"num(ar)": 7459,
"len(ar)": "1,3,16",
"num(zh)": 21489,
"len(zh)": "1,2,16",
"num(ja)": 27078,
"len(ja)": "1,2,16",
"num(ja-kana)": 9160,
"len(ja-kana)": "1,3,14",
"num(ko)": 4041,
"len(ko)": "1,1,10"
},
"EleutherAI/gpt-neox-20b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/EleutherAI/gpt-neox-20b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-neox-20b</a>",
"organization": "EleutherAI",
"vocab_size": 50277,
"num(digit)": 2036,
"len(digit)": "1,3,35",
"num(space)": 28996,
"len(space)": "1,7,512",
"num(ar)": 94,
"len(ar)": "1,2,4",
"num(zh)": 313,
"len(zh)": "1,1,2",
"num(ja)": 480,
"len(ja)": "1,1,4",
"num(ja-kana)": 167,
"len(ja-kana)": "1,1,4",
"num(ko)": 25,
"len(ko)": "1,1,2"
},
"HuggingFaceH4/starchat-alpha": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/HuggingFaceH4/starchat-alpha\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">starchat-alpha</a>",
"organization": "-",
"vocab_size": 49156,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 16515,
"len(space)": "1,6,256",
"num(ar)": 84,
"len(ar)": "1,2,4",
"num(zh)": 2030,
"len(zh)": "1,1,7",
"num(ja)": 2368,
"len(ja)": "1,1,8",
"num(ja-kana)": 360,
"len(ja-kana)": "1,2,8",
"num(ko)": 491,
"len(ko)": "1,2,5"
},
"HuggingFaceH4/zephyr-7b-beta": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/HuggingFaceH4/zephyr-7b-beta\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">zephyr-7b-beta</a>",
"organization": "HuggingFace",
"vocab_size": 32000,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 85,
"len(space)": "1,3,15",
"num(ar)": 71,
"len(ar)": "1,1,2",
"num(zh)": 1459,
"len(zh)": "1,1,2",
"num(ja)": 1593,
"len(ja)": "1,1,2",
"num(ja-kana)": 134,
"len(ja-kana)": "1,1,1",
"num(ko)": 346,
"len(ko)": "1,1,1"
},
"LLM360/CrystalCoder": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/LLM360/CrystalCoder\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CrystalCoder</a>",
"organization": "MBZUAI",
"vocab_size": 32022,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 700,
"len(zh)": "1,1,1",
"num(ja)": 837,
"len(ja)": "1,1,1",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"NousResearch/Llama-2-7b-chat-hf": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/NousResearch/Llama-2-7b-chat-hf\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama2</a>",
"organization": "Meta",
"vocab_size": 32001,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 700,
"len(zh)": "1,1,1",
"num(ja)": 837,
"len(ja)": "1,1,1",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"OrionStarAI/Orion-14B-Chat": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/OrionStarAI/Orion-14B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Orion-14B-Chat</a>",
"organization": "OrionStar",
"vocab_size": 84608,
"num(digit)": 1559,
"len(digit)": "1,4,14",
"num(space)": 18383,
"len(space)": "1,6,16",
"num(ar)": 102,
"len(ar)": "1,1,1",
"num(zh)": 46998,
"len(zh)": "1,2,16",
"num(ja)": 49644,
"len(ja)": "1,2,16",
"num(ja-kana)": 2987,
"len(ja-kana)": "1,3,11",
"num(ko)": 5110,
"len(ko)": "1,2,7"
},
"Qwen/Qwen-7B-Chat": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen-7B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen</a>",
"organization": "Alibaba",
"vocab_size": 151851,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 55883,
"len(space)": "1,6,128",
"num(ar)": 4018,
"len(ar)": "1,3,12",
"num(zh)": 25557,
"len(zh)": "1,2,7",
"num(ja)": 27206,
"len(ja)": "1,2,11",
"num(ja-kana)": 2089,
"len(ja-kana)": "1,3,11",
"num(ko)": 3495,
"len(ko)": "1,1,5"
},
"Qwen/Qwen1.5-14B-Chat": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-14B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5</a>",
"organization": "Alibaba",
"vocab_size": 151646,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 55883,
"len(space)": "1,6,128",
"num(ar)": 4018,
"len(ar)": "1,3,12",
"num(zh)": 25557,
"len(zh)": "1,2,7",
"num(ja)": 27206,
"len(ja)": "1,2,11",
"num(ja-kana)": 2089,
"len(ja-kana)": "1,3,11",
"num(ko)": 3495,
"len(ko)": "1,1,5"
},
"Skywork/Skywork-13B-Math": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-13B-Math\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork-13B-Math</a>",
"organization": "Kunlun",
"vocab_size": 65519,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 62,
"len(space)": "1,2,15",
"num(ar)": 56,
"len(ar)": "1,1,2",
"num(zh)": 33913,
"len(zh)": "1,2,5",
"num(ja)": 34064,
"len(ja)": "1,2,5",
"num(ja-kana)": 150,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"Skywork/Skywork-13B-base": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-13B-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork-13B-base</a>",
"organization": "Kunlun",
"vocab_size": 65519,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 62,
"len(space)": "1,2,15",
"num(ar)": 56,
"len(ar)": "1,1,2",
"num(zh)": 33913,
"len(zh)": "1,2,5",
"num(ja)": 34064,
"len(ja)": "1,2,5",
"num(ja-kana)": 150,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"THUDM/chatglm-6b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm-6b</a>",
"organization": "Tsinghua",
"vocab_size": 130344,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 93,
"len(space)": "1,34,80",
"num(ar)": 137,
"len(ar)": "1,2,4",
"num(zh)": 61358,
"len(zh)": "1,2,16",
"num(ja)": 61784,
"len(ja)": "1,2,16",
"num(ja-kana)": 439,
"len(ja-kana)": "1,2,5",
"num(ko)": 114,
"len(ko)": "1,1,3"
},
"THUDM/chatglm2-6b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm2-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm2-6b</a>",
"organization": "Tsinghua",
"vocab_size": 64787,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 67,
"len(space)": "1,2,15",
"num(ar)": 57,
"len(ar)": "1,1,2",
"num(zh)": 30922,
"len(zh)": "1,2,16",
"num(ja)": 31065,
"len(ja)": "1,2,16",
"num(ja-kana)": 143,
"len(ja-kana)": "1,1,1",
"num(ko)": 604,
"len(ko)": "1,1,1"
},
"THUDM/chatglm3-6b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm3-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm3-6b</a>",
"organization": "Tsinghua",
"vocab_size": 64796,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 67,
"len(space)": "1,2,15",
"num(ar)": 57,
"len(ar)": "1,1,2",
"num(zh)": 30922,
"len(zh)": "1,2,16",
"num(ja)": 31065,
"len(ja)": "1,2,16",
"num(ja-kana)": 143,
"len(ja-kana)": "1,1,1",
"num(ko)": 604,
"len(ko)": "1,1,1"
},
"TigerResearch/tigerbot-13b-chat-v2": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/TigerResearch/tigerbot-13b-chat-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">tigerbot-13b-chat-v2</a>",
"organization": "Tigerobo",
"vocab_size": 60515,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 28603,
"len(zh)": "1,2,16",
"num(ja)": 28770,
"len(ja)": "1,2,16",
"num(ja-kana)": 167,
"len(ja-kana)": "1,1,2",
"num(ko)": 261,
"len(ko)": "1,1,1"
},
"TigerResearch/tigerbot-70b-chat-v4-4k": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/TigerResearch/tigerbot-70b-chat-v4-4k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">tigerbot-70b-chat-v4-4k</a>",
"organization": "Tigerobo",
"vocab_size": 65110,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 30509,
"len(zh)": "1,2,16",
"num(ja)": 32061,
"len(ja)": "1,2,16",
"num(ja-kana)": 2071,
"len(ja-kana)": "1,2,8",
"num(ko)": 1504,
"len(ko)": "1,1,5"
},
"Upstage/SOLAR-10.7B-v1.0": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Upstage/SOLAR-10.7B-v1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">SOLAR-10.7B-v1.0</a>",
"organization": "-",
"vocab_size": 32000,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 85,
"len(space)": "1,3,15",
"num(ar)": 71,
"len(ar)": "1,1,2",
"num(zh)": 1459,
"len(zh)": "1,1,2",
"num(ja)": 1593,
"len(ja)": "1,1,2",
"num(ja-kana)": 134,
"len(ja-kana)": "1,1,1",
"num(ko)": 346,
"len(ko)": "1,1,1"
},
"WizardLM/WizardCoder-15B-V1.0": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardCoder-15B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardCoder-15B-V1.0</a>",
"organization": "Microsoft",
"vocab_size": 49153,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 16515,
"len(space)": "1,6,256",
"num(ar)": 84,
"len(ar)": "1,2,4",
"num(zh)": 2030,
"len(zh)": "1,1,7",
"num(ja)": 2368,
"len(ja)": "1,1,8",
"num(ja-kana)": 360,
"len(ja-kana)": "1,2,8",
"num(ko)": 491,
"len(ko)": "1,2,5"
},
"WizardLM/WizardCoder-Python-7B-V1.0": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardCoder-Python-7B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardCoder-Python-7B-V1.0</a>",
"organization": "Microsoft",
"vocab_size": 32001,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 700,
"len(zh)": "1,1,1",
"num(ja)": 837,
"len(ja)": "1,1,1",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"WizardLM/WizardLM-7B-V1.0": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardLM-7B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardLM-7B-V1.0</a>",
"organization": "Microsoft",
"vocab_size": 32001,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 700,
"len(zh)": "1,1,1",
"num(ja)": 837,
"len(ja)": "1,1,1",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"WizardLM/WizardMath-70B-V1.0": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardMath-70B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardMath-70B-V1.0</a>",
"organization": "Microsoft",
"vocab_size": 32002,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 700,
"len(zh)": "1,1,1",
"num(ja)": 837,
"len(ja)": "1,1,1",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"abeja/gpt-neox-japanese-2.7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/abeja/gpt-neox-japanese-2.7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-neox-japanese-2.7b</a>",
"organization": "ABEJA",
"vocab_size": 32000,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 15176,
"len(zh)": "1,2,2",
"num(ja)": 31482,
"len(ja)": "1,2,3",
"num(ja-kana)": 16306,
"len(ja-kana)": "1,3,3",
"num(ko)": 0,
"len(ko)": "-"
},
"ai21labs/Jamba-v0.1": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ai21labs/Jamba-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Jamba-v0.1</a>",
"organization": "AI21",
"vocab_size": 65536,
"num(digit)": 1556,
"len(digit)": "1,16,17",
"num(space)": 39501,
"len(space)": "1,7,32",
"num(ar)": 867,
"len(ar)": "1,3,8",
"num(zh)": 1157,
"len(zh)": "1,1,2",
"num(ja)": 1287,
"len(ja)": "1,1,2",
"num(ja-kana)": 130,
"len(ja-kana)": "1,1,2",
"num(ko)": 312,
"len(ko)": "1,1,2"
},
"allenai/OLMo-7B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/allenai/OLMo-7B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">OLMo-7B</a>",
"organization": "Allen AI",
"vocab_size": 50280,
"num(digit)": 2036,
"len(digit)": "1,3,35",
"num(space)": 29019,
"len(space)": "1,7,512",
"num(ar)": 94,
"len(ar)": "1,2,4",
"num(zh)": 313,
"len(zh)": "1,1,2",
"num(ja)": 480,
"len(ja)": "1,1,4",
"num(ja-kana)": 167,
"len(ja-kana)": "1,1,4",
"num(ko)": 25,
"len(ko)": "1,1,2"
},
"baichuan-inc/Baichuan2-7B-Chat": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">baichuan2</a>",
"organization": "Baichuan",
"vocab_size": 125696,
"num(digit)": 1023,
"len(digit)": "1,14,14",
"num(space)": 26013,
"len(space)": "1,7,32",
"num(ar)": 335,
"len(ar)": "1,1,27",
"num(zh)": 70398,
"len(zh)": "1,2,32",
"num(ja)": 71269,
"len(ja)": "1,2,32",
"num(ja-kana)": 206,
"len(ja-kana)": "1,1,9",
"num(ko)": 1595,
"len(ko)": "1,1,2"
},
"ckiplab/gpt2-base-chinese": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ckiplab/gpt2-base-chinese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-chinese</a>",
"organization": "SINICA",
"vocab_size": 21128,
"num(digit)": 1451,
"len(digit)": "1,3,12",
"num(space)": 2,
"len(space)": "1,2,3",
"num(ar)": 30,
"len(ar)": "1,2,3",
"num(zh)": 14642,
"len(zh)": "1,2,3",
"num(ja)": 15197,
"len(ja)": "1,3,15",
"num(ja-kana)": 553,
"len(ja-kana)": "1,3,15",
"num(ko)": 0,
"len(ko)": "-"
},
"cyberagent/open-calm-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/cyberagent/open-calm-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">open-calm-7b</a>",
"organization": "CyberAgent",
"vocab_size": 52000,
"num(digit)": 690,
"len(digit)": "1,3,5",
"num(space)": 1698,
"len(space)": "1,4,33",
"num(ar)": 10,
"len(ar)": "1,1,4",
"num(zh)": 30775,
"len(zh)": "1,3,31",
"num(ja)": 45790,
"len(ja)": "1,3,31",
"num(ja-kana)": 32535,
"len(ja-kana)": "1,3,31",
"num(ko)": 0,
"len(ko)": "-"
},
"databricks/dbrx-instruct": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/databricks/dbrx-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">dbrx-instruct</a>",
"organization": "Databricks",
"vocab_size": 100280,
"num(digit)": 1126,
"len(digit)": "1,3,17",
"num(space)": 47400,
"len(space)": "1,7,128",
"num(ar)": 113,
"len(ar)": "1,2,10",
"num(zh)": 868,
"len(zh)": "1,1,7",
"num(ja)": 1035,
"len(ja)": "1,1,7",
"num(ja-kana)": 169,
"len(ja-kana)": "1,1,7",
"num(ko)": 299,
"len(ko)": "1,2,4"
},
"deepseek-ai/DeepSeek-V2": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/DeepSeek-V2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">DeepSeek-V2</a>",
"organization": "DeepSeek",
"vocab_size": 100002,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 48073,
"len(space)": "1,7,128",
"num(ar)": 48,
"len(ar)": "1,1,4",
"num(zh)": 18052,
"len(zh)": "1,2,16",
"num(ja)": 18090,
"len(ja)": "1,2,16",
"num(ja-kana)": 38,
"len(ja-kana)": "1,1,2",
"num(ko)": 16,
"len(ko)": "1,1,2"
},
"deepseek-ai/deepseek-coder-33b-instruct": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">deepseek-coder-33b-instruct</a>",
"organization": "DeepSeek",
"vocab_size": 32022,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 15254,
"len(space)": "1,6,65",
"num(ar)": 12,
"len(ar)": "1,1,2",
"num(zh)": 4803,
"len(zh)": "1,2,4",
"num(ja)": 4804,
"len(ja)": "1,2,4",
"num(ja-kana)": 1,
"len(ja-kana)": "1,1,1",
"num(ko)": 0,
"len(ko)": "-"
},
"deepseek-ai/deepseek-llm-7b-base": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/deepseek-llm-7b-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">deepseek-llm-7b-base</a>",
"organization": "DeepSeek",
"vocab_size": 100015,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 48073,
"len(space)": "1,7,128",
"num(ar)": 48,
"len(ar)": "1,1,4",
"num(zh)": 18052,
"len(zh)": "1,2,16",
"num(ja)": 18090,
"len(ja)": "1,2,16",
"num(ja-kana)": 38,
"len(ja-kana)": "1,1,2",
"num(ko)": 16,
"len(ko)": "1,1,2"
},
"eson/kplug-base-encoder": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/eson/kplug-base-encoder\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">kplug</a>",
"organization": "JD",
"vocab_size": 10261,
"num(digit)": 420,
"len(digit)": "1,3,12",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 5764,
"len(zh)": "1,1,1",
"num(ja)": 5766,
"len(ja)": "1,1,3",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 0,
"len(ko)": "-"
},
"fnlp/moss-moon-003-sft": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/fnlp/moss-moon-003-sft\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">moss-moon-003-sft</a>",
"organization": "Fudan",
"vocab_size": 106072,
"num(digit)": 1848,
"len(digit)": "1,3,16",
"num(space)": 33566,
"len(space)": "1,7,102",
"num(ar)": 25,
"len(ar)": "1,1,4",
"num(zh)": 54230,
"len(zh)": "1,2,15",
"num(ja)": 54381,
"len(ja)": "1,2,15",
"num(ja-kana)": 152,
"len(ja-kana)": "1,1,7",
"num(ko)": 0,
"len(ko)": "-"
},
"google/gemma-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/gemma-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gemma-7b</a>",
"organization": "Google",
"vocab_size": 256000,
"num(digit)": 134,
"len(digit)": "1,10,12",
"num(space)": 125662,
"len(space)": "1,7,31",
"num(ar)": 6274,
"len(ar)": "1,4,15",
"num(zh)": 23767,
"len(zh)": "1,2,12",
"num(ja)": 28852,
"len(ja)": "1,2,12",
"num(ja-kana)": 7061,
"len(ja-kana)": "1,3,12",
"num(ko)": 2295,
"len(ko)": "1,1,5"
},
"google/switch-c-2048": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/switch-c-2048\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">switch-c-2048</a>",
"organization": "Google",
"vocab_size": 32100,
"num(digit)": 1133,
"len(digit)": "1,3,13",
"num(space)": 0,
"len(space)": "-",
"num(ar)": 0,
"len(ar)": "-",
"num(zh)": 0,
"len(zh)": "-",
"num(ja)": 0,
"len(ja)": "-",
"num(ja-kana)": 0,
"len(ja-kana)": "-",
"num(ko)": 0,
"len(ko)": "-"
},
"hfl/chinese-alpaca-lora-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-alpaca-lora-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-alpaca-lora-7b</a>",
"organization": "-",
"vocab_size": 49954,
"num(digit)": 614,
"len(digit)": "1,3,5",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 17839,
"len(zh)": "1,2,13",
"num(ja)": 17993,
"len(ja)": "1,2,13",
"num(ja-kana)": 154,
"len(ja-kana)": "1,1,1",
"num(ko)": 135,
"len(ko)": "1,1,1"
},
"hfl/chinese-llama-2-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-llama-2-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-llama-2-7b</a>",
"organization": "-",
"vocab_size": 55296,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 23974,
"len(zh)": "1,2,16",
"num(ja)": 24111,
"len(ja)": "1,2,16",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"hfl/chinese-llama-lora-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-llama-lora-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-llama-lora-7b</a>",
"organization": "-",
"vocab_size": 49953,
"num(digit)": 614,
"len(digit)": "1,3,5",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 17839,
"len(zh)": "1,2,13",
"num(ja)": 17993,
"len(ja)": "1,2,13",
"num(ja-kana)": 154,
"len(ja-kana)": "1,1,1",
"num(ko)": 135,
"len(ko)": "1,1,1"
},
"hfl/llama-3-chinese-8b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/llama-3-chinese-8b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama-3-chinese-8b</a>",
"organization": "-",
"vocab_size": 128256,
"num(digit)": 1110,
"len(digit)": "1,3,3",
"num(space)": 60860,
"len(space)": "1,6,128",
"num(ar)": 3810,
"len(ar)": "1,4,11",
"num(zh)": 4424,
"len(zh)": "1,1,7",
"num(ja)": 5387,
"len(ja)": "1,2,8",
"num(ja-kana)": 1086,
"len(ja-kana)": "1,2,8",
"num(ko)": 2281,
"len(ko)": "1,2,6"
},
"hpcai-tech/grok-1": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hpcai-tech/grok-1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">grok-1</a>",
"organization": "xAI",
"vocab_size": 131072,
"num(digit)": 40,
"len(digit)": "1,6,13",
"num(space)": 399,
"len(space)": "1,3,16",
"num(ar)": 69,
"len(ar)": "1,2,4",
"num(zh)": 1626,
"len(zh)": "1,2,7",
"num(ja)": 3118,
"len(ja)": "1,2,8",
"num(ja-kana)": 1908,
"len(ja-kana)": "1,2,8",
"num(ko)": 67,
"len(ko)": "1,1,2"
},
"internlm/internlm-chat-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm-chat-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm-chat-7b</a>",
"organization": "Shanghai AI Lab",
"vocab_size": 103168,
"num(digit)": 1259,
"len(digit)": "1,3,19",
"num(space)": 33008,
"len(space)": "1,6,128",
"num(ar)": 6702,
"len(ar)": "1,4,16",
"num(zh)": 32000,
"len(zh)": "1,2,15",
"num(ja)": 32866,
"len(ja)": "1,2,15",
"num(ja-kana)": 864,
"len(ja-kana)": "1,2,9",
"num(ko)": 298,
"len(ko)": "1,1,1"
},
"internlm/internlm-xcomposer-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm-xcomposer-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm-xcomposer-7b</a>",
"organization": "Shanghai AI Lab",
"vocab_size": 103168,
"num(digit)": 1261,
"len(digit)": "1,3,19",
"num(space)": 33008,
"len(space)": "1,6,128",
"num(ar)": 6702,
"len(ar)": "1,4,16",
"num(zh)": 32000,
"len(zh)": "1,2,15",
"num(ja)": 32866,
"len(ja)": "1,2,15",
"num(ja-kana)": 864,
"len(ja-kana)": "1,2,9",
"num(ko)": 298,
"len(ko)": "1,1,1"
},
"internlm/internlm2-chat-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-chat-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm2-chat-7b</a>",
"organization": "Shanghai AI Lab",
"vocab_size": 92544,
"num(digit)": 1261,
"len(digit)": "1,3,18",
"num(space)": 28681,
"len(space)": "1,7,128",
"num(ar)": 30,
"len(ar)": "1,1,1",
"num(zh)": 31148,
"len(zh)": "1,2,15",
"num(ja)": 31296,
"len(ja)": "1,2,15",
"num(ja-kana)": 148,
"len(ja-kana)": "1,1,1",
"num(ko)": 83,
"len(ko)": "1,1,1"
},
"internlm/internlm2-math-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-math-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm2-math-7b</a>",
"organization": "Shanghai AI Lab",
"vocab_size": 92544,
"num(digit)": 1261,
"len(digit)": "1,3,18",
"num(space)": 28681,
"len(space)": "1,7,128",
"num(ar)": 30,
"len(ar)": "1,1,1",
"num(zh)": 31148,
"len(zh)": "1,2,15",
"num(ja)": 31296,
"len(ja)": "1,2,15",
"num(ja-kana)": 148,
"len(ja-kana)": "1,1,1",
"num(ko)": 83,
"len(ko)": "1,1,1"
},
"microsoft/Phi-3-mini-4k-instruct": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Phi-3-mini-4k-instruct</a>",
"organization": "Microsoft",
"vocab_size": 32011,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 61,
"len(space)": "1,2,15",
"num(ar)": 55,
"len(ar)": "1,1,2",
"num(zh)": 700,
"len(zh)": "1,1,1",
"num(ja)": 837,
"len(ja)": "1,1,1",
"num(ja-kana)": 137,
"len(ja-kana)": "1,1,1",
"num(ko)": 111,
"len(ko)": "1,1,1"
},
"microsoft/phi-1": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/phi-1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">phi-1</a>",
"organization": "Microsoft",
"vocab_size": 50295,
"num(digit)": 1691,
"len(digit)": "1,3,16",
"num(space)": 33129,
"len(space)": "1,7,66",
"num(ar)": 22,
"len(ar)": "1,1,3",
"num(zh)": 51,
"len(zh)": "1,1,4",
"num(ja)": 183,
"len(ja)": "1,1,7",
"num(ja-kana)": 133,
"len(ja-kana)": "1,1,7",
"num(ko)": 0,
"len(ko)": "-"
},
"microsoft/phi-2": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/phi-2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">phi-2</a>",
"organization": "Microsoft",
"vocab_size": 50295,
"num(digit)": 1691,
"len(digit)": "1,3,16",
"num(space)": 33129,
"len(space)": "1,7,66",
"num(ar)": 22,
"len(ar)": "1,1,3",
"num(zh)": 51,
"len(zh)": "1,1,4",
"num(ja)": 183,
"len(ja)": "1,1,7",
"num(ja-kana)": 133,
"len(ja-kana)": "1,1,7",
"num(ko)": 0,
"len(ko)": "-"
},
"mistralai/Mistral-7B-v0.1": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/mistralai/Mistral-7B-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Mistral-7B-v0.1</a>",
"organization": "Mistral",
"vocab_size": 32000,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 85,
"len(space)": "1,3,15",
"num(ar)": 71,
"len(ar)": "1,1,2",
"num(zh)": 1459,
"len(zh)": "1,1,2",
"num(ja)": 1593,
"len(ja)": "1,1,2",
"num(ja-kana)": 134,
"len(ja-kana)": "1,1,1",
"num(ko)": 346,
"len(ko)": "1,1,1"
},
"mistralai/Mixtral-8x7B-v0.1": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/mistralai/Mixtral-8x7B-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Mixtral-8x7B-v0.1</a>",
"organization": "Mistral",
"vocab_size": 32000,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 85,
"len(space)": "1,3,15",
"num(ar)": 71,
"len(ar)": "1,1,2",
"num(zh)": 1459,
"len(zh)": "1,1,2",
"num(ja)": 1593,
"len(ja)": "1,1,2",
"num(ja-kana)": 134,
"len(ja-kana)": "1,1,1",
"num(ko)": 346,
"len(ko)": "1,1,1"
},
"openai-community/gpt2": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openai-community/gpt2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2</a>",
"organization": "OpenAI",
"vocab_size": 50257,
"num(digit)": 1691,
"len(digit)": "1,3,16",
"num(space)": 33129,
"len(space)": "1,7,66",
"num(ar)": 22,
"len(ar)": "1,1,3",
"num(zh)": 51,
"len(zh)": "1,1,4",
"num(ja)": 183,
"len(ja)": "1,1,7",
"num(ja-kana)": 133,
"len(ja-kana)": "1,1,7",
"num(ko)": 0,
"len(ko)": "-"
},
"openai/code-davinci-002": {
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">code-davinci-002</a>",
"organization": "OpenAI",
"vocab_size": 50281,
"num(digit)": 1691,
"len(digit)": "1,3,16",
"num(space)": 33175,
"len(space)": "1,7,66",
"num(ar)": 22,
"len(ar)": "1,1,3",
"num(zh)": 51,
"len(zh)": "1,1,4",
"num(ja)": 183,
"len(ja)": "1,1,7",
"num(ja-kana)": 133,
"len(ja-kana)": "1,1,7",
"num(ko)": 0,
"len(ko)": "-"
},
"openai/gpt-3.5-turbo": {
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-3.5-turbo</a>",
"organization": "OpenAI",
"vocab_size": 100277,
"num(digit)": 1110,
"len(digit)": "1,3,3",
"num(space)": 47472,
"len(space)": "1,7,128",
"num(ar)": 113,
"len(ar)": "1,2,10",
"num(zh)": 868,
"len(zh)": "1,1,7",
"num(ja)": 1035,
"len(ja)": "1,1,7",
"num(ja-kana)": 169,
"len(ja-kana)": "1,1,7",
"num(ko)": 299,
"len(ko)": "1,2,4"
},
"openai/gpt-4o": {
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4o</a>",
"organization": "OpenAI",
"vocab_size": 200019,
"num(digit)": 1110,
"len(digit)": "1,3,3",
"num(space)": 109316,
"len(space)": "1,6,128",
"num(ar)": 8055,
"len(ar)": "1,4,12",
"num(zh)": 7563,
"len(zh)": "1,2,11",
"num(ja)": 8292,
"len(ja)": "1,2,11",
"num(ja-kana)": 809,
"len(ja-kana)": "1,2,11",
"num(ko)": 2365,
"len(ko)": "1,2,8"
},
"openai/text-davinci-003": {
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">text-davinci-003</a>",
"organization": "OpenAI",
"vocab_size": 50281,
"num(digit)": 1691,
"len(digit)": "1,3,16",
"num(space)": 33175,
"len(space)": "1,7,66",
"num(ar)": 22,
"len(ar)": "1,1,3",
"num(zh)": 51,
"len(zh)": "1,1,4",
"num(ja)": 183,
"len(ja)": "1,1,7",
"num(ja-kana)": 133,
"len(ja-kana)": "1,1,7",
"num(ko)": 0,
"len(ko)": "-"
},
"thu-coai/CharacterGLM-6B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/thu-coai/CharacterGLM-6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CharacterGLM-6B</a>",
"organization": "Tsinghua",
"vocab_size": 64789,
"num(digit)": 20,
"len(digit)": "1,1,1",
"num(space)": 67,
"len(space)": "1,2,15",
"num(ar)": 57,
"len(ar)": "1,1,2",
"num(zh)": 30922,
"len(zh)": "1,2,16",
"num(ja)": 31065,
"len(ja)": "1,2,16",
"num(ja-kana)": 143,
"len(ja-kana)": "1,1,1",
"num(ko)": 604,
"len(ko)": "1,1,1"
},
"tiiuae/falcon-180b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tiiuae/falcon-180b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">falcon-180b</a>",
"organization": "TII",
"vocab_size": 65024,
"num(digit)": 1108,
"len(digit)": "1,3,3",
"num(space)": 40202,
"len(space)": "1,7,65",
"num(ar)": 21,
"len(ar)": "1,1,4",
"num(zh)": 1627,
"len(zh)": "1,1,3",
"num(ja)": 1652,
"len(ja)": "1,1,3",
"num(ja-kana)": 25,
"len(ja-kana)": "1,1,1",
"num(ko)": 1,
"len(ko)": "1,1,1"
},
"tiiuae/falcon-7b": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tiiuae/falcon-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">falcon-7b</a>",
"organization": "TII",
"vocab_size": 65024,
"num(digit)": 1108,
"len(digit)": "1,3,3",
"num(space)": 40202,
"len(space)": "1,7,65",
"num(ar)": 21,
"len(ar)": "1,1,4",
"num(zh)": 1627,
"len(zh)": "1,1,3",
"num(ja)": 1652,
"len(ja)": "1,1,3",
"num(ja-kana)": 25,
"len(ja-kana)": "1,1,1",
"num(ko)": 1,
"len(ko)": "1,1,1"
},
"Qwen/Qwen1.5-1.8B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-1.8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5-1.8B</a>",
"organization": "Alibaba",
"vocab_size": 151646,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 55883,
"len(space)": "1,6,128",
"num(ar)": 4018,
"len(ar)": "1,3,12",
"num(zh)": 25557,
"len(zh)": "1,2,7",
"num(ja)": 27206,
"len(ja)": "1,2,11",
"num(ja-kana)": 2089,
"len(ja-kana)": "1,3,11",
"num(ko)": 3495,
"len(ko)": "1,1,5"
},
"Qwen/Qwen1.5-110B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-110B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5-110B</a>",
"organization": "Alibaba",
"vocab_size": 151646,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 55883,
"len(space)": "1,6,128",
"num(ar)": 4018,
"len(ar)": "1,3,12",
"num(zh)": 25557,
"len(zh)": "1,2,7",
"num(ja)": 27206,
"len(ja)": "1,2,11",
"num(ja-kana)": 2089,
"len(ja-kana)": "1,3,11",
"num(ko)": 3495,
"len(ko)": "1,1,5"
},
"Qwen/Qwen1.5-14B": {
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-14B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5-14B</a>",
"organization": "Alibaba",
"vocab_size": 151646,
"num(digit)": 10,
"len(digit)": "1,1,1",
"num(space)": 55883,
"len(space)": "1,6,128",
"num(ar)": 4018,
"len(ar)": "1,3,12",
"num(zh)": 25557,
"len(zh)": "1,2,7",
"num(ja)": 27206,
"len(ja)": "1,2,11",
"num(ja-kana)": 2089,
"len(ja-kana)": "1,3,11",
"num(ko)": 3495,
"len(ko)": "1,1,5"
}
}