{ "FacebookAI/xlm-roberta-base": { "tokenizer": "xlm-roberta-base", "organization": "Facebook", "vocab_size": 250002, "num(digit)": 2728, "len(digit)": "1,3,9", "num(space)": 1, "len(space)": "1,1,1", "num(ar)": 14644, "len(ar)": "1,4,16", "num(zh)": 18457, "len(zh)": "1,2,16", "num(ja)": 20572, "len(ja)": "1,2,16", "num(ja-kana)": 3434, "len(ja-kana)": "1,3,12", "num(ko)": 5373, "len(ko)": "1,2,8" }, "clue/roberta_chinese_clue_tiny": { "tokenizer": "roberta-chinese-clue", "organization": "CLUE", "vocab_size": 8021, "num(digit)": 230, "len(digit)": "1,4,10", "num(space)": 0, "len(space)": "-", "num(ar)": 30, "len(ar)": "1,2,3", "num(zh)": 5689, "len(zh)": "1,1,1", "num(ja)": 5691, "len(ja)": "1,1,3", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 0, "len(ko)": "-" }, "dbmdz/bert-base-german-uncased": { "tokenizer": "bert-base-german-uncased", "organization": "dbmdz", "vocab_size": 31102, "num(digit)": 1733, "len(digit)": "1,4,12", "num(space)": 0, "len(space)": "-", "num(ar)": 0, "len(ar)": "-", "num(zh)": 0, "len(zh)": "-", "num(ja)": 0, "len(ja)": "-", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 0, "len(ko)": "-" }, "google-bert/bert-base-cased": { "tokenizer": "bert-base-cased", "organization": "Google", "vocab_size": 28996, "num(digit)": 926, "len(digit)": "1,4,11", "num(space)": 0, "len(space)": "-", "num(ar)": 94, "len(ar)": "1,3,4", "num(zh)": 226, "len(zh)": "1,2,3", "num(ja)": 390, "len(ja)": "1,2,3", "num(ja-kana)": 164, "len(ja-kana)": "1,2,3", "num(ko)": 10, "len(ko)": "1,2,3" }, "google-bert/bert-base-chinese": { "tokenizer": "bert-base-chinese", "organization": "Google", "vocab_size": 21128, "num(digit)": 1451, "len(digit)": "1,3,12", "num(space)": 2, "len(space)": "1,2,3", "num(ar)": 30, "len(ar)": "1,2,3", "num(zh)": 14642, "len(zh)": "1,2,3", "num(ja)": 15197, "len(ja)": "1,3,15", "num(ja-kana)": 553, "len(ja-kana)": "1,3,15", "num(ko)": 0, "len(ko)": "-" }, "google-bert/bert-base-german-cased": { "tokenizer": "bert-base-german-cased", "organization": "Google", "vocab_size": 30000, "num(digit)": 4065, "len(digit)": "1,11,22", "num(space)": 0, "len(space)": "-", "num(ar)": 0, "len(ar)": "-", "num(zh)": 0, "len(zh)": "-", "num(ja)": 0, "len(ja)": "-", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 0, "len(ko)": "-" }, "google-bert/bert-base-multilingual-cased": { "tokenizer": "bert-base-multilingual-cased", "organization": "Google", "vocab_size": 119547, "num(digit)": 2583, "len(digit)": "1,3,13", "num(space)": 0, "len(space)": "-", "num(ar)": 4873, "len(ar)": "1,5,14", "num(zh)": 13542, "len(zh)": "1,2,3", "num(ja)": 14880, "len(ja)": "1,3,10", "num(ja-kana)": 1336, "len(ja-kana)": "1,4,10", "num(ko)": 3271, "len(ko)": "1,3,6" }, "google-bert/bert-base-multilingual-uncased": { "tokenizer": "bert-base-multilingual-uncased", "organization": "Google", "vocab_size": 105879, "num(digit)": 2510, "len(digit)": "1,3,13", "num(space)": 2, "len(space)": "1,2,3", "num(ar)": 4530, "len(ar)": "1,5,13", "num(zh)": 16658, "len(zh)": "1,2,3", "num(ja)": 17858, "len(ja)": "1,3,10", "num(ja-kana)": 1188, "len(ja-kana)": "1,4,10", "num(ko)": 0, "len(ko)": "-" }, "google-bert/bert-base-uncased": { "tokenizer": "bert-base-uncased", "organization": "Google", "vocab_size": 30522, "num(digit)": 2056, "len(digit)": "1,4,11", "num(space)": 0, "len(space)": "-", "num(ar)": 88, "len(ar)": "1,3,5", "num(zh)": 488, "len(zh)": "1,2,3", "num(ja)": 676, "len(ja)": "1,2,3", "num(ja-kana)": 188, "len(ja-kana)": "1,2,3", "num(ko)": 0, "len(ko)": "-" }, "google/mobilebert-uncased": { "tokenizer": "mobilebert-uncased", "organization": "Google", "vocab_size": 30522, "num(digit)": 2056, "len(digit)": "1,4,11", "num(space)": 0, "len(space)": "-", "num(ar)": 88, "len(ar)": "1,3,5", "num(zh)": 488, "len(zh)": "1,2,3", "num(ja)": 676, "len(ja)": "1,2,3", "num(ja-kana)": 188, "len(ja-kana)": "1,2,3", "num(ko)": 0, "len(ko)": "-" }, "tohoku-nlp/bert-base-japanese": { "tokenizer": "bert-base-japanese", "organization": "Tohoku", "vocab_size": 32000, "num(digit)": 669, "len(digit)": "1,3,5", "num(space)": 0, "len(space)": "-", "num(ar)": 10, "len(ar)": "1,3,3", "num(zh)": 18792, "len(zh)": "1,2,11", "num(ja)": 28367, "len(ja)": "1,2,13", "num(ja-kana)": 12359, "len(ja-kana)": "1,4,13", "num(ko)": 0, "len(ko)": "-" }, "gpt-4": { "tokenizer": "gpt-4", "organization": "OpenAI", "vocab_size": 100277, "num(digit)": 1110, "len(digit)": "1,3,3", "num(space)": 47472, "len(space)": "1,7,128", "num(ar)": 113, "len(ar)": "1,2,10", "num(zh)": 868, "len(zh)": "1,1,7", "num(ja)": 1035, "len(ja)": "1,1,7", "num(ja-kana)": 169, "len(ja-kana)": "1,1,7", "num(ko)": 299, "len(ko)": "1,2,4" }, "llama3": { "tokenizer": "llama3", "organization": "Meta", "vocab_size": 128256, "num(digit)": 1110, "len(digit)": "1,3,3", "num(space)": 60860, "len(space)": "1,6,128", "num(ar)": 3810, "len(ar)": "1,4,11", "num(zh)": 4424, "len(zh)": "1,1,7", "num(ja)": 5387, "len(ja)": "1,2,8", "num(ja-kana)": 1086, "len(ja-kana)": "1,2,8", "num(ko)": 2281, "len(ko)": "1,2,6" }, "google-t5/t5-large": { "tokenizer": "t5", "organization": "Google", "vocab_size": 32100, "num(digit)": 1133, "len(digit)": "1,3,13", "num(space)": 0, "len(space)": "-", "num(ar)": 0, "len(ar)": "-", "num(zh)": 0, "len(zh)": "-", "num(ja)": 0, "len(ja)": "-", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 0, "len(ko)": "-" }, "google/byt5-small": { "tokenizer": "byt5-small", "organization": "Google", "vocab_size": 384, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 10, "len(space)": "1,1,1", "num(ar)": 0, "len(ar)": "-", "num(zh)": 0, "len(zh)": "-", "num(ja)": 0, "len(ja)": "-", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 0, "len(ko)": "-" }, "google/mt5-large": { "tokenizer": "mt5-large", "organization": "Google", "vocab_size": 250100, "num(digit)": 16829, "len(digit)": "1,4,16", "num(space)": 1, "len(space)": "1,1,1", "num(ar)": 7459, "len(ar)": "1,3,16", "num(zh)": 21489, "len(zh)": "1,2,16", "num(ja)": 27078, "len(ja)": "1,2,16", "num(ja-kana)": 9160, "len(ja-kana)": "1,3,14", "num(ko)": 4041, "len(ko)": "1,1,10" }, "lmsys/fastchat-t5-3b-v1.0": { "tokenizer": "fastchat-t5-3b-v1.0", "organization": "LMSYS", "vocab_size": 32110, "num(digit)": 1033, "len(digit)": "1,3,8", "num(space)": 0, "len(space)": "-", "num(ar)": 0, "len(ar)": "-", "num(zh)": 0, "len(zh)": "-", "num(ja)": 0, "len(ja)": "-", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 0, "len(ko)": "-" }, "paust/pko-t5-large": { "tokenizer": "pko-t5-large", "organization": "PAUST", "vocab_size": 50358, "num(digit)": 51, "len(digit)": "1,2,3", "num(space)": 10, "len(space)": "1,1,1", "num(ar)": 0, "len(ar)": "-", "num(zh)": 0, "len(zh)": "-", "num(ja)": 0, "len(ja)": "-", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 49050, "len(ko)": "1,2,16" }, "bloom": { "tokenizer": "bloom", "organization": "BigScience", "vocab_size": 250680, "num(digit)": 6629, "len(digit)": "1,4,50", "num(space)": 140180, "len(space)": "1,6,600", "num(ar)": 20854, "len(ar)": "1,5,16", "num(zh)": 30603, "len(zh)": "1,2,23", "num(ja)": 30816, "len(ja)": "1,2,23", "num(ja-kana)": 214, "len(ja-kana)": "1,1,3", "num(ko)": 338, "len(ko)": "1,1,3" }, "llama": { "tokenizer": "llama", "organization": "Meta", "vocab_size": 32000, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 700, "len(zh)": "1,1,1", "num(ja)": 837, "len(ja)": "1,1,1", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "ClueAI/ChatYuan-large-v2": { "tokenizer": "ChatYuan-large-v2", "organization": "CLUE", "vocab_size": 32128, "num(digit)": 740, "len(digit)": "1,3,9", "num(space)": 0, "len(space)": "-", "num(ar)": 2, "len(ar)": "1,1,1", "num(zh)": 29591, "len(zh)": "1,2,16", "num(ja)": 29736, "len(ja)": "1,2,16", "num(ja-kana)": 145, "len(ja-kana)": "1,1,2", "num(ko)": 0, "len(ko)": "-" }, "Meta/llama3": { "tokenizer": "llama3", "organization": "Meta", "vocab_size": 128256, "num(digit)": 1110, "len(digit)": "1,3,3", "num(space)": 60860, "len(space)": "1,6,128", "num(ar)": 3810, "len(ar)": "1,4,11", "num(zh)": 4424, "len(zh)": "1,1,7", "num(ja)": 5387, "len(ja)": "1,2,8", "num(ja-kana)": 1086, "len(ja-kana)": "1,2,8", "num(ko)": 2281, "len(ko)": "1,2,6" }, "openai/gpt-4": { "tokenizer": "gpt-4", "organization": "OpenAI", "vocab_size": 100277, "num(digit)": 1110, "len(digit)": "1,3,3", "num(space)": 47472, "len(space)": "1,7,128", "num(ar)": 113, "len(ar)": "1,2,10", "num(zh)": 868, "len(zh)": "1,1,7", "num(ja)": 1035, "len(ja)": "1,1,7", "num(ja-kana)": 169, "len(ja-kana)": "1,1,7", "num(ko)": 299, "len(ko)": "1,2,4" }, "gradientai/Llama-3-8B-Instruct-Gradient-1048k": { "tokenizer": "llama3", "organization": "Meta", "vocab_size": 128256, "num(digit)": 1110, "len(digit)": "1,3,3", "num(space)": 60860, "len(space)": "1,6,128", "num(ar)": 3810, "len(ar)": "1,4,11", "num(zh)": 4424, "len(zh)": "1,1,7", "num(ja)": 5387, "len(ja)": "1,2,8", "num(ja-kana)": 1086, "len(ja-kana)": "1,2,8", "num(ko)": 2281, "len(ko)": "1,2,6" }, "bigscience/bloom": { "tokenizer": "bloom", "organization": "BigScience", "vocab_size": 250680, "num(digit)": 6629, "len(digit)": "1,4,50", "num(space)": 140180, "len(space)": "1,6,600", "num(ar)": 20854, "len(ar)": "1,5,16", "num(zh)": 30603, "len(zh)": "1,2,23", "num(ja)": 30816, "len(ja)": "1,2,23", "num(ja-kana)": 214, "len(ja-kana)": "1,1,3", "num(ko)": 338, "len(ko)": "1,1,3" }, "huggyllama/llama-7b": { "tokenizer": "llama", "organization": "Meta", "vocab_size": 32000, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 700, "len(zh)": "1,1,1", "num(ja)": 837, "len(ja)": "1,1,1", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "baichuan-inc/Baichuan-7B": { "tokenizer": "baichuan", "organization": "Baichuan", "vocab_size": 64000, "num(digit)": 335, "len(digit)": "1,14,14", "num(space)": 13, "len(space)": "1,1,1", "num(ar)": 299, "len(ar)": "1,1,2", "num(zh)": 27676, "len(zh)": "1,1,9", "num(ja)": 28522, "len(ja)": "1,1,9", "num(ja-kana)": 178, "len(ja-kana)": "1,1,1", "num(ko)": 1591, "len(ko)": "1,1,1" }, "01-ai/Yi-34B": { "tokenizer": "Yi-34B", "organization": "Yi", "vocab_size": 64000, "num(digit)": 200, "len(digit)": "1,13,15", "num(space)": 24274, "len(space)": "1,7,16", "num(ar)": 18, "len(ar)": "1,1,4", "num(zh)": 21356, "len(zh)": "1,2,12", "num(ja)": 21407, "len(ja)": "1,2,12", "num(ja-kana)": 51, "len(ja-kana)": "1,1,2", "num(ko)": 28, "len(ko)": "1,1,2" }, "01-ai/Yi-6B": { "tokenizer": "Yi-6B", "organization": "Yi", "vocab_size": 64000, "num(digit)": 200, "len(digit)": "1,13,15", "num(space)": 24274, "len(space)": "1,7,16", "num(ar)": 18, "len(ar)": "1,1,4", "num(zh)": 21356, "len(zh)": "1,2,12", "num(ja)": 21407, "len(ja)": "1,2,12", "num(ja-kana)": 51, "len(ja-kana)": "1,1,2", "num(ko)": 28, "len(ko)": "1,1,2" }, "01-ai/Yi-VL-34B": { "tokenizer": "Yi-VL-34B", "organization": "Yi", "vocab_size": 64000, "num(digit)": 200, "len(digit)": "1,13,15", "num(space)": 43, "len(space)": "1,2,15", "num(ar)": 18, "len(ar)": "1,1,4", "num(zh)": 21356, "len(zh)": "1,2,12", "num(ja)": 21407, "len(ja)": "1,2,12", "num(ja-kana)": 51, "len(ja-kana)": "1,1,2", "num(ko)": 28, "len(ko)": "1,1,2" }, "ClassCat/gpt2-base-french": { "tokenizer": "gpt2-base-french", "organization": "ClassCat", "vocab_size": 50000, "num(digit)": 1833, "len(digit)": "1,4,5", "num(space)": 31889, "len(space)": "1,7,32", "num(ar)": 41, "len(ar)": "1,1,4", "num(zh)": 27, "len(zh)": "1,1,1", "num(ja)": 46, "len(ja)": "1,1,2", "num(ja-kana)": 19, "len(ja-kana)": "1,1,2", "num(ko)": 0, "len(ko)": "-" }, "ClassCat/gpt2-base-spanish": { "tokenizer": "gpt2-base-spanish", "organization": "ClassCat", "vocab_size": 50000, "num(digit)": 1492, "len(digit)": "1,4,9", "num(space)": 34496, "len(space)": "1,8,32", "num(ar)": 36, "len(ar)": "1,1,4", "num(zh)": 13, "len(zh)": "1,1,1", "num(ja)": 36, "len(ja)": "1,1,2", "num(ja-kana)": 23, "len(ja-kana)": "1,1,2", "num(ko)": 0, "len(ko)": "-" }, "ClueAI/PromptCLUE-base": { "tokenizer": "PromptCLUE-base", "organization": "CLUE", "vocab_size": 32128, "num(digit)": 740, "len(digit)": "1,3,9", "num(space)": 0, "len(space)": "-", "num(ar)": 2, "len(ar)": "1,1,1", "num(zh)": 29591, "len(zh)": "1,2,16", "num(ja)": 29736, "len(ja)": "1,2,16", "num(ja-kana)": 145, "len(ja-kana)": "1,1,2", "num(ko)": 0, "len(ko)": "-" }, "CohereForAI/aya-101": { "tokenizer": "aya-101", "organization": "Cohere For AI", "vocab_size": 250100, "num(digit)": 16829, "len(digit)": "1,4,16", "num(space)": 1, "len(space)": "1,1,1", "num(ar)": 7459, "len(ar)": "1,3,16", "num(zh)": 21489, "len(zh)": "1,2,16", "num(ja)": 27078, "len(ja)": "1,2,16", "num(ja-kana)": 9160, "len(ja-kana)": "1,3,14", "num(ko)": 4041, "len(ko)": "1,1,10" }, "EleutherAI/gpt-neox-20b": { "tokenizer": "gpt-neox-20b", "organization": "EleutherAI", "vocab_size": 50277, "num(digit)": 2036, "len(digit)": "1,3,35", "num(space)": 28996, "len(space)": "1,7,512", "num(ar)": 94, "len(ar)": "1,2,4", "num(zh)": 313, "len(zh)": "1,1,2", "num(ja)": 480, "len(ja)": "1,1,4", "num(ja-kana)": 167, "len(ja-kana)": "1,1,4", "num(ko)": 25, "len(ko)": "1,1,2" }, "HuggingFaceH4/starchat-alpha": { "tokenizer": "starchat-alpha", "organization": "-", "vocab_size": 49156, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 16515, "len(space)": "1,6,256", "num(ar)": 84, "len(ar)": "1,2,4", "num(zh)": 2030, "len(zh)": "1,1,7", "num(ja)": 2368, "len(ja)": "1,1,8", "num(ja-kana)": 360, "len(ja-kana)": "1,2,8", "num(ko)": 491, "len(ko)": "1,2,5" }, "HuggingFaceH4/zephyr-7b-beta": { "tokenizer": "zephyr-7b-beta", "organization": "HuggingFace", "vocab_size": 32000, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 85, "len(space)": "1,3,15", "num(ar)": 71, "len(ar)": "1,1,2", "num(zh)": 1459, "len(zh)": "1,1,2", "num(ja)": 1593, "len(ja)": "1,1,2", "num(ja-kana)": 134, "len(ja-kana)": "1,1,1", "num(ko)": 346, "len(ko)": "1,1,1" }, "LLM360/CrystalCoder": { "tokenizer": "CrystalCoder", "organization": "MBZUAI", "vocab_size": 32022, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 700, "len(zh)": "1,1,1", "num(ja)": 837, "len(ja)": "1,1,1", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "NousResearch/Llama-2-7b-chat-hf": { "tokenizer": "llama2", "organization": "Meta", "vocab_size": 32001, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 700, "len(zh)": "1,1,1", "num(ja)": 837, "len(ja)": "1,1,1", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "OrionStarAI/Orion-14B-Chat": { "tokenizer": "Orion-14B-Chat", "organization": "OrionStar", "vocab_size": 84608, "num(digit)": 1559, "len(digit)": "1,4,14", "num(space)": 18383, "len(space)": "1,6,16", "num(ar)": 102, "len(ar)": "1,1,1", "num(zh)": 46998, "len(zh)": "1,2,16", "num(ja)": 49644, "len(ja)": "1,2,16", "num(ja-kana)": 2987, "len(ja-kana)": "1,3,11", "num(ko)": 5110, "len(ko)": "1,2,7" }, "Qwen/Qwen-7B-Chat": { "tokenizer": "Qwen", "organization": "Alibaba", "vocab_size": 151851, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 55883, "len(space)": "1,6,128", "num(ar)": 4018, "len(ar)": "1,3,12", "num(zh)": 25557, "len(zh)": "1,2,7", "num(ja)": 27206, "len(ja)": "1,2,11", "num(ja-kana)": 2089, "len(ja-kana)": "1,3,11", "num(ko)": 3495, "len(ko)": "1,1,5" }, "Qwen/Qwen1.5-14B-Chat": { "tokenizer": "Qwen1.5", "organization": "Alibaba", "vocab_size": 151646, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 55883, "len(space)": "1,6,128", "num(ar)": 4018, "len(ar)": "1,3,12", "num(zh)": 25557, "len(zh)": "1,2,7", "num(ja)": 27206, "len(ja)": "1,2,11", "num(ja-kana)": 2089, "len(ja-kana)": "1,3,11", "num(ko)": 3495, "len(ko)": "1,1,5" }, "Skywork/Skywork-13B-Math": { "tokenizer": "Skywork-13B-Math", "organization": "Kunlun", "vocab_size": 65519, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 62, "len(space)": "1,2,15", "num(ar)": 56, "len(ar)": "1,1,2", "num(zh)": 33913, "len(zh)": "1,2,5", "num(ja)": 34064, "len(ja)": "1,2,5", "num(ja-kana)": 150, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "Skywork/Skywork-13B-base": { "tokenizer": "Skywork-13B-base", "organization": "Kunlun", "vocab_size": 65519, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 62, "len(space)": "1,2,15", "num(ar)": 56, "len(ar)": "1,1,2", "num(zh)": 33913, "len(zh)": "1,2,5", "num(ja)": 34064, "len(ja)": "1,2,5", "num(ja-kana)": 150, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "THUDM/chatglm-6b": { "tokenizer": "chatglm-6b", "organization": "Tsinghua", "vocab_size": 130344, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 93, "len(space)": "1,34,80", "num(ar)": 137, "len(ar)": "1,2,4", "num(zh)": 61358, "len(zh)": "1,2,16", "num(ja)": 61784, "len(ja)": "1,2,16", "num(ja-kana)": 439, "len(ja-kana)": "1,2,5", "num(ko)": 114, "len(ko)": "1,1,3" }, "THUDM/chatglm2-6b": { "tokenizer": "chatglm2-6b", "organization": "Tsinghua", "vocab_size": 64787, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 67, "len(space)": "1,2,15", "num(ar)": 57, "len(ar)": "1,1,2", "num(zh)": 30922, "len(zh)": "1,2,16", "num(ja)": 31065, "len(ja)": "1,2,16", "num(ja-kana)": 143, "len(ja-kana)": "1,1,1", "num(ko)": 604, "len(ko)": "1,1,1" }, "THUDM/chatglm3-6b": { "tokenizer": "chatglm3-6b", "organization": "Tsinghua", "vocab_size": 64796, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 67, "len(space)": "1,2,15", "num(ar)": 57, "len(ar)": "1,1,2", "num(zh)": 30922, "len(zh)": "1,2,16", "num(ja)": 31065, "len(ja)": "1,2,16", "num(ja-kana)": 143, "len(ja-kana)": "1,1,1", "num(ko)": 604, "len(ko)": "1,1,1" }, "TigerResearch/tigerbot-13b-chat-v2": { "tokenizer": "tigerbot-13b-chat-v2", "organization": "Tigerobo", "vocab_size": 60515, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 28603, "len(zh)": "1,2,16", "num(ja)": 28770, "len(ja)": "1,2,16", "num(ja-kana)": 167, "len(ja-kana)": "1,1,2", "num(ko)": 261, "len(ko)": "1,1,1" }, "TigerResearch/tigerbot-70b-chat-v4-4k": { "tokenizer": "tigerbot-70b-chat-v4-4k", "organization": "Tigerobo", "vocab_size": 65110, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 30509, "len(zh)": "1,2,16", "num(ja)": 32061, "len(ja)": "1,2,16", "num(ja-kana)": 2071, "len(ja-kana)": "1,2,8", "num(ko)": 1504, "len(ko)": "1,1,5" }, "Upstage/SOLAR-10.7B-v1.0": { "tokenizer": "SOLAR-10.7B-v1.0", "organization": "-", "vocab_size": 32000, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 85, "len(space)": "1,3,15", "num(ar)": 71, "len(ar)": "1,1,2", "num(zh)": 1459, "len(zh)": "1,1,2", "num(ja)": 1593, "len(ja)": "1,1,2", "num(ja-kana)": 134, "len(ja-kana)": "1,1,1", "num(ko)": 346, "len(ko)": "1,1,1" }, "WizardLM/WizardCoder-15B-V1.0": { "tokenizer": "WizardCoder-15B-V1.0", "organization": "Microsoft", "vocab_size": 49153, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 16515, "len(space)": "1,6,256", "num(ar)": 84, "len(ar)": "1,2,4", "num(zh)": 2030, "len(zh)": "1,1,7", "num(ja)": 2368, "len(ja)": "1,1,8", "num(ja-kana)": 360, "len(ja-kana)": "1,2,8", "num(ko)": 491, "len(ko)": "1,2,5" }, "WizardLM/WizardCoder-Python-7B-V1.0": { "tokenizer": "WizardCoder-Python-7B-V1.0", "organization": "Microsoft", "vocab_size": 32001, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 700, "len(zh)": "1,1,1", "num(ja)": 837, "len(ja)": "1,1,1", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "WizardLM/WizardLM-7B-V1.0": { "tokenizer": "WizardLM-7B-V1.0", "organization": "Microsoft", "vocab_size": 32001, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 700, "len(zh)": "1,1,1", "num(ja)": 837, "len(ja)": "1,1,1", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "WizardLM/WizardMath-70B-V1.0": { "tokenizer": "WizardMath-70B-V1.0", "organization": "Microsoft", "vocab_size": 32002, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 700, "len(zh)": "1,1,1", "num(ja)": 837, "len(ja)": "1,1,1", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "abeja/gpt-neox-japanese-2.7b": { "tokenizer": "gpt-neox-japanese-2.7b", "organization": "ABEJA", "vocab_size": 32000, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 0, "len(space)": "-", "num(ar)": 0, "len(ar)": "-", "num(zh)": 15176, "len(zh)": "1,2,2", "num(ja)": 31482, "len(ja)": "1,2,3", "num(ja-kana)": 16306, "len(ja-kana)": "1,3,3", "num(ko)": 0, "len(ko)": "-" }, "ai21labs/Jamba-v0.1": { "tokenizer": "Jamba-v0.1", "organization": "AI21", "vocab_size": 65536, "num(digit)": 1556, "len(digit)": "1,16,17", "num(space)": 39501, "len(space)": "1,7,32", "num(ar)": 867, "len(ar)": "1,3,8", "num(zh)": 1157, "len(zh)": "1,1,2", "num(ja)": 1287, "len(ja)": "1,1,2", "num(ja-kana)": 130, "len(ja-kana)": "1,1,2", "num(ko)": 312, "len(ko)": "1,1,2" }, "allenai/OLMo-7B": { "tokenizer": "OLMo-7B", "organization": "Allen AI", "vocab_size": 50280, "num(digit)": 2036, "len(digit)": "1,3,35", "num(space)": 29019, "len(space)": "1,7,512", "num(ar)": 94, "len(ar)": "1,2,4", "num(zh)": 313, "len(zh)": "1,1,2", "num(ja)": 480, "len(ja)": "1,1,4", "num(ja-kana)": 167, "len(ja-kana)": "1,1,4", "num(ko)": 25, "len(ko)": "1,1,2" }, "baichuan-inc/Baichuan2-7B-Chat": { "tokenizer": "baichuan2", "organization": "Baichuan", "vocab_size": 125696, "num(digit)": 1023, "len(digit)": "1,14,14", "num(space)": 26013, "len(space)": "1,7,32", "num(ar)": 335, "len(ar)": "1,1,27", "num(zh)": 70398, "len(zh)": "1,2,32", "num(ja)": 71269, "len(ja)": "1,2,32", "num(ja-kana)": 206, "len(ja-kana)": "1,1,9", "num(ko)": 1595, "len(ko)": "1,1,2" }, "ckiplab/gpt2-base-chinese": { "tokenizer": "gpt2-base-chinese", "organization": "SINICA", "vocab_size": 21128, "num(digit)": 1451, "len(digit)": "1,3,12", "num(space)": 2, "len(space)": "1,2,3", "num(ar)": 30, "len(ar)": "1,2,3", "num(zh)": 14642, "len(zh)": "1,2,3", "num(ja)": 15197, "len(ja)": "1,3,15", "num(ja-kana)": 553, "len(ja-kana)": "1,3,15", "num(ko)": 0, "len(ko)": "-" }, "cyberagent/open-calm-7b": { "tokenizer": "open-calm-7b", "organization": "CyberAgent", "vocab_size": 52000, "num(digit)": 690, "len(digit)": "1,3,5", "num(space)": 1698, "len(space)": "1,4,33", "num(ar)": 10, "len(ar)": "1,1,4", "num(zh)": 30775, "len(zh)": "1,3,31", "num(ja)": 45790, "len(ja)": "1,3,31", "num(ja-kana)": 32535, "len(ja-kana)": "1,3,31", "num(ko)": 0, "len(ko)": "-" }, "databricks/dbrx-instruct": { "tokenizer": "dbrx-instruct", "organization": "Databricks", "vocab_size": 100280, "num(digit)": 1126, "len(digit)": "1,3,17", "num(space)": 47400, "len(space)": "1,7,128", "num(ar)": 113, "len(ar)": "1,2,10", "num(zh)": 868, "len(zh)": "1,1,7", "num(ja)": 1035, "len(ja)": "1,1,7", "num(ja-kana)": 169, "len(ja-kana)": "1,1,7", "num(ko)": 299, "len(ko)": "1,2,4" }, "deepseek-ai/DeepSeek-V2": { "tokenizer": "DeepSeek-V2", "organization": "DeepSeek", "vocab_size": 100002, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 48073, "len(space)": "1,7,128", "num(ar)": 48, "len(ar)": "1,1,4", "num(zh)": 18052, "len(zh)": "1,2,16", "num(ja)": 18090, "len(ja)": "1,2,16", "num(ja-kana)": 38, "len(ja-kana)": "1,1,2", "num(ko)": 16, "len(ko)": "1,1,2" }, "deepseek-ai/deepseek-coder-33b-instruct": { "tokenizer": "deepseek-coder-33b-instruct", "organization": "DeepSeek", "vocab_size": 32022, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 15254, "len(space)": "1,6,65", "num(ar)": 12, "len(ar)": "1,1,2", "num(zh)": 4803, "len(zh)": "1,2,4", "num(ja)": 4804, "len(ja)": "1,2,4", "num(ja-kana)": 1, "len(ja-kana)": "1,1,1", "num(ko)": 0, "len(ko)": "-" }, "deepseek-ai/deepseek-llm-7b-base": { "tokenizer": "deepseek-llm-7b-base", "organization": "DeepSeek", "vocab_size": 100015, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 48073, "len(space)": "1,7,128", "num(ar)": 48, "len(ar)": "1,1,4", "num(zh)": 18052, "len(zh)": "1,2,16", "num(ja)": 18090, "len(ja)": "1,2,16", "num(ja-kana)": 38, "len(ja-kana)": "1,1,2", "num(ko)": 16, "len(ko)": "1,1,2" }, "eson/kplug-base-encoder": { "tokenizer": "kplug", "organization": "JD", "vocab_size": 10261, "num(digit)": 420, "len(digit)": "1,3,12", "num(space)": 0, "len(space)": "-", "num(ar)": 0, "len(ar)": "-", "num(zh)": 5764, "len(zh)": "1,1,1", "num(ja)": 5766, "len(ja)": "1,1,3", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 0, "len(ko)": "-" }, "fnlp/moss-moon-003-sft": { "tokenizer": "moss-moon-003-sft", "organization": "Fudan", "vocab_size": 106072, "num(digit)": 1848, "len(digit)": "1,3,16", "num(space)": 33566, "len(space)": "1,7,102", "num(ar)": 25, "len(ar)": "1,1,4", "num(zh)": 54230, "len(zh)": "1,2,15", "num(ja)": 54381, "len(ja)": "1,2,15", "num(ja-kana)": 152, "len(ja-kana)": "1,1,7", "num(ko)": 0, "len(ko)": "-" }, "google/gemma-7b": { "tokenizer": "gemma-7b", "organization": "Google", "vocab_size": 256000, "num(digit)": 134, "len(digit)": "1,10,12", "num(space)": 125662, "len(space)": "1,7,31", "num(ar)": 6274, "len(ar)": "1,4,15", "num(zh)": 23767, "len(zh)": "1,2,12", "num(ja)": 28852, "len(ja)": "1,2,12", "num(ja-kana)": 7061, "len(ja-kana)": "1,3,12", "num(ko)": 2295, "len(ko)": "1,1,5" }, "google/switch-c-2048": { "tokenizer": "switch-c-2048", "organization": "Google", "vocab_size": 32100, "num(digit)": 1133, "len(digit)": "1,3,13", "num(space)": 0, "len(space)": "-", "num(ar)": 0, "len(ar)": "-", "num(zh)": 0, "len(zh)": "-", "num(ja)": 0, "len(ja)": "-", "num(ja-kana)": 0, "len(ja-kana)": "-", "num(ko)": 0, "len(ko)": "-" }, "hfl/chinese-alpaca-lora-7b": { "tokenizer": "chinese-alpaca-lora-7b", "organization": "-", "vocab_size": 49954, "num(digit)": 614, "len(digit)": "1,3,5", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 17839, "len(zh)": "1,2,13", "num(ja)": 17993, "len(ja)": "1,2,13", "num(ja-kana)": 154, "len(ja-kana)": "1,1,1", "num(ko)": 135, "len(ko)": "1,1,1" }, "hfl/chinese-llama-2-7b": { "tokenizer": "chinese-llama-2-7b", "organization": "-", "vocab_size": 55296, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 23974, "len(zh)": "1,2,16", "num(ja)": 24111, "len(ja)": "1,2,16", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "hfl/chinese-llama-lora-7b": { "tokenizer": "chinese-llama-lora-7b", "organization": "-", "vocab_size": 49953, "num(digit)": 614, "len(digit)": "1,3,5", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 17839, "len(zh)": "1,2,13", "num(ja)": 17993, "len(ja)": "1,2,13", "num(ja-kana)": 154, "len(ja-kana)": "1,1,1", "num(ko)": 135, "len(ko)": "1,1,1" }, "hfl/llama-3-chinese-8b": { "tokenizer": "llama-3-chinese-8b", "organization": "-", "vocab_size": 128256, "num(digit)": 1110, "len(digit)": "1,3,3", "num(space)": 60860, "len(space)": "1,6,128", "num(ar)": 3810, "len(ar)": "1,4,11", "num(zh)": 4424, "len(zh)": "1,1,7", "num(ja)": 5387, "len(ja)": "1,2,8", "num(ja-kana)": 1086, "len(ja-kana)": "1,2,8", "num(ko)": 2281, "len(ko)": "1,2,6" }, "hpcai-tech/grok-1": { "tokenizer": "grok-1", "organization": "xAI", "vocab_size": 131072, "num(digit)": 40, "len(digit)": "1,6,13", "num(space)": 399, "len(space)": "1,3,16", "num(ar)": 69, "len(ar)": "1,2,4", "num(zh)": 1626, "len(zh)": "1,2,7", "num(ja)": 3118, "len(ja)": "1,2,8", "num(ja-kana)": 1908, "len(ja-kana)": "1,2,8", "num(ko)": 67, "len(ko)": "1,1,2" }, "internlm/internlm-chat-7b": { "tokenizer": "internlm-chat-7b", "organization": "Shanghai AI Lab", "vocab_size": 103168, "num(digit)": 1259, "len(digit)": "1,3,19", "num(space)": 33008, "len(space)": "1,6,128", "num(ar)": 6702, "len(ar)": "1,4,16", "num(zh)": 32000, "len(zh)": "1,2,15", "num(ja)": 32866, "len(ja)": "1,2,15", "num(ja-kana)": 864, "len(ja-kana)": "1,2,9", "num(ko)": 298, "len(ko)": "1,1,1" }, "internlm/internlm-xcomposer-7b": { "tokenizer": "internlm-xcomposer-7b", "organization": "Shanghai AI Lab", "vocab_size": 103168, "num(digit)": 1261, "len(digit)": "1,3,19", "num(space)": 33008, "len(space)": "1,6,128", "num(ar)": 6702, "len(ar)": "1,4,16", "num(zh)": 32000, "len(zh)": "1,2,15", "num(ja)": 32866, "len(ja)": "1,2,15", "num(ja-kana)": 864, "len(ja-kana)": "1,2,9", "num(ko)": 298, "len(ko)": "1,1,1" }, "internlm/internlm2-chat-7b": { "tokenizer": "internlm2-chat-7b", "organization": "Shanghai AI Lab", "vocab_size": 92544, "num(digit)": 1261, "len(digit)": "1,3,18", "num(space)": 28681, "len(space)": "1,7,128", "num(ar)": 30, "len(ar)": "1,1,1", "num(zh)": 31148, "len(zh)": "1,2,15", "num(ja)": 31296, "len(ja)": "1,2,15", "num(ja-kana)": 148, "len(ja-kana)": "1,1,1", "num(ko)": 83, "len(ko)": "1,1,1" }, "internlm/internlm2-math-7b": { "tokenizer": "internlm2-math-7b", "organization": "Shanghai AI Lab", "vocab_size": 92544, "num(digit)": 1261, "len(digit)": "1,3,18", "num(space)": 28681, "len(space)": "1,7,128", "num(ar)": 30, "len(ar)": "1,1,1", "num(zh)": 31148, "len(zh)": "1,2,15", "num(ja)": 31296, "len(ja)": "1,2,15", "num(ja-kana)": 148, "len(ja-kana)": "1,1,1", "num(ko)": 83, "len(ko)": "1,1,1" }, "microsoft/Phi-3-mini-4k-instruct": { "tokenizer": "Phi-3-mini-4k-instruct", "organization": "Microsoft", "vocab_size": 32011, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 61, "len(space)": "1,2,15", "num(ar)": 55, "len(ar)": "1,1,2", "num(zh)": 700, "len(zh)": "1,1,1", "num(ja)": 837, "len(ja)": "1,1,1", "num(ja-kana)": 137, "len(ja-kana)": "1,1,1", "num(ko)": 111, "len(ko)": "1,1,1" }, "microsoft/phi-1": { "tokenizer": "phi-1", "organization": "Microsoft", "vocab_size": 50295, "num(digit)": 1691, "len(digit)": "1,3,16", "num(space)": 33129, "len(space)": "1,7,66", "num(ar)": 22, "len(ar)": "1,1,3", "num(zh)": 51, "len(zh)": "1,1,4", "num(ja)": 183, "len(ja)": "1,1,7", "num(ja-kana)": 133, "len(ja-kana)": "1,1,7", "num(ko)": 0, "len(ko)": "-" }, "microsoft/phi-2": { "tokenizer": "phi-2", "organization": "Microsoft", "vocab_size": 50295, "num(digit)": 1691, "len(digit)": "1,3,16", "num(space)": 33129, "len(space)": "1,7,66", "num(ar)": 22, "len(ar)": "1,1,3", "num(zh)": 51, "len(zh)": "1,1,4", "num(ja)": 183, "len(ja)": "1,1,7", "num(ja-kana)": 133, "len(ja-kana)": "1,1,7", "num(ko)": 0, "len(ko)": "-" }, "mistralai/Mistral-7B-v0.1": { "tokenizer": "Mistral-7B-v0.1", "organization": "Mistral", "vocab_size": 32000, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 85, "len(space)": "1,3,15", "num(ar)": 71, "len(ar)": "1,1,2", "num(zh)": 1459, "len(zh)": "1,1,2", "num(ja)": 1593, "len(ja)": "1,1,2", "num(ja-kana)": 134, "len(ja-kana)": "1,1,1", "num(ko)": 346, "len(ko)": "1,1,1" }, "mistralai/Mixtral-8x7B-v0.1": { "tokenizer": "Mixtral-8x7B-v0.1", "organization": "Mistral", "vocab_size": 32000, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 85, "len(space)": "1,3,15", "num(ar)": 71, "len(ar)": "1,1,2", "num(zh)": 1459, "len(zh)": "1,1,2", "num(ja)": 1593, "len(ja)": "1,1,2", "num(ja-kana)": 134, "len(ja-kana)": "1,1,1", "num(ko)": 346, "len(ko)": "1,1,1" }, "openai-community/gpt2": { "tokenizer": "gpt2", "organization": "OpenAI", "vocab_size": 50257, "num(digit)": 1691, "len(digit)": "1,3,16", "num(space)": 33129, "len(space)": "1,7,66", "num(ar)": 22, "len(ar)": "1,1,3", "num(zh)": 51, "len(zh)": "1,1,4", "num(ja)": 183, "len(ja)": "1,1,7", "num(ja-kana)": 133, "len(ja-kana)": "1,1,7", "num(ko)": 0, "len(ko)": "-" }, "openai/code-davinci-002": { "tokenizer": "code-davinci-002", "organization": "OpenAI", "vocab_size": 50281, "num(digit)": 1691, "len(digit)": "1,3,16", "num(space)": 33175, "len(space)": "1,7,66", "num(ar)": 22, "len(ar)": "1,1,3", "num(zh)": 51, "len(zh)": "1,1,4", "num(ja)": 183, "len(ja)": "1,1,7", "num(ja-kana)": 133, "len(ja-kana)": "1,1,7", "num(ko)": 0, "len(ko)": "-" }, "openai/gpt-3.5-turbo": { "tokenizer": "gpt-3.5-turbo", "organization": "OpenAI", "vocab_size": 100277, "num(digit)": 1110, "len(digit)": "1,3,3", "num(space)": 47472, "len(space)": "1,7,128", "num(ar)": 113, "len(ar)": "1,2,10", "num(zh)": 868, "len(zh)": "1,1,7", "num(ja)": 1035, "len(ja)": "1,1,7", "num(ja-kana)": 169, "len(ja-kana)": "1,1,7", "num(ko)": 299, "len(ko)": "1,2,4" }, "openai/gpt-4o": { "tokenizer": "gpt-4o", "organization": "OpenAI", "vocab_size": 200019, "num(digit)": 1110, "len(digit)": "1,3,3", "num(space)": 109316, "len(space)": "1,6,128", "num(ar)": 8055, "len(ar)": "1,4,12", "num(zh)": 7563, "len(zh)": "1,2,11", "num(ja)": 8292, "len(ja)": "1,2,11", "num(ja-kana)": 809, "len(ja-kana)": "1,2,11", "num(ko)": 2365, "len(ko)": "1,2,8" }, "openai/text-davinci-003": { "tokenizer": "text-davinci-003", "organization": "OpenAI", "vocab_size": 50281, "num(digit)": 1691, "len(digit)": "1,3,16", "num(space)": 33175, "len(space)": "1,7,66", "num(ar)": 22, "len(ar)": "1,1,3", "num(zh)": 51, "len(zh)": "1,1,4", "num(ja)": 183, "len(ja)": "1,1,7", "num(ja-kana)": 133, "len(ja-kana)": "1,1,7", "num(ko)": 0, "len(ko)": "-" }, "thu-coai/CharacterGLM-6B": { "tokenizer": "CharacterGLM-6B", "organization": "Tsinghua", "vocab_size": 64789, "num(digit)": 20, "len(digit)": "1,1,1", "num(space)": 67, "len(space)": "1,2,15", "num(ar)": 57, "len(ar)": "1,1,2", "num(zh)": 30922, "len(zh)": "1,2,16", "num(ja)": 31065, "len(ja)": "1,2,16", "num(ja-kana)": 143, "len(ja-kana)": "1,1,1", "num(ko)": 604, "len(ko)": "1,1,1" }, "tiiuae/falcon-180b": { "tokenizer": "falcon-180b", "organization": "TII", "vocab_size": 65024, "num(digit)": 1108, "len(digit)": "1,3,3", "num(space)": 40202, "len(space)": "1,7,65", "num(ar)": 21, "len(ar)": "1,1,4", "num(zh)": 1627, "len(zh)": "1,1,3", "num(ja)": 1652, "len(ja)": "1,1,3", "num(ja-kana)": 25, "len(ja-kana)": "1,1,1", "num(ko)": 1, "len(ko)": "1,1,1" }, "tiiuae/falcon-7b": { "tokenizer": "falcon-7b", "organization": "TII", "vocab_size": 65024, "num(digit)": 1108, "len(digit)": "1,3,3", "num(space)": 40202, "len(space)": "1,7,65", "num(ar)": 21, "len(ar)": "1,1,4", "num(zh)": 1627, "len(zh)": "1,1,3", "num(ja)": 1652, "len(ja)": "1,1,3", "num(ja-kana)": 25, "len(ja-kana)": "1,1,1", "num(ko)": 1, "len(ko)": "1,1,1" }, "Qwen/Qwen1.5-1.8B": { "tokenizer": "Qwen1.5-1.8B", "organization": "Alibaba", "vocab_size": 151646, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 55883, "len(space)": "1,6,128", "num(ar)": 4018, "len(ar)": "1,3,12", "num(zh)": 25557, "len(zh)": "1,2,7", "num(ja)": 27206, "len(ja)": "1,2,11", "num(ja-kana)": 2089, "len(ja-kana)": "1,3,11", "num(ko)": 3495, "len(ko)": "1,1,5" }, "Qwen/Qwen1.5-110B": { "tokenizer": "Qwen1.5-110B", "organization": "Alibaba", "vocab_size": 151646, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 55883, "len(space)": "1,6,128", "num(ar)": 4018, "len(ar)": "1,3,12", "num(zh)": 25557, "len(zh)": "1,2,7", "num(ja)": 27206, "len(ja)": "1,2,11", "num(ja-kana)": 2089, "len(ja-kana)": "1,3,11", "num(ko)": 3495, "len(ko)": "1,1,5" }, "Qwen/Qwen1.5-14B": { "tokenizer": "Qwen1.5-14B", "organization": "Alibaba", "vocab_size": 151646, "num(digit)": 10, "len(digit)": "1,1,1", "num(space)": 55883, "len(space)": "1,6,128", "num(ar)": 4018, "len(ar)": "1,3,12", "num(zh)": 25557, "len(zh)": "1,2,7", "num(ja)": 27206, "len(ja)": "1,2,11", "num(ja-kana)": 2089, "len(ja-kana)": "1,3,11", "num(ko)": 3495, "len(ko)": "1,1,5" } }