|
{ |
|
"FacebookAI/xlm-roberta-base": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/FacebookAI/xlm-roberta-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">xlm-roberta-base</a>", |
|
"organization": "Facebook", |
|
"vocab_size": 250002, |
|
"num(digit)": 2728, |
|
"len(digit)": "1,3,9", |
|
"num(space)": 1, |
|
"len(space)": "1,1,1", |
|
"num(ar)": 14644, |
|
"len(ar)": "1,4,16", |
|
"num(zh)": 18457, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 20572, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 3434, |
|
"len(ja-kana)": "1,3,12", |
|
"num(ko)": 5373, |
|
"len(ko)": "1,2,8" |
|
}, |
|
"clue/roberta_chinese_clue_tiny": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/clue/roberta_chinese_clue_tiny\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">roberta-chinese-clue</a>", |
|
"organization": "CLUE", |
|
"vocab_size": 8021, |
|
"num(digit)": 230, |
|
"len(digit)": "1,4,10", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 30, |
|
"len(ar)": "1,2,3", |
|
"num(zh)": 5689, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 5691, |
|
"len(ja)": "1,1,3", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"dbmdz/bert-base-german-uncased": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/dbmdz/bert-base-german-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-german-uncased</a>", |
|
"organization": "dbmdz", |
|
"vocab_size": 31102, |
|
"num(digit)": 1733, |
|
"len(digit)": "1,4,12", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 0, |
|
"len(zh)": "-", |
|
"num(ja)": 0, |
|
"len(ja)": "-", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"google-bert/bert-base-cased": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-cased</a>", |
|
"organization": "Google", |
|
"vocab_size": 28996, |
|
"num(digit)": 926, |
|
"len(digit)": "1,4,11", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 94, |
|
"len(ar)": "1,3,4", |
|
"num(zh)": 226, |
|
"len(zh)": "1,2,3", |
|
"num(ja)": 390, |
|
"len(ja)": "1,2,3", |
|
"num(ja-kana)": 164, |
|
"len(ja-kana)": "1,2,3", |
|
"num(ko)": 10, |
|
"len(ko)": "1,2,3" |
|
}, |
|
"google-bert/bert-base-chinese": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-chinese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-chinese</a>", |
|
"organization": "Google", |
|
"vocab_size": 21128, |
|
"num(digit)": 1451, |
|
"len(digit)": "1,3,12", |
|
"num(space)": 2, |
|
"len(space)": "1,2,3", |
|
"num(ar)": 30, |
|
"len(ar)": "1,2,3", |
|
"num(zh)": 14642, |
|
"len(zh)": "1,2,3", |
|
"num(ja)": 15197, |
|
"len(ja)": "1,3,15", |
|
"num(ja-kana)": 553, |
|
"len(ja-kana)": "1,3,15", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"google-bert/bert-base-german-cased": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-german-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-german-cased</a>", |
|
"organization": "Google", |
|
"vocab_size": 30000, |
|
"num(digit)": 4065, |
|
"len(digit)": "1,11,22", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 0, |
|
"len(zh)": "-", |
|
"num(ja)": 0, |
|
"len(ja)": "-", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"google-bert/bert-base-multilingual-cased": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-multilingual-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-multilingual-cased</a>", |
|
"organization": "Google", |
|
"vocab_size": 119547, |
|
"num(digit)": 2583, |
|
"len(digit)": "1,3,13", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 4873, |
|
"len(ar)": "1,5,14", |
|
"num(zh)": 13542, |
|
"len(zh)": "1,2,3", |
|
"num(ja)": 14880, |
|
"len(ja)": "1,3,10", |
|
"num(ja-kana)": 1336, |
|
"len(ja-kana)": "1,4,10", |
|
"num(ko)": 3271, |
|
"len(ko)": "1,3,6" |
|
}, |
|
"google-bert/bert-base-multilingual-uncased": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-multilingual-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-multilingual-uncased</a>", |
|
"organization": "Google", |
|
"vocab_size": 105879, |
|
"num(digit)": 2510, |
|
"len(digit)": "1,3,13", |
|
"num(space)": 2, |
|
"len(space)": "1,2,3", |
|
"num(ar)": 4530, |
|
"len(ar)": "1,5,13", |
|
"num(zh)": 16658, |
|
"len(zh)": "1,2,3", |
|
"num(ja)": 17858, |
|
"len(ja)": "1,3,10", |
|
"num(ja-kana)": 1188, |
|
"len(ja-kana)": "1,4,10", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"google-bert/bert-base-uncased": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-uncased</a>", |
|
"organization": "Google", |
|
"vocab_size": 30522, |
|
"num(digit)": 2056, |
|
"len(digit)": "1,4,11", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 88, |
|
"len(ar)": "1,3,5", |
|
"num(zh)": 488, |
|
"len(zh)": "1,2,3", |
|
"num(ja)": 676, |
|
"len(ja)": "1,2,3", |
|
"num(ja-kana)": 188, |
|
"len(ja-kana)": "1,2,3", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"google/mobilebert-uncased": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/mobilebert-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">mobilebert-uncased</a>", |
|
"organization": "Google", |
|
"vocab_size": 30522, |
|
"num(digit)": 2056, |
|
"len(digit)": "1,4,11", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 88, |
|
"len(ar)": "1,3,5", |
|
"num(zh)": 488, |
|
"len(zh)": "1,2,3", |
|
"num(ja)": 676, |
|
"len(ja)": "1,2,3", |
|
"num(ja-kana)": 188, |
|
"len(ja-kana)": "1,2,3", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"tohoku-nlp/bert-base-japanese": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tohoku-nlp/bert-base-japanese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-japanese</a>", |
|
"organization": "Tohoku", |
|
"vocab_size": 32000, |
|
"num(digit)": 669, |
|
"len(digit)": "1,3,5", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 10, |
|
"len(ar)": "1,3,3", |
|
"num(zh)": 18792, |
|
"len(zh)": "1,2,11", |
|
"num(ja)": 28367, |
|
"len(ja)": "1,2,13", |
|
"num(ja-kana)": 12359, |
|
"len(ja-kana)": "1,4,13", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"gpt-4": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4</a>", |
|
"organization": "OpenAI", |
|
"vocab_size": 100277, |
|
"num(digit)": 1110, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 47472, |
|
"len(space)": "1,7,128", |
|
"num(ar)": 113, |
|
"len(ar)": "1,2,10", |
|
"num(zh)": 868, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 1035, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 169, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 299, |
|
"len(ko)": "1,2,4" |
|
}, |
|
"llama3": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>", |
|
"organization": "Meta", |
|
"vocab_size": 128256, |
|
"num(digit)": 1110, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 60860, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 3810, |
|
"len(ar)": "1,4,11", |
|
"num(zh)": 4424, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 5387, |
|
"len(ja)": "1,2,8", |
|
"num(ja-kana)": 1086, |
|
"len(ja-kana)": "1,2,8", |
|
"num(ko)": 2281, |
|
"len(ko)": "1,2,6" |
|
}, |
|
"google-t5/t5-large": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-t5/t5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">t5</a>", |
|
"organization": "Google", |
|
"vocab_size": 32100, |
|
"num(digit)": 1133, |
|
"len(digit)": "1,3,13", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 0, |
|
"len(zh)": "-", |
|
"num(ja)": 0, |
|
"len(ja)": "-", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"google/byt5-small": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/byt5-small\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">byt5-small</a>", |
|
"organization": "Google", |
|
"vocab_size": 384, |
|
"num(digit)": 10, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 10, |
|
"len(space)": "1,1,1", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 0, |
|
"len(zh)": "-", |
|
"num(ja)": 0, |
|
"len(ja)": "-", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"google/mt5-large": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/mt5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">mt5-large</a>", |
|
"organization": "Google", |
|
"vocab_size": 250100, |
|
"num(digit)": 16829, |
|
"len(digit)": "1,4,16", |
|
"num(space)": 1, |
|
"len(space)": "1,1,1", |
|
"num(ar)": 7459, |
|
"len(ar)": "1,3,16", |
|
"num(zh)": 21489, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 27078, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 9160, |
|
"len(ja-kana)": "1,3,14", |
|
"num(ko)": 4041, |
|
"len(ko)": "1,1,10" |
|
}, |
|
"lmsys/fastchat-t5-3b-v1.0": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/lmsys/fastchat-t5-3b-v1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">fastchat-t5-3b-v1.0</a>", |
|
"organization": "LMSYS", |
|
"vocab_size": 32110, |
|
"num(digit)": 1033, |
|
"len(digit)": "1,3,8", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 0, |
|
"len(zh)": "-", |
|
"num(ja)": 0, |
|
"len(ja)": "-", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"paust/pko-t5-large": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/paust/pko-t5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">pko-t5-large</a>", |
|
"organization": "PAUST", |
|
"vocab_size": 50358, |
|
"num(digit)": 51, |
|
"len(digit)": "1,2,3", |
|
"num(space)": 10, |
|
"len(space)": "1,1,1", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 0, |
|
"len(zh)": "-", |
|
"num(ja)": 0, |
|
"len(ja)": "-", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 49050, |
|
"len(ko)": "1,2,16" |
|
}, |
|
"bloom": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/bigscience/bloom\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bloom</a>", |
|
"organization": "BigScience", |
|
"vocab_size": 250680, |
|
"num(digit)": 6629, |
|
"len(digit)": "1,4,50", |
|
"num(space)": 140180, |
|
"len(space)": "1,6,600", |
|
"num(ar)": 20854, |
|
"len(ar)": "1,5,16", |
|
"num(zh)": 30603, |
|
"len(zh)": "1,2,23", |
|
"num(ja)": 30816, |
|
"len(ja)": "1,2,23", |
|
"num(ja-kana)": 214, |
|
"len(ja-kana)": "1,1,3", |
|
"num(ko)": 338, |
|
"len(ko)": "1,1,3" |
|
}, |
|
"llama": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/huggyllama/llama-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama</a>", |
|
"organization": "Meta", |
|
"vocab_size": 32000, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 700, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 837, |
|
"len(ja)": "1,1,1", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"ClueAI/ChatYuan-large-v2": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClueAI/ChatYuan-large-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">ChatYuan-large-v2</a>", |
|
"organization": "CLUE", |
|
"vocab_size": 32128, |
|
"num(digit)": 740, |
|
"len(digit)": "1,3,9", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 2, |
|
"len(ar)": "1,1,1", |
|
"num(zh)": 29591, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 29736, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 145, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"Meta/llama3": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>", |
|
"organization": "Meta", |
|
"vocab_size": 128256, |
|
"num(digit)": 1110, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 60860, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 3810, |
|
"len(ar)": "1,4,11", |
|
"num(zh)": 4424, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 5387, |
|
"len(ja)": "1,2,8", |
|
"num(ja-kana)": 1086, |
|
"len(ja-kana)": "1,2,8", |
|
"num(ko)": 2281, |
|
"len(ko)": "1,2,6" |
|
}, |
|
"openai/gpt-4": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4</a>", |
|
"organization": "OpenAI", |
|
"vocab_size": 100277, |
|
"num(digit)": 1110, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 47472, |
|
"len(space)": "1,7,128", |
|
"num(ar)": 113, |
|
"len(ar)": "1,2,10", |
|
"num(zh)": 868, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 1035, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 169, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 299, |
|
"len(ko)": "1,2,4" |
|
}, |
|
"gradientai/Llama-3-8B-Instruct-Gradient-1048k": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>", |
|
"organization": "Meta", |
|
"vocab_size": 128256, |
|
"num(digit)": 1110, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 60860, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 3810, |
|
"len(ar)": "1,4,11", |
|
"num(zh)": 4424, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 5387, |
|
"len(ja)": "1,2,8", |
|
"num(ja-kana)": 1086, |
|
"len(ja-kana)": "1,2,8", |
|
"num(ko)": 2281, |
|
"len(ko)": "1,2,6" |
|
}, |
|
"bigscience/bloom": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/bigscience/bloom\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bloom</a>", |
|
"organization": "BigScience", |
|
"vocab_size": 250680, |
|
"num(digit)": 6629, |
|
"len(digit)": "1,4,50", |
|
"num(space)": 140180, |
|
"len(space)": "1,6,600", |
|
"num(ar)": 20854, |
|
"len(ar)": "1,5,16", |
|
"num(zh)": 30603, |
|
"len(zh)": "1,2,23", |
|
"num(ja)": 30816, |
|
"len(ja)": "1,2,23", |
|
"num(ja-kana)": 214, |
|
"len(ja-kana)": "1,1,3", |
|
"num(ko)": 338, |
|
"len(ko)": "1,1,3" |
|
}, |
|
"huggyllama/llama-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/huggyllama/llama-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama</a>", |
|
"organization": "Meta", |
|
"vocab_size": 32000, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 700, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 837, |
|
"len(ja)": "1,1,1", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"baichuan-inc/Baichuan-7B": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/baichuan-inc/Baichuan-7B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">baichuan</a>", |
|
"organization": "Baichuan", |
|
"vocab_size": 64000, |
|
"num(digit)": 335, |
|
"len(digit)": "1,14,14", |
|
"num(space)": 13, |
|
"len(space)": "1,1,1", |
|
"num(ar)": 299, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 27676, |
|
"len(zh)": "1,1,9", |
|
"num(ja)": 28522, |
|
"len(ja)": "1,1,9", |
|
"num(ja-kana)": 178, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 1591, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"01-ai/Yi-34B": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-34B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-34B</a>", |
|
"organization": "Yi", |
|
"vocab_size": 64000, |
|
"num(digit)": 200, |
|
"len(digit)": "1,13,15", |
|
"num(space)": 24274, |
|
"len(space)": "1,7,16", |
|
"num(ar)": 18, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 21356, |
|
"len(zh)": "1,2,12", |
|
"num(ja)": 21407, |
|
"len(ja)": "1,2,12", |
|
"num(ja-kana)": 51, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 28, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"01-ai/Yi-6B": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-6B</a>", |
|
"organization": "Yi", |
|
"vocab_size": 64000, |
|
"num(digit)": 200, |
|
"len(digit)": "1,13,15", |
|
"num(space)": 24274, |
|
"len(space)": "1,7,16", |
|
"num(ar)": 18, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 21356, |
|
"len(zh)": "1,2,12", |
|
"num(ja)": 21407, |
|
"len(ja)": "1,2,12", |
|
"num(ja-kana)": 51, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 28, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"01-ai/Yi-VL-34B": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-VL-34B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-VL-34B</a>", |
|
"organization": "Yi", |
|
"vocab_size": 64000, |
|
"num(digit)": 200, |
|
"len(digit)": "1,13,15", |
|
"num(space)": 43, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 18, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 21356, |
|
"len(zh)": "1,2,12", |
|
"num(ja)": 21407, |
|
"len(ja)": "1,2,12", |
|
"num(ja-kana)": 51, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 28, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"ClassCat/gpt2-base-french": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClassCat/gpt2-base-french\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-french</a>", |
|
"organization": "ClassCat", |
|
"vocab_size": 50000, |
|
"num(digit)": 1833, |
|
"len(digit)": "1,4,5", |
|
"num(space)": 31889, |
|
"len(space)": "1,7,32", |
|
"num(ar)": 41, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 27, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 46, |
|
"len(ja)": "1,1,2", |
|
"num(ja-kana)": 19, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"ClassCat/gpt2-base-spanish": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClassCat/gpt2-base-spanish\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-spanish</a>", |
|
"organization": "ClassCat", |
|
"vocab_size": 50000, |
|
"num(digit)": 1492, |
|
"len(digit)": "1,4,9", |
|
"num(space)": 34496, |
|
"len(space)": "1,8,32", |
|
"num(ar)": 36, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 13, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 36, |
|
"len(ja)": "1,1,2", |
|
"num(ja-kana)": 23, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"ClueAI/PromptCLUE-base": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClueAI/PromptCLUE-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">PromptCLUE-base</a>", |
|
"organization": "CLUE", |
|
"vocab_size": 32128, |
|
"num(digit)": 740, |
|
"len(digit)": "1,3,9", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 2, |
|
"len(ar)": "1,1,1", |
|
"num(zh)": 29591, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 29736, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 145, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"CohereForAI/aya-101": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/CohereForAI/aya-101\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">aya-101</a>", |
|
"organization": "Cohere For AI", |
|
"vocab_size": 250100, |
|
"num(digit)": 16829, |
|
"len(digit)": "1,4,16", |
|
"num(space)": 1, |
|
"len(space)": "1,1,1", |
|
"num(ar)": 7459, |
|
"len(ar)": "1,3,16", |
|
"num(zh)": 21489, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 27078, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 9160, |
|
"len(ja-kana)": "1,3,14", |
|
"num(ko)": 4041, |
|
"len(ko)": "1,1,10" |
|
}, |
|
"EleutherAI/gpt-neox-20b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/EleutherAI/gpt-neox-20b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-neox-20b</a>", |
|
"organization": "EleutherAI", |
|
"vocab_size": 50277, |
|
"num(digit)": 2036, |
|
"len(digit)": "1,3,35", |
|
"num(space)": 28996, |
|
"len(space)": "1,7,512", |
|
"num(ar)": 94, |
|
"len(ar)": "1,2,4", |
|
"num(zh)": 313, |
|
"len(zh)": "1,1,2", |
|
"num(ja)": 480, |
|
"len(ja)": "1,1,4", |
|
"num(ja-kana)": 167, |
|
"len(ja-kana)": "1,1,4", |
|
"num(ko)": 25, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"HuggingFaceH4/starchat-alpha": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/HuggingFaceH4/starchat-alpha\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">starchat-alpha</a>", |
|
"organization": "-", |
|
"vocab_size": 49156, |
|
"num(digit)": 10, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 16515, |
|
"len(space)": "1,6,256", |
|
"num(ar)": 84, |
|
"len(ar)": "1,2,4", |
|
"num(zh)": 2030, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 2368, |
|
"len(ja)": "1,1,8", |
|
"num(ja-kana)": 360, |
|
"len(ja-kana)": "1,2,8", |
|
"num(ko)": 491, |
|
"len(ko)": "1,2,5" |
|
}, |
|
"HuggingFaceH4/zephyr-7b-beta": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/HuggingFaceH4/zephyr-7b-beta\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">zephyr-7b-beta</a>", |
|
"organization": "HuggingFace", |
|
"vocab_size": 32000, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 85, |
|
"len(space)": "1,3,15", |
|
"num(ar)": 71, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 1459, |
|
"len(zh)": "1,1,2", |
|
"num(ja)": 1593, |
|
"len(ja)": "1,1,2", |
|
"num(ja-kana)": 134, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 346, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"LLM360/CrystalCoder": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/LLM360/CrystalCoder\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CrystalCoder</a>", |
|
"organization": "MBZUAI", |
|
"vocab_size": 32022, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 700, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 837, |
|
"len(ja)": "1,1,1", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"NousResearch/Llama-2-7b-chat-hf": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/NousResearch/Llama-2-7b-chat-hf\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama2</a>", |
|
"organization": "Meta", |
|
"vocab_size": 32001, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 700, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 837, |
|
"len(ja)": "1,1,1", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"OrionStarAI/Orion-14B-Chat": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/OrionStarAI/Orion-14B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Orion-14B-Chat</a>", |
|
"organization": "OrionStar", |
|
"vocab_size": 84608, |
|
"num(digit)": 1559, |
|
"len(digit)": "1,4,14", |
|
"num(space)": 18383, |
|
"len(space)": "1,6,16", |
|
"num(ar)": 102, |
|
"len(ar)": "1,1,1", |
|
"num(zh)": 46998, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 49644, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 2987, |
|
"len(ja-kana)": "1,3,11", |
|
"num(ko)": 5110, |
|
"len(ko)": "1,2,7" |
|
}, |
|
"Qwen/Qwen-7B-Chat": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen-7B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen</a>", |
|
"organization": "Alibaba", |
|
"vocab_size": 151851, |
|
"num(digit)": 10, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 55883, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 4018, |
|
"len(ar)": "1,3,12", |
|
"num(zh)": 25557, |
|
"len(zh)": "1,2,7", |
|
"num(ja)": 27206, |
|
"len(ja)": "1,2,11", |
|
"num(ja-kana)": 2089, |
|
"len(ja-kana)": "1,3,11", |
|
"num(ko)": 3495, |
|
"len(ko)": "1,1,5" |
|
}, |
|
"Qwen/Qwen1.5-14B-Chat": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-14B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5</a>", |
|
"organization": "Alibaba", |
|
"vocab_size": 151646, |
|
"num(digit)": 10, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 55883, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 4018, |
|
"len(ar)": "1,3,12", |
|
"num(zh)": 25557, |
|
"len(zh)": "1,2,7", |
|
"num(ja)": 27206, |
|
"len(ja)": "1,2,11", |
|
"num(ja-kana)": 2089, |
|
"len(ja-kana)": "1,3,11", |
|
"num(ko)": 3495, |
|
"len(ko)": "1,1,5" |
|
}, |
|
"Skywork/Skywork-13B-Math": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-13B-Math\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork-13B-Math</a>", |
|
"organization": "Kunlun", |
|
"vocab_size": 65519, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 62, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 56, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 33913, |
|
"len(zh)": "1,2,5", |
|
"num(ja)": 34064, |
|
"len(ja)": "1,2,5", |
|
"num(ja-kana)": 150, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"Skywork/Skywork-13B-base": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-13B-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork-13B-base</a>", |
|
"organization": "Kunlun", |
|
"vocab_size": 65519, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 62, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 56, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 33913, |
|
"len(zh)": "1,2,5", |
|
"num(ja)": 34064, |
|
"len(ja)": "1,2,5", |
|
"num(ja-kana)": 150, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"THUDM/chatglm-6b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm-6b</a>", |
|
"organization": "Tsinghua", |
|
"vocab_size": 130344, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 93, |
|
"len(space)": "1,34,80", |
|
"num(ar)": 137, |
|
"len(ar)": "1,2,4", |
|
"num(zh)": 61358, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 61784, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 439, |
|
"len(ja-kana)": "1,2,5", |
|
"num(ko)": 114, |
|
"len(ko)": "1,1,3" |
|
}, |
|
"THUDM/chatglm2-6b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm2-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm2-6b</a>", |
|
"organization": "Tsinghua", |
|
"vocab_size": 64787, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 67, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 57, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 30922, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 31065, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 143, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 604, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"THUDM/chatglm3-6b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm3-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm3-6b</a>", |
|
"organization": "Tsinghua", |
|
"vocab_size": 64796, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 67, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 57, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 30922, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 31065, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 143, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 604, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"TigerResearch/tigerbot-13b-chat-v2": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/TigerResearch/tigerbot-13b-chat-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">tigerbot-13b-chat-v2</a>", |
|
"organization": "Tigerobo", |
|
"vocab_size": 60515, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 28603, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 28770, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 167, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 261, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"TigerResearch/tigerbot-70b-chat-v4-4k": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/TigerResearch/tigerbot-70b-chat-v4-4k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">tigerbot-70b-chat-v4-4k</a>", |
|
"organization": "Tigerobo", |
|
"vocab_size": 65110, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 30509, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 32061, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 2071, |
|
"len(ja-kana)": "1,2,8", |
|
"num(ko)": 1504, |
|
"len(ko)": "1,1,5" |
|
}, |
|
"Upstage/SOLAR-10.7B-v1.0": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Upstage/SOLAR-10.7B-v1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">SOLAR-10.7B-v1.0</a>", |
|
"organization": "-", |
|
"vocab_size": 32000, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 85, |
|
"len(space)": "1,3,15", |
|
"num(ar)": 71, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 1459, |
|
"len(zh)": "1,1,2", |
|
"num(ja)": 1593, |
|
"len(ja)": "1,1,2", |
|
"num(ja-kana)": 134, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 346, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"WizardLM/WizardCoder-15B-V1.0": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardCoder-15B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardCoder-15B-V1.0</a>", |
|
"organization": "Microsoft", |
|
"vocab_size": 49153, |
|
"num(digit)": 10, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 16515, |
|
"len(space)": "1,6,256", |
|
"num(ar)": 84, |
|
"len(ar)": "1,2,4", |
|
"num(zh)": 2030, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 2368, |
|
"len(ja)": "1,1,8", |
|
"num(ja-kana)": 360, |
|
"len(ja-kana)": "1,2,8", |
|
"num(ko)": 491, |
|
"len(ko)": "1,2,5" |
|
}, |
|
"WizardLM/WizardCoder-Python-7B-V1.0": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardCoder-Python-7B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardCoder-Python-7B-V1.0</a>", |
|
"organization": "Microsoft", |
|
"vocab_size": 32001, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 700, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 837, |
|
"len(ja)": "1,1,1", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"WizardLM/WizardLM-7B-V1.0": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardLM-7B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardLM-7B-V1.0</a>", |
|
"organization": "Microsoft", |
|
"vocab_size": 32001, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 700, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 837, |
|
"len(ja)": "1,1,1", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"WizardLM/WizardMath-70B-V1.0": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardMath-70B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardMath-70B-V1.0</a>", |
|
"organization": "Microsoft", |
|
"vocab_size": 32002, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 700, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 837, |
|
"len(ja)": "1,1,1", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"abeja/gpt-neox-japanese-2.7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/abeja/gpt-neox-japanese-2.7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-neox-japanese-2.7b</a>", |
|
"organization": "ABEJA", |
|
"vocab_size": 32000, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 15176, |
|
"len(zh)": "1,2,2", |
|
"num(ja)": 31482, |
|
"len(ja)": "1,2,3", |
|
"num(ja-kana)": 16306, |
|
"len(ja-kana)": "1,3,3", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"ai21labs/Jamba-v0.1": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ai21labs/Jamba-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Jamba-v0.1</a>", |
|
"organization": "AI21", |
|
"vocab_size": 65536, |
|
"num(digit)": 1556, |
|
"len(digit)": "1,16,17", |
|
"num(space)": 39501, |
|
"len(space)": "1,7,32", |
|
"num(ar)": 867, |
|
"len(ar)": "1,3,8", |
|
"num(zh)": 1157, |
|
"len(zh)": "1,1,2", |
|
"num(ja)": 1287, |
|
"len(ja)": "1,1,2", |
|
"num(ja-kana)": 130, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 312, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"allenai/OLMo-7B": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/allenai/OLMo-7B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">OLMo-7B</a>", |
|
"organization": "Allen AI", |
|
"vocab_size": 50280, |
|
"num(digit)": 2036, |
|
"len(digit)": "1,3,35", |
|
"num(space)": 29019, |
|
"len(space)": "1,7,512", |
|
"num(ar)": 94, |
|
"len(ar)": "1,2,4", |
|
"num(zh)": 313, |
|
"len(zh)": "1,1,2", |
|
"num(ja)": 480, |
|
"len(ja)": "1,1,4", |
|
"num(ja-kana)": 167, |
|
"len(ja-kana)": "1,1,4", |
|
"num(ko)": 25, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"baichuan-inc/Baichuan2-7B-Chat": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">baichuan2</a>", |
|
"organization": "Baichuan", |
|
"vocab_size": 125696, |
|
"num(digit)": 1023, |
|
"len(digit)": "1,14,14", |
|
"num(space)": 26013, |
|
"len(space)": "1,7,32", |
|
"num(ar)": 335, |
|
"len(ar)": "1,1,27", |
|
"num(zh)": 70398, |
|
"len(zh)": "1,2,32", |
|
"num(ja)": 71269, |
|
"len(ja)": "1,2,32", |
|
"num(ja-kana)": 206, |
|
"len(ja-kana)": "1,1,9", |
|
"num(ko)": 1595, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"ckiplab/gpt2-base-chinese": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ckiplab/gpt2-base-chinese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-chinese</a>", |
|
"organization": "SINICA", |
|
"vocab_size": 21128, |
|
"num(digit)": 1451, |
|
"len(digit)": "1,3,12", |
|
"num(space)": 2, |
|
"len(space)": "1,2,3", |
|
"num(ar)": 30, |
|
"len(ar)": "1,2,3", |
|
"num(zh)": 14642, |
|
"len(zh)": "1,2,3", |
|
"num(ja)": 15197, |
|
"len(ja)": "1,3,15", |
|
"num(ja-kana)": 553, |
|
"len(ja-kana)": "1,3,15", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"cyberagent/open-calm-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/cyberagent/open-calm-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">open-calm-7b</a>", |
|
"organization": "CyberAgent", |
|
"vocab_size": 52000, |
|
"num(digit)": 690, |
|
"len(digit)": "1,3,5", |
|
"num(space)": 1698, |
|
"len(space)": "1,4,33", |
|
"num(ar)": 10, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 30775, |
|
"len(zh)": "1,3,31", |
|
"num(ja)": 45790, |
|
"len(ja)": "1,3,31", |
|
"num(ja-kana)": 32535, |
|
"len(ja-kana)": "1,3,31", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"databricks/dbrx-instruct": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/databricks/dbrx-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">dbrx-instruct</a>", |
|
"organization": "Databricks", |
|
"vocab_size": 100280, |
|
"num(digit)": 1126, |
|
"len(digit)": "1,3,17", |
|
"num(space)": 47400, |
|
"len(space)": "1,7,128", |
|
"num(ar)": 113, |
|
"len(ar)": "1,2,10", |
|
"num(zh)": 868, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 1035, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 169, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 299, |
|
"len(ko)": "1,2,4" |
|
}, |
|
"deepseek-ai/DeepSeek-V2": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/DeepSeek-V2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">DeepSeek-V2</a>", |
|
"organization": "DeepSeek", |
|
"vocab_size": 100002, |
|
"num(digit)": 10, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 48073, |
|
"len(space)": "1,7,128", |
|
"num(ar)": 48, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 18052, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 18090, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 38, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 16, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"deepseek-ai/deepseek-coder-33b-instruct": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">deepseek-coder-33b-instruct</a>", |
|
"organization": "DeepSeek", |
|
"vocab_size": 32022, |
|
"num(digit)": 10, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 15254, |
|
"len(space)": "1,6,65", |
|
"num(ar)": 12, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 4803, |
|
"len(zh)": "1,2,4", |
|
"num(ja)": 4804, |
|
"len(ja)": "1,2,4", |
|
"num(ja-kana)": 1, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"deepseek-ai/deepseek-llm-7b-base": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/deepseek-llm-7b-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">deepseek-llm-7b-base</a>", |
|
"organization": "DeepSeek", |
|
"vocab_size": 100015, |
|
"num(digit)": 10, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 48073, |
|
"len(space)": "1,7,128", |
|
"num(ar)": 48, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 18052, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 18090, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 38, |
|
"len(ja-kana)": "1,1,2", |
|
"num(ko)": 16, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"eson/kplug-base-encoder": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/eson/kplug-base-encoder\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">kplug</a>", |
|
"organization": "JD", |
|
"vocab_size": 10261, |
|
"num(digit)": 420, |
|
"len(digit)": "1,3,12", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 5764, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 5766, |
|
"len(ja)": "1,1,3", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"fnlp/moss-moon-003-sft": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/fnlp/moss-moon-003-sft\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">moss-moon-003-sft</a>", |
|
"organization": "Fudan", |
|
"vocab_size": 106072, |
|
"num(digit)": 1848, |
|
"len(digit)": "1,3,16", |
|
"num(space)": 33566, |
|
"len(space)": "1,7,102", |
|
"num(ar)": 25, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 54230, |
|
"len(zh)": "1,2,15", |
|
"num(ja)": 54381, |
|
"len(ja)": "1,2,15", |
|
"num(ja-kana)": 152, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"google/gemma-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/gemma-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gemma-7b</a>", |
|
"organization": "Google", |
|
"vocab_size": 256000, |
|
"num(digit)": 134, |
|
"len(digit)": "1,10,12", |
|
"num(space)": 125662, |
|
"len(space)": "1,7,31", |
|
"num(ar)": 6274, |
|
"len(ar)": "1,4,15", |
|
"num(zh)": 23767, |
|
"len(zh)": "1,2,12", |
|
"num(ja)": 28852, |
|
"len(ja)": "1,2,12", |
|
"num(ja-kana)": 7061, |
|
"len(ja-kana)": "1,3,12", |
|
"num(ko)": 2295, |
|
"len(ko)": "1,1,5" |
|
}, |
|
"google/switch-c-2048": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/switch-c-2048\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">switch-c-2048</a>", |
|
"organization": "Google", |
|
"vocab_size": 32100, |
|
"num(digit)": 1133, |
|
"len(digit)": "1,3,13", |
|
"num(space)": 0, |
|
"len(space)": "-", |
|
"num(ar)": 0, |
|
"len(ar)": "-", |
|
"num(zh)": 0, |
|
"len(zh)": "-", |
|
"num(ja)": 0, |
|
"len(ja)": "-", |
|
"num(ja-kana)": 0, |
|
"len(ja-kana)": "-", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"hfl/chinese-alpaca-lora-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-alpaca-lora-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-alpaca-lora-7b</a>", |
|
"organization": "-", |
|
"vocab_size": 49954, |
|
"num(digit)": 614, |
|
"len(digit)": "1,3,5", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 17839, |
|
"len(zh)": "1,2,13", |
|
"num(ja)": 17993, |
|
"len(ja)": "1,2,13", |
|
"num(ja-kana)": 154, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 135, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"hfl/chinese-llama-2-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-llama-2-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-llama-2-7b</a>", |
|
"organization": "-", |
|
"vocab_size": 55296, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 23974, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 24111, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"hfl/chinese-llama-lora-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-llama-lora-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-llama-lora-7b</a>", |
|
"organization": "-", |
|
"vocab_size": 49953, |
|
"num(digit)": 614, |
|
"len(digit)": "1,3,5", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 17839, |
|
"len(zh)": "1,2,13", |
|
"num(ja)": 17993, |
|
"len(ja)": "1,2,13", |
|
"num(ja-kana)": 154, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 135, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"hfl/llama-3-chinese-8b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/llama-3-chinese-8b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama-3-chinese-8b</a>", |
|
"organization": "-", |
|
"vocab_size": 128256, |
|
"num(digit)": 1110, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 60860, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 3810, |
|
"len(ar)": "1,4,11", |
|
"num(zh)": 4424, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 5387, |
|
"len(ja)": "1,2,8", |
|
"num(ja-kana)": 1086, |
|
"len(ja-kana)": "1,2,8", |
|
"num(ko)": 2281, |
|
"len(ko)": "1,2,6" |
|
}, |
|
"hpcai-tech/grok-1": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hpcai-tech/grok-1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">grok-1</a>", |
|
"organization": "xAI", |
|
"vocab_size": 131072, |
|
"num(digit)": 40, |
|
"len(digit)": "1,6,13", |
|
"num(space)": 399, |
|
"len(space)": "1,3,16", |
|
"num(ar)": 69, |
|
"len(ar)": "1,2,4", |
|
"num(zh)": 1626, |
|
"len(zh)": "1,2,7", |
|
"num(ja)": 3118, |
|
"len(ja)": "1,2,8", |
|
"num(ja-kana)": 1908, |
|
"len(ja-kana)": "1,2,8", |
|
"num(ko)": 67, |
|
"len(ko)": "1,1,2" |
|
}, |
|
"internlm/internlm-chat-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm-chat-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm-chat-7b</a>", |
|
"organization": "Shanghai AI Lab", |
|
"vocab_size": 103168, |
|
"num(digit)": 1259, |
|
"len(digit)": "1,3,19", |
|
"num(space)": 33008, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 6702, |
|
"len(ar)": "1,4,16", |
|
"num(zh)": 32000, |
|
"len(zh)": "1,2,15", |
|
"num(ja)": 32866, |
|
"len(ja)": "1,2,15", |
|
"num(ja-kana)": 864, |
|
"len(ja-kana)": "1,2,9", |
|
"num(ko)": 298, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"internlm/internlm-xcomposer-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm-xcomposer-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm-xcomposer-7b</a>", |
|
"organization": "Shanghai AI Lab", |
|
"vocab_size": 103168, |
|
"num(digit)": 1261, |
|
"len(digit)": "1,3,19", |
|
"num(space)": 33008, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 6702, |
|
"len(ar)": "1,4,16", |
|
"num(zh)": 32000, |
|
"len(zh)": "1,2,15", |
|
"num(ja)": 32866, |
|
"len(ja)": "1,2,15", |
|
"num(ja-kana)": 864, |
|
"len(ja-kana)": "1,2,9", |
|
"num(ko)": 298, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"internlm/internlm2-chat-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-chat-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm2-chat-7b</a>", |
|
"organization": "Shanghai AI Lab", |
|
"vocab_size": 92544, |
|
"num(digit)": 1261, |
|
"len(digit)": "1,3,18", |
|
"num(space)": 28681, |
|
"len(space)": "1,7,128", |
|
"num(ar)": 30, |
|
"len(ar)": "1,1,1", |
|
"num(zh)": 31148, |
|
"len(zh)": "1,2,15", |
|
"num(ja)": 31296, |
|
"len(ja)": "1,2,15", |
|
"num(ja-kana)": 148, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 83, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"internlm/internlm2-math-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-math-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm2-math-7b</a>", |
|
"organization": "Shanghai AI Lab", |
|
"vocab_size": 92544, |
|
"num(digit)": 1261, |
|
"len(digit)": "1,3,18", |
|
"num(space)": 28681, |
|
"len(space)": "1,7,128", |
|
"num(ar)": 30, |
|
"len(ar)": "1,1,1", |
|
"num(zh)": 31148, |
|
"len(zh)": "1,2,15", |
|
"num(ja)": 31296, |
|
"len(ja)": "1,2,15", |
|
"num(ja-kana)": 148, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 83, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"microsoft/Phi-3-mini-4k-instruct": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Phi-3-mini-4k-instruct</a>", |
|
"organization": "Microsoft", |
|
"vocab_size": 32011, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 61, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 55, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 700, |
|
"len(zh)": "1,1,1", |
|
"num(ja)": 837, |
|
"len(ja)": "1,1,1", |
|
"num(ja-kana)": 137, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 111, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"microsoft/phi-1": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/phi-1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">phi-1</a>", |
|
"organization": "Microsoft", |
|
"vocab_size": 50295, |
|
"num(digit)": 1691, |
|
"len(digit)": "1,3,16", |
|
"num(space)": 33129, |
|
"len(space)": "1,7,66", |
|
"num(ar)": 22, |
|
"len(ar)": "1,1,3", |
|
"num(zh)": 51, |
|
"len(zh)": "1,1,4", |
|
"num(ja)": 183, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 133, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"microsoft/phi-2": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/phi-2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">phi-2</a>", |
|
"organization": "Microsoft", |
|
"vocab_size": 50295, |
|
"num(digit)": 1691, |
|
"len(digit)": "1,3,16", |
|
"num(space)": 33129, |
|
"len(space)": "1,7,66", |
|
"num(ar)": 22, |
|
"len(ar)": "1,1,3", |
|
"num(zh)": 51, |
|
"len(zh)": "1,1,4", |
|
"num(ja)": 183, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 133, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"mistralai/Mistral-7B-v0.1": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/mistralai/Mistral-7B-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Mistral-7B-v0.1</a>", |
|
"organization": "Mistral", |
|
"vocab_size": 32000, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 85, |
|
"len(space)": "1,3,15", |
|
"num(ar)": 71, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 1459, |
|
"len(zh)": "1,1,2", |
|
"num(ja)": 1593, |
|
"len(ja)": "1,1,2", |
|
"num(ja-kana)": 134, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 346, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"mistralai/Mixtral-8x7B-v0.1": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/mistralai/Mixtral-8x7B-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Mixtral-8x7B-v0.1</a>", |
|
"organization": "Mistral", |
|
"vocab_size": 32000, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 85, |
|
"len(space)": "1,3,15", |
|
"num(ar)": 71, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 1459, |
|
"len(zh)": "1,1,2", |
|
"num(ja)": 1593, |
|
"len(ja)": "1,1,2", |
|
"num(ja-kana)": 134, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 346, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"openai-community/gpt2": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openai-community/gpt2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2</a>", |
|
"organization": "OpenAI", |
|
"vocab_size": 50257, |
|
"num(digit)": 1691, |
|
"len(digit)": "1,3,16", |
|
"num(space)": 33129, |
|
"len(space)": "1,7,66", |
|
"num(ar)": 22, |
|
"len(ar)": "1,1,3", |
|
"num(zh)": 51, |
|
"len(zh)": "1,1,4", |
|
"num(ja)": 183, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 133, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"openai/code-davinci-002": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">code-davinci-002</a>", |
|
"organization": "OpenAI", |
|
"vocab_size": 50281, |
|
"num(digit)": 1691, |
|
"len(digit)": "1,3,16", |
|
"num(space)": 33175, |
|
"len(space)": "1,7,66", |
|
"num(ar)": 22, |
|
"len(ar)": "1,1,3", |
|
"num(zh)": 51, |
|
"len(zh)": "1,1,4", |
|
"num(ja)": 183, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 133, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"openai/gpt-3.5-turbo": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-3.5-turbo</a>", |
|
"organization": "OpenAI", |
|
"vocab_size": 100277, |
|
"num(digit)": 1110, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 47472, |
|
"len(space)": "1,7,128", |
|
"num(ar)": 113, |
|
"len(ar)": "1,2,10", |
|
"num(zh)": 868, |
|
"len(zh)": "1,1,7", |
|
"num(ja)": 1035, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 169, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 299, |
|
"len(ko)": "1,2,4" |
|
}, |
|
"openai/gpt-4o": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4o</a>", |
|
"organization": "OpenAI", |
|
"vocab_size": 200019, |
|
"num(digit)": 1110, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 109316, |
|
"len(space)": "1,6,128", |
|
"num(ar)": 8055, |
|
"len(ar)": "1,4,12", |
|
"num(zh)": 7563, |
|
"len(zh)": "1,2,11", |
|
"num(ja)": 8292, |
|
"len(ja)": "1,2,11", |
|
"num(ja-kana)": 809, |
|
"len(ja-kana)": "1,2,11", |
|
"num(ko)": 2365, |
|
"len(ko)": "1,2,8" |
|
}, |
|
"openai/text-davinci-003": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">text-davinci-003</a>", |
|
"organization": "OpenAI", |
|
"vocab_size": 50281, |
|
"num(digit)": 1691, |
|
"len(digit)": "1,3,16", |
|
"num(space)": 33175, |
|
"len(space)": "1,7,66", |
|
"num(ar)": 22, |
|
"len(ar)": "1,1,3", |
|
"num(zh)": 51, |
|
"len(zh)": "1,1,4", |
|
"num(ja)": 183, |
|
"len(ja)": "1,1,7", |
|
"num(ja-kana)": 133, |
|
"len(ja-kana)": "1,1,7", |
|
"num(ko)": 0, |
|
"len(ko)": "-" |
|
}, |
|
"thu-coai/CharacterGLM-6B": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/thu-coai/CharacterGLM-6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CharacterGLM-6B</a>", |
|
"organization": "Tsinghua", |
|
"vocab_size": 64789, |
|
"num(digit)": 20, |
|
"len(digit)": "1,1,1", |
|
"num(space)": 67, |
|
"len(space)": "1,2,15", |
|
"num(ar)": 57, |
|
"len(ar)": "1,1,2", |
|
"num(zh)": 30922, |
|
"len(zh)": "1,2,16", |
|
"num(ja)": 31065, |
|
"len(ja)": "1,2,16", |
|
"num(ja-kana)": 143, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 604, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"tiiuae/falcon-180b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tiiuae/falcon-180b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">falcon-180b</a>", |
|
"organization": "TII", |
|
"vocab_size": 65024, |
|
"num(digit)": 1108, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 40202, |
|
"len(space)": "1,7,65", |
|
"num(ar)": 21, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 1627, |
|
"len(zh)": "1,1,3", |
|
"num(ja)": 1652, |
|
"len(ja)": "1,1,3", |
|
"num(ja-kana)": 25, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 1, |
|
"len(ko)": "1,1,1" |
|
}, |
|
"tiiuae/falcon-7b": { |
|
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tiiuae/falcon-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">falcon-7b</a>", |
|
"organization": "TII", |
|
"vocab_size": 65024, |
|
"num(digit)": 1108, |
|
"len(digit)": "1,3,3", |
|
"num(space)": 40202, |
|
"len(space)": "1,7,65", |
|
"num(ar)": 21, |
|
"len(ar)": "1,1,4", |
|
"num(zh)": 1627, |
|
"len(zh)": "1,1,3", |
|
"num(ja)": 1652, |
|
"len(ja)": "1,1,3", |
|
"num(ja-kana)": 25, |
|
"len(ja-kana)": "1,1,1", |
|
"num(ko)": 1, |
|
"len(ko)": "1,1,1" |
|
} |
|
} |