{ "amber.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "aya_101.cc100-en": { "vocab_size": 250100, "n_bytes": 1124813, "n_tokens": 317881, "n_chars": 1121360 }, "baichuan.cc100-en": { "vocab_size": 64000, "n_bytes": 1124813, "n_tokens": 280108, "n_chars": 1121360 }, "baichuan2.cc100-en": { "vocab_size": 125696, "n_bytes": 1124813, "n_tokens": 269011, "n_chars": 1121360 }, "bert_base_cased.cc100-en": { "vocab_size": 28996, "n_bytes": 1124813, "n_tokens": 288022, "n_chars": 1121360 }, "bert_base_chinese.cc100-en": { "vocab_size": 21128, "n_bytes": 1124813, "n_tokens": 377068, "n_chars": 1121360 }, "bert_base_uncased.cc100-en": { "vocab_size": 30522, "n_bytes": 1124813, "n_tokens": 280575, "n_chars": 1121360 }, "bloom.cc100-en": { "vocab_size": 250680, "n_bytes": 1124813, "n_tokens": 257405, "n_chars": 1121360 }, "byt5_small.cc100-en": { "vocab_size": 384, "n_bytes": 1124813, "n_tokens": 1134813, "n_chars": 1121360 }, "character_glm_6b.cc100-en": { "vocab_size": 64789, "n_bytes": 1124813, "n_tokens": 289347, "n_chars": 1121360 }, "chatglm2_6b.cc100-en": { "vocab_size": 64787, "n_bytes": 1124813, "n_tokens": 289329, "n_chars": 1121360 }, "chatglm3_6b.cc100-en": { "vocab_size": 64796, "n_bytes": 1124813, "n_tokens": 289347, "n_chars": 1121360 }, "chatglm_6b.cc100-en": { "vocab_size": 150344, "n_bytes": 1124813, "n_tokens": 284761, "n_chars": 1121360 }, "chatyuan_large_v2.cc100-en": { "vocab_size": 32128, "n_bytes": 1124813, "n_tokens": 536033, "n_chars": 1121360 }, "chinese_llama.cc100-en": { "vocab_size": 49953, "n_bytes": 1124813, "n_tokens": 291514, "n_chars": 1121360 }, "chinese_llama2.cc100-en": { "vocab_size": 55296, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "code_davinci_002.cc100-en": { "vocab_size": 50281, "n_bytes": 1124813, "n_tokens": 258403, "n_chars": 1121360 }, "crystal_coder.cc100-en": { "vocab_size": 32022, "n_bytes": 1124813, "n_tokens": 284627, "n_chars": 1121360 }, "dbrx_instruct.cc100-en": { "vocab_size": 100280, "n_bytes": 1124813, "n_tokens": 254985, "n_chars": 1121360 }, "deepseek_coder_33b_instruct.cc100-en": { "vocab_size": 32022, "n_bytes": 1124813, "n_tokens": 287408, "n_chars": 1121360 }, "deepseek_llm_7b_base.cc100-en": { "vocab_size": 100015, "n_bytes": 1124813, "n_tokens": 272324, "n_chars": 1121360 }, "falcon_180b.cc100-en": { "vocab_size": 65024, "n_bytes": 1124813, "n_tokens": 262509, "n_chars": 1121360 }, "falcon_7b.cc100-en": { "vocab_size": 65024, "n_bytes": 1124813, "n_tokens": 262509, "n_chars": 1121360 }, "fastchat_t5_3b.cc100-en": { "vocab_size": 32110, "n_bytes": 1124813, "n_tokens": 484941, "n_chars": 1121360 }, "flan_t5_base.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "gemma_7b.cc100-en": { "vocab_size": 256000, "n_bytes": 1124813, "n_tokens": 268010, "n_chars": 1121360 }, "gpt2.cc100-en": { "vocab_size": 50257, "n_bytes": 1124813, "n_tokens": 258428, "n_chars": 1121360 }, "gpt2_chinese.cc100-en": { "vocab_size": 21128, "n_bytes": 1124813, "n_tokens": 392641, "n_chars": 1121360 }, "gpt_35_turbo.cc100-en": { "vocab_size": 100277, "n_bytes": 1124813, "n_tokens": 254985, "n_chars": 1121360 }, "gpt_4.cc100-en": { "vocab_size": 100277, "n_bytes": 1124813, "n_tokens": 254985, "n_chars": 1121360 }, "gpt_nexo_20b.cc100-en": { "vocab_size": 50277, "n_bytes": 1124813, "n_tokens": 259357, "n_chars": 1121360 }, "grok_1.cc100-en": { "vocab_size": 131072, "n_bytes": 1124813, "n_tokens": 258048, "n_chars": 1121360 }, "internlm2_chat_7b.cc100-en": { "vocab_size": 92544, "n_bytes": 1124813, "n_tokens": 271583, "n_chars": 1121360 }, "internlm2_math_7b.cc100-en": { "vocab_size": 92544, "n_bytes": 1124813, "n_tokens": 271583, "n_chars": 1121360 }, "internlm_chat_7b.cc100-en": { "vocab_size": 103168, "n_bytes": 1124813, "n_tokens": 271293, "n_chars": 1121360 }, "internlm_xcomposer_7b.cc100-en": { "vocab_size": 103168, "n_bytes": 1124813, "n_tokens": 271293, "n_chars": 1121360 }, "jamba_v0_1.cc100-en": { "vocab_size": 65536, "n_bytes": 1124813, "n_tokens": 274242, "n_chars": 1121360 }, "kplug.cc100-en": { "vocab_size": 10261, "n_bytes": 1124813, "n_tokens": 393564, "n_chars": 1121360 }, "llama.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "llama2.cc100-en": { "vocab_size": 32001, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "llama3.cc100-en": { "vocab_size": 128256, "n_bytes": 1124813, "n_tokens": 254944, "n_chars": 1121360 }, "mistral_7b.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 285801, "n_chars": 1121360 }, "mixtral_8_7b.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 285801, "n_chars": 1121360 }, "mobilebert_uncased.cc100-en": { "vocab_size": 30522, "n_bytes": 1124813, "n_tokens": 280575, "n_chars": 1121360 }, "moss.cc100-en": { "vocab_size": 106072, "n_bytes": 1124813, "n_tokens": 257070, "n_chars": 1121360 }, "mt5_large.cc100-en": { "vocab_size": 250100, "n_bytes": 1124813, "n_tokens": 317881, "n_chars": 1121360 }, "olmo_7b.cc100-en": { "vocab_size": 50280, "n_bytes": 1124813, "n_tokens": 259357, "n_chars": 1121360 }, "orion_14b_chat.cc100-en": { "vocab_size": 84608, "n_bytes": 1124813, "n_tokens": 265948, "n_chars": 1121360 }, "phi_1.cc100-en": { "vocab_size": 50295, "n_bytes": 1124813, "n_tokens": 258409, "n_chars": 1121360 }, "phi_2.cc100-en": { "vocab_size": 50295, "n_bytes": 1124813, "n_tokens": 258409, "n_chars": 1121360 }, "phi_3_mini.cc100-en": { "vocab_size": 32011, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "pko_t5_large.cc100-en": { "vocab_size": 50358, "n_bytes": 1124813, "n_tokens": 658985, "n_chars": 1121360 }, "prompt_clue.cc100-en": { "vocab_size": 32128, "n_bytes": 1124813, "n_tokens": 536033, "n_chars": 1121360 }, "qwen1_5_14b_chat.cc100-en": { "vocab_size": 151646, "n_bytes": 1124813, "n_tokens": 257983, "n_chars": 1121360 }, "qwen_1_8b_chat.cc100-en": { "vocab_size": 151851, "n_bytes": 1124813, "n_tokens": 257983, "n_chars": 1121360 }, "qwen_72b_chat.cc100-en": { "vocab_size": 151851, "n_bytes": 1124813, "n_tokens": 257983, "n_chars": 1121360 }, "qwen_7b_chat.cc100-en": { "vocab_size": 151851, "n_bytes": 1124813, "n_tokens": 257983, "n_chars": 1121360 }, "roberta_chinese_clue.cc100-en": { "vocab_size": 8021, "n_bytes": 1124813, "n_tokens": 583058, "n_chars": 1121360 }, "skywork_13b_base.cc100-en": { "vocab_size": 65519, "n_bytes": 1124813, "n_tokens": 294617, "n_chars": 1121360 }, "skywork_13b_math.cc100-en": { "vocab_size": 65519, "n_bytes": 1124813, "n_tokens": 294617, "n_chars": 1121360 }, "solar_10_7b.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 285801, "n_chars": 1121360 }, "starchat_alpha.cc100-en": { "vocab_size": 49156, "n_bytes": 1124813, "n_tokens": 288965, "n_chars": 1121360 }, "switch_c_2048.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "t5_base.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "t5_large.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "t5_small.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "text_davinci_003.cc100-en": { "vocab_size": 50281, "n_bytes": 1124813, "n_tokens": 258403, "n_chars": 1121360 }, "tigerbot_13b_chat_v2.cc100-en": { "vocab_size": 60515, "n_bytes": 1124813, "n_tokens": 285652, "n_chars": 1121360 }, "tigerbot_70b_chat_v4_4k.cc100-en": { "vocab_size": 65110, "n_bytes": 1124813, "n_tokens": 286946, "n_chars": 1121360 }, "wizardcoder_15b_v1.cc100-en": { "vocab_size": 49153, "n_bytes": 1124813, "n_tokens": 288965, "n_chars": 1121360 }, "wizardcoder_python_7b_v1.cc100-en": { "vocab_size": 32001, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "wizardlm_7b_v1.cc100-en": { "vocab_size": 32001, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "wizardmath_70b_v1.cc100-en": { "vocab_size": 32002, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "xlm_roberta.cc100-en": { "vocab_size": 250002, "n_bytes": 1124813, "n_tokens": 300026, "n_chars": 1121360 }, "yi_34b.cc100-en": { "vocab_size": 64000, "n_bytes": 1124813, "n_tokens": 270400, "n_chars": 1121360 }, "yi_6b.cc100-en": { "vocab_size": 64000, "n_bytes": 1124813, "n_tokens": 270400, "n_chars": 1121360 }, "yi_vl34b.cc100-en": { "vocab_size": 64000, "n_bytes": 1124813, "n_tokens": 269738, "n_chars": 1121360 }, "zephyr_7b_beta.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 285801, "n_chars": 1121360 }, "amber.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "aya_101.cc100-zh-Hans": { "vocab_size": 250100, "n_bytes": 2633047, "n_tokens": 631182, "n_chars": 927311 }, "baichuan.cc100-zh-Hans": { "vocab_size": 64000, "n_bytes": 2633047, "n_tokens": 626117, "n_chars": 927311 }, "baichuan2.cc100-zh-Hans": { "vocab_size": 125696, "n_bytes": 2633047, "n_tokens": 541464, "n_chars": 927311 }, "bert_base_cased.cc100-zh-Hans": { "vocab_size": 28996, "n_bytes": 2633047, "n_tokens": 899709, "n_chars": 927311 }, "bert_base_chinese.cc100-zh-Hans": { "vocab_size": 21128, "n_bytes": 2633047, "n_tokens": 896599, "n_chars": 927311 }, "bert_base_uncased.cc100-zh-Hans": { "vocab_size": 30522, "n_bytes": 2633047, "n_tokens": 898554, "n_chars": 927311 }, "bloom.cc100-zh-Hans": { "vocab_size": 250680, "n_bytes": 2633047, "n_tokens": 573008, "n_chars": 927311 }, "byt5_small.cc100-zh-Hans": { "vocab_size": 384, "n_bytes": 2633047, "n_tokens": 2643047, "n_chars": 927311 }, "character_glm_6b.cc100-zh-Hans": { "vocab_size": 64789, "n_bytes": 2633047, "n_tokens": 583646, "n_chars": 927311 }, "chatglm2_6b.cc100-zh-Hans": { "vocab_size": 64787, "n_bytes": 2633047, "n_tokens": 583646, "n_chars": 927311 }, "chatglm3_6b.cc100-zh-Hans": { "vocab_size": 64796, "n_bytes": 2633047, "n_tokens": 583646, "n_chars": 927311 }, "chatglm_6b.cc100-zh-Hans": { "vocab_size": 150344, "n_bytes": 2633047, "n_tokens": 527384, "n_chars": 927311 }, "chatyuan_large_v2.cc100-zh-Hans": { "vocab_size": 32128, "n_bytes": 2633047, "n_tokens": 564905, "n_chars": 927311 }, "chinese_llama.cc100-zh-Hans": { "vocab_size": 49953, "n_bytes": 2633047, "n_tokens": 623219, "n_chars": 927311 }, "chinese_llama2.cc100-zh-Hans": { "vocab_size": 55296, "n_bytes": 2633047, "n_tokens": 625766, "n_chars": 927311 }, "code_davinci_002.cc100-zh-Hans": { "vocab_size": 50281, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "crystal_coder.cc100-zh-Hans": { "vocab_size": 32022, "n_bytes": 2633047, "n_tokens": 1320093, "n_chars": 927311 }, "dbrx_instruct.cc100-zh-Hans": { "vocab_size": 100280, "n_bytes": 2633047, "n_tokens": 1084939, "n_chars": 927311 }, "deepseek_coder_33b_instruct.cc100-zh-Hans": { "vocab_size": 32022, "n_bytes": 2633047, "n_tokens": 720577, "n_chars": 927311 }, "deepseek_llm_7b_base.cc100-zh-Hans": { "vocab_size": 100015, "n_bytes": 2633047, "n_tokens": 605081, "n_chars": 927311 }, "falcon_180b.cc100-zh-Hans": { "vocab_size": 65024, "n_bytes": 2633047, "n_tokens": 1124681, "n_chars": 927311 }, "falcon_7b.cc100-zh-Hans": { "vocab_size": 65024, "n_bytes": 2633047, "n_tokens": 1124681, "n_chars": 927311 }, "fastchat_t5_3b.cc100-zh-Hans": { "vocab_size": 32110, "n_bytes": 2633047, "n_tokens": 178974, "n_chars": 927311 }, "flan_t5_base.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173520, "n_chars": 927311 }, "gemma_7b.cc100-zh-Hans": { "vocab_size": 256000, "n_bytes": 2633047, "n_tokens": 641795, "n_chars": 927311 }, "gpt2.cc100-zh-Hans": { "vocab_size": 50257, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "gpt2_chinese.cc100-zh-Hans": { "vocab_size": 21128, "n_bytes": 2633047, "n_tokens": 899506, "n_chars": 927311 }, "gpt_35_turbo.cc100-zh-Hans": { "vocab_size": 100277, "n_bytes": 2633047, "n_tokens": 1084939, "n_chars": 927311 }, "gpt_4.cc100-zh-Hans": { "vocab_size": 100277, "n_bytes": 2633047, "n_tokens": 1084939, "n_chars": 927311 }, "gpt_nexo_20b.cc100-zh-Hans": { "vocab_size": 50277, "n_bytes": 2633047, "n_tokens": 1220529, "n_chars": 927311 }, "grok_1.cc100-zh-Hans": { "vocab_size": 131072, "n_bytes": 2633047, "n_tokens": 1414508, "n_chars": 927311 }, "internlm2_chat_7b.cc100-zh-Hans": { "vocab_size": 92544, "n_bytes": 2633047, "n_tokens": 579976, "n_chars": 927311 }, "internlm2_math_7b.cc100-zh-Hans": { "vocab_size": 92544, "n_bytes": 2633047, "n_tokens": 579976, "n_chars": 927311 }, "internlm_chat_7b.cc100-zh-Hans": { "vocab_size": 103168, "n_bytes": 2633047, "n_tokens": 579109, "n_chars": 927311 }, "internlm_xcomposer_7b.cc100-zh-Hans": { "vocab_size": 103168, "n_bytes": 2633047, "n_tokens": 579109, "n_chars": 927311 }, "jamba_v0_1.cc100-zh-Hans": { "vocab_size": 65536, "n_bytes": 2633047, "n_tokens": 1067054, "n_chars": 927311 }, "kplug.cc100-zh-Hans": { "vocab_size": 10261, "n_bytes": 2633047, "n_tokens": 902451, "n_chars": 927311 }, "llama.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "llama2.cc100-zh-Hans": { "vocab_size": 32001, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "llama3.cc100-zh-Hans": { "vocab_size": 128256, "n_bytes": 2633047, "n_tokens": 747405, "n_chars": 927311 }, "mistral_7b.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1041023, "n_chars": 927311 }, "mixtral_8_7b.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1041023, "n_chars": 927311 }, "mobilebert_uncased.cc100-zh-Hans": { "vocab_size": 30522, "n_bytes": 2633047, "n_tokens": 898554, "n_chars": 927311 }, "moss.cc100-zh-Hans": { "vocab_size": 106072, "n_bytes": 2633047, "n_tokens": 557455, "n_chars": 927311 }, "mt5_large.cc100-zh-Hans": { "vocab_size": 250100, "n_bytes": 2633047, "n_tokens": 631182, "n_chars": 927311 }, "olmo_7b.cc100-zh-Hans": { "vocab_size": 50280, "n_bytes": 2633047, "n_tokens": 1220529, "n_chars": 927311 }, "orion_14b_chat.cc100-zh-Hans": { "vocab_size": 84608, "n_bytes": 2633047, "n_tokens": 529926, "n_chars": 927311 }, "phi_1.cc100-zh-Hans": { "vocab_size": 50295, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "phi_2.cc100-zh-Hans": { "vocab_size": 50295, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "phi_3_mini.cc100-zh-Hans": { "vocab_size": 32011, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "pko_t5_large.cc100-zh-Hans": { "vocab_size": 50358, "n_bytes": 2633047, "n_tokens": 2533519, "n_chars": 927311 }, "prompt_clue.cc100-zh-Hans": { "vocab_size": 32128, "n_bytes": 2633047, "n_tokens": 564905, "n_chars": 927311 }, "qwen1_5_14b_chat.cc100-zh-Hans": { "vocab_size": 151646, "n_bytes": 2633047, "n_tokens": 589211, "n_chars": 927311 }, "qwen_1_8b_chat.cc100-zh-Hans": { "vocab_size": 151851, "n_bytes": 2633047, "n_tokens": 589211, "n_chars": 927311 }, "qwen_72b_chat.cc100-zh-Hans": { "vocab_size": 151851, "n_bytes": 2633047, "n_tokens": 589211, "n_chars": 927311 }, "qwen_7b_chat.cc100-zh-Hans": { "vocab_size": 151851, "n_bytes": 2633047, "n_tokens": 589211, "n_chars": 927311 }, "roberta_chinese_clue.cc100-zh-Hans": { "vocab_size": 8021, "n_bytes": 2633047, "n_tokens": 907144, "n_chars": 927311 }, "skywork_13b_base.cc100-zh-Hans": { "vocab_size": 65519, "n_bytes": 2633047, "n_tokens": 663923, "n_chars": 927311 }, "skywork_13b_math.cc100-zh-Hans": { "vocab_size": 65519, "n_bytes": 2633047, "n_tokens": 663923, "n_chars": 927311 }, "solar_10_7b.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1041023, "n_chars": 927311 }, "starchat_alpha.cc100-zh-Hans": { "vocab_size": 49156, "n_bytes": 2633047, "n_tokens": 882018, "n_chars": 927311 }, "switch_c_2048.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173519, "n_chars": 927311 }, "t5_base.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173519, "n_chars": 927311 }, "t5_large.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173519, "n_chars": 927311 }, "t5_small.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173519, "n_chars": 927311 }, "text_davinci_003.cc100-zh-Hans": { "vocab_size": 50281, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "tigerbot_13b_chat_v2.cc100-zh-Hans": { "vocab_size": 60515, "n_bytes": 2633047, "n_tokens": 577385, "n_chars": 927311 }, "tigerbot_70b_chat_v4_4k.cc100-zh-Hans": { "vocab_size": 65110, "n_bytes": 2633047, "n_tokens": 577211, "n_chars": 927311 }, "wizardcoder_15b_v1.cc100-zh-Hans": { "vocab_size": 49153, "n_bytes": 2633047, "n_tokens": 882018, "n_chars": 927311 }, "wizardcoder_python_7b_v1.cc100-zh-Hans": { "vocab_size": 32001, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "wizardlm_7b_v1.cc100-zh-Hans": { "vocab_size": 32001, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "wizardmath_70b_v1.cc100-zh-Hans": { "vocab_size": 32002, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "xlm_roberta.cc100-zh-Hans": { "vocab_size": 250002, "n_bytes": 2633047, "n_tokens": 619844, "n_chars": 927311 }, "yi_34b.cc100-zh-Hans": { "vocab_size": 64000, "n_bytes": 2633047, "n_tokens": 588729, "n_chars": 927311 }, "yi_6b.cc100-zh-Hans": { "vocab_size": 64000, "n_bytes": 2633047, "n_tokens": 588729, "n_chars": 927311 }, "yi_vl34b.cc100-zh-Hans": { "vocab_size": 64000, "n_bytes": 2633047, "n_tokens": 596166, "n_chars": 927311 }, "zephyr_7b_beta.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1041023, "n_chars": 927311 }, "amber.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "aya_101.cc100-es": { "vocab_size": 250100, "n_bytes": 1664455, "n_tokens": 472231, "n_chars": 1630297 }, "baichuan.cc100-es": { "vocab_size": 64000, "n_bytes": 1664455, "n_tokens": 585804, "n_chars": 1630297 }, "baichuan2.cc100-es": { "vocab_size": 125696, "n_bytes": 1664455, "n_tokens": 551326, "n_chars": 1630297 }, "bert_base_cased.cc100-es": { "vocab_size": 28996, "n_bytes": 1664455, "n_tokens": 630231, "n_chars": 1630297 }, "bert_base_chinese.cc100-es": { "vocab_size": 21128, "n_bytes": 1664455, "n_tokens": 609419, "n_chars": 1630297 }, "bert_base_uncased.cc100-es": { "vocab_size": 30522, "n_bytes": 1664455, "n_tokens": 558042, "n_chars": 1630297 }, "bloom.cc100-es": { "vocab_size": 250680, "n_bytes": 1664455, "n_tokens": 350793, "n_chars": 1630297 }, "byt5_small.cc100-es": { "vocab_size": 384, "n_bytes": 1664455, "n_tokens": 1674455, "n_chars": 1630297 }, "character_glm_6b.cc100-es": { "vocab_size": 64789, "n_bytes": 1664455, "n_tokens": 566501, "n_chars": 1630297 }, "chatglm2_6b.cc100-es": { "vocab_size": 64787, "n_bytes": 1664455, "n_tokens": 566476, "n_chars": 1630297 }, "chatglm3_6b.cc100-es": { "vocab_size": 64796, "n_bytes": 1664455, "n_tokens": 566501, "n_chars": 1630297 }, "chatglm_6b.cc100-es": { "vocab_size": 150344, "n_bytes": 1664455, "n_tokens": 514848, "n_chars": 1630297 }, "chatyuan_large_v2.cc100-es": { "vocab_size": 32128, "n_bytes": 1664455, "n_tokens": 889530, "n_chars": 1630297 }, "chinese_llama.cc100-es": { "vocab_size": 49953, "n_bytes": 1664455, "n_tokens": 486672, "n_chars": 1630297 }, "chinese_llama2.cc100-es": { "vocab_size": 55296, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "code_davinci_002.cc100-es": { "vocab_size": 50281, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "crystal_coder.cc100-es": { "vocab_size": 32022, "n_bytes": 1664455, "n_tokens": 482235, "n_chars": 1630297 }, "dbrx_instruct.cc100-es": { "vocab_size": 100280, "n_bytes": 1664455, "n_tokens": 433875, "n_chars": 1630297 }, "deepseek_coder_33b_instruct.cc100-es": { "vocab_size": 32022, "n_bytes": 1664455, "n_tokens": 523884, "n_chars": 1630297 }, "deepseek_llm_7b_base.cc100-es": { "vocab_size": 100015, "n_bytes": 1664455, "n_tokens": 480877, "n_chars": 1630297 }, "falcon_180b.cc100-es": { "vocab_size": 65024, "n_bytes": 1664455, "n_tokens": 442138, "n_chars": 1630297 }, "falcon_7b.cc100-es": { "vocab_size": 65024, "n_bytes": 1664455, "n_tokens": 442138, "n_chars": 1630297 }, "fastchat_t5_3b.cc100-es": { "vocab_size": 32110, "n_bytes": 1664455, "n_tokens": 970105, "n_chars": 1630297 }, "flan_t5_base.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706405, "n_chars": 1630297 }, "gemma_7b.cc100-es": { "vocab_size": 256000, "n_bytes": 1664455, "n_tokens": 371321, "n_chars": 1630297 }, "gpt2.cc100-es": { "vocab_size": 50257, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "gpt2_chinese.cc100-es": { "vocab_size": 21128, "n_bytes": 1664455, "n_tokens": 703390, "n_chars": 1630297 }, "gpt_35_turbo.cc100-es": { "vocab_size": 100277, "n_bytes": 1664455, "n_tokens": 433875, "n_chars": 1630297 }, "gpt_4.cc100-es": { "vocab_size": 100277, "n_bytes": 1664455, "n_tokens": 433875, "n_chars": 1630297 }, "gpt_nexo_20b.cc100-es": { "vocab_size": 50277, "n_bytes": 1664455, "n_tokens": 494577, "n_chars": 1630297 }, "grok_1.cc100-es": { "vocab_size": 131072, "n_bytes": 1664455, "n_tokens": 449392, "n_chars": 1630297 }, "internlm2_chat_7b.cc100-es": { "vocab_size": 92544, "n_bytes": 1664455, "n_tokens": 518871, "n_chars": 1630297 }, "internlm2_math_7b.cc100-es": { "vocab_size": 92544, "n_bytes": 1664455, "n_tokens": 518871, "n_chars": 1630297 }, "internlm_chat_7b.cc100-es": { "vocab_size": 103168, "n_bytes": 1664455, "n_tokens": 516572, "n_chars": 1630297 }, "internlm_xcomposer_7b.cc100-es": { "vocab_size": 103168, "n_bytes": 1664455, "n_tokens": 516572, "n_chars": 1630297 }, "jamba_v0_1.cc100-es": { "vocab_size": 65536, "n_bytes": 1664455, "n_tokens": 420883, "n_chars": 1630297 }, "kplug.cc100-es": { "vocab_size": 10261, "n_bytes": 1664455, "n_tokens": 704804, "n_chars": 1630297 }, "llama.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "llama2.cc100-es": { "vocab_size": 32001, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "llama3.cc100-es": { "vocab_size": 128256, "n_bytes": 1664455, "n_tokens": 433289, "n_chars": 1630297 }, "mistral_7b.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 513915, "n_chars": 1630297 }, "mixtral_8_7b.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 513915, "n_chars": 1630297 }, "mobilebert_uncased.cc100-es": { "vocab_size": 30522, "n_bytes": 1664455, "n_tokens": 558042, "n_chars": 1630297 }, "moss.cc100-es": { "vocab_size": 106072, "n_bytes": 1664455, "n_tokens": 568539, "n_chars": 1630297 }, "mt5_large.cc100-es": { "vocab_size": 250100, "n_bytes": 1664455, "n_tokens": 472231, "n_chars": 1630297 }, "olmo_7b.cc100-es": { "vocab_size": 50280, "n_bytes": 1664455, "n_tokens": 494577, "n_chars": 1630297 }, "orion_14b_chat.cc100-es": { "vocab_size": 84608, "n_bytes": 1664455, "n_tokens": 628571, "n_chars": 1630297 }, "phi_1.cc100-es": { "vocab_size": 50295, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "phi_2.cc100-es": { "vocab_size": 50295, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "phi_3_mini.cc100-es": { "vocab_size": 32011, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "pko_t5_large.cc100-es": { "vocab_size": 50358, "n_bytes": 1664455, "n_tokens": 1134056, "n_chars": 1630297 }, "prompt_clue.cc100-es": { "vocab_size": 32128, "n_bytes": 1664455, "n_tokens": 889530, "n_chars": 1630297 }, "qwen1_5_14b_chat.cc100-es": { "vocab_size": 151646, "n_bytes": 1664455, "n_tokens": 434264, "n_chars": 1630297 }, "qwen_1_8b_chat.cc100-es": { "vocab_size": 151851, "n_bytes": 1664455, "n_tokens": 434264, "n_chars": 1630297 }, "qwen_72b_chat.cc100-es": { "vocab_size": 151851, "n_bytes": 1664455, "n_tokens": 434264, "n_chars": 1630297 }, "qwen_7b_chat.cc100-es": { "vocab_size": 151851, "n_bytes": 1664455, "n_tokens": 434264, "n_chars": 1630297 }, "roberta_chinese_clue.cc100-es": { "vocab_size": 8021, "n_bytes": 1664455, "n_tokens": 866564, "n_chars": 1630297 }, "skywork_13b_base.cc100-es": { "vocab_size": 65519, "n_bytes": 1664455, "n_tokens": 492211, "n_chars": 1630297 }, "skywork_13b_math.cc100-es": { "vocab_size": 65519, "n_bytes": 1664455, "n_tokens": 492211, "n_chars": 1630297 }, "solar_10_7b.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 513915, "n_chars": 1630297 }, "starchat_alpha.cc100-es": { "vocab_size": 49156, "n_bytes": 1664455, "n_tokens": 530592, "n_chars": 1630297 }, "switch_c_2048.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706400, "n_chars": 1630297 }, "t5_base.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706400, "n_chars": 1630297 }, "t5_large.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706400, "n_chars": 1630297 }, "t5_small.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706400, "n_chars": 1630297 }, "text_davinci_003.cc100-es": { "vocab_size": 50281, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "tigerbot_13b_chat_v2.cc100-es": { "vocab_size": 60515, "n_bytes": 1664455, "n_tokens": 482553, "n_chars": 1630297 }, "tigerbot_70b_chat_v4_4k.cc100-es": { "vocab_size": 65110, "n_bytes": 1664455, "n_tokens": 484099, "n_chars": 1630297 }, "wizardcoder_15b_v1.cc100-es": { "vocab_size": 49153, "n_bytes": 1664455, "n_tokens": 530592, "n_chars": 1630297 }, "wizardcoder_python_7b_v1.cc100-es": { "vocab_size": 32001, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "wizardlm_7b_v1.cc100-es": { "vocab_size": 32001, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "wizardmath_70b_v1.cc100-es": { "vocab_size": 32002, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "xlm_roberta.cc100-es": { "vocab_size": 250002, "n_bytes": 1664455, "n_tokens": 399850, "n_chars": 1630297 }, "yi_34b.cc100-es": { "vocab_size": 64000, "n_bytes": 1664455, "n_tokens": 577018, "n_chars": 1630297 }, "yi_6b.cc100-es": { "vocab_size": 64000, "n_bytes": 1664455, "n_tokens": 577018, "n_chars": 1630297 }, "yi_vl34b.cc100-es": { "vocab_size": 64000, "n_bytes": 1664455, "n_tokens": 576794, "n_chars": 1630297 }, "zephyr_7b_beta.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 513915, "n_chars": 1630297 }, "aya_101.cc100-fr": { "vocab_size": 250100, "n_bytes": 1540504, "n_tokens": 470944, "n_chars": 1484970 }, "baichuan.cc100-fr": { "vocab_size": 64000, "n_bytes": 1540504, "n_tokens": 540430, "n_chars": 1484970 }, "baichuan2.cc100-fr": { "vocab_size": 125696, "n_bytes": 1540504, "n_tokens": 512313, "n_chars": 1484970 }, "bert_base_cased.cc100-fr": { "vocab_size": 28996, "n_bytes": 1540504, "n_tokens": 583210, "n_chars": 1484970 }, "bert_base_chinese.cc100-fr": { "vocab_size": 21128, "n_bytes": 1540504, "n_tokens": 553134, "n_chars": 1484970 }, "bert_base_uncased.cc100-fr": { "vocab_size": 30522, "n_bytes": 1540504, "n_tokens": 504075, "n_chars": 1484970 }, "bloom.cc100-fr": { "vocab_size": 250680, "n_bytes": 1540504, "n_tokens": 321639, "n_chars": 1484970 }, "byt5_small.cc100-fr": { "vocab_size": 384, "n_bytes": 1540504, "n_tokens": 1550504, "n_chars": 1484970 }, "character_glm_6b.cc100-fr": { "vocab_size": 64789, "n_bytes": 1540504, "n_tokens": 515052, "n_chars": 1484970 }, "chatglm2_6b.cc100-fr": { "vocab_size": 64787, "n_bytes": 1540504, "n_tokens": 515028, "n_chars": 1484970 }, "chatglm3_6b.cc100-fr": { "vocab_size": 64796, "n_bytes": 1540504, "n_tokens": 515052, "n_chars": 1484970 }, "chatglm_6b.cc100-fr": { "vocab_size": 150344, "n_bytes": 1540504, "n_tokens": 499261, "n_chars": 1484970 }, "chatyuan_large_v2.cc100-fr": { "vocab_size": 32128, "n_bytes": 1540504, "n_tokens": 822012, "n_chars": 1484970 }, "chinese_llama.cc100-fr": { "vocab_size": 49953, "n_bytes": 1540504, "n_tokens": 450352, "n_chars": 1484970 }, "chinese_llama2.cc100-fr": { "vocab_size": 55296, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "code_davinci_002.cc100-fr": { "vocab_size": 50281, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "crystal_coder.cc100-fr": { "vocab_size": 32022, "n_bytes": 1540504, "n_tokens": 447243, "n_chars": 1484970 }, "dbrx_instruct.cc100-fr": { "vocab_size": 100280, "n_bytes": 1540504, "n_tokens": 412685, "n_chars": 1484970 }, "deepseek_coder_33b_instruct.cc100-fr": { "vocab_size": 32022, "n_bytes": 1540504, "n_tokens": 537538, "n_chars": 1484970 }, "deepseek_llm_7b_base.cc100-fr": { "vocab_size": 100015, "n_bytes": 1540504, "n_tokens": 507693, "n_chars": 1484970 }, "falcon_180b.cc100-fr": { "vocab_size": 65024, "n_bytes": 1540504, "n_tokens": 407853, "n_chars": 1484970 }, "falcon_7b.cc100-fr": { "vocab_size": 65024, "n_bytes": 1540504, "n_tokens": 407853, "n_chars": 1484970 }, "fastchat_t5_3b.cc100-fr": { "vocab_size": 32110, "n_bytes": 1540504, "n_tokens": 717675, "n_chars": 1484970 }, "flan_t5_base.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476135, "n_chars": 1484970 }, "gemma_7b.cc100-fr": { "vocab_size": 256000, "n_bytes": 1540504, "n_tokens": 374551, "n_chars": 1484970 }, "gpt2.cc100-fr": { "vocab_size": 50257, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "gpt2_chinese.cc100-fr": { "vocab_size": 21128, "n_bytes": 1540504, "n_tokens": 636442, "n_chars": 1484970 }, "gpt_35_turbo.cc100-fr": { "vocab_size": 100277, "n_bytes": 1540504, "n_tokens": 412685, "n_chars": 1484970 }, "gpt_4.cc100-fr": { "vocab_size": 100277, "n_bytes": 1540504, "n_tokens": 412685, "n_chars": 1484970 }, "gpt_nexo_20b.cc100-fr": { "vocab_size": 50277, "n_bytes": 1540504, "n_tokens": 458961, "n_chars": 1484970 }, "grok_1.cc100-fr": { "vocab_size": 131072, "n_bytes": 1540504, "n_tokens": 428298, "n_chars": 1484970 }, "internlm2_chat_7b.cc100-fr": { "vocab_size": 92544, "n_bytes": 1540504, "n_tokens": 496629, "n_chars": 1484970 }, "internlm2_math_7b.cc100-fr": { "vocab_size": 92544, "n_bytes": 1540504, "n_tokens": 496629, "n_chars": 1484970 }, "internlm_chat_7b.cc100-fr": { "vocab_size": 103168, "n_bytes": 1540504, "n_tokens": 495045, "n_chars": 1484970 }, "internlm_xcomposer_7b.cc100-fr": { "vocab_size": 103168, "n_bytes": 1540504, "n_tokens": 495045, "n_chars": 1484970 }, "jamba_v0_1.cc100-fr": { "vocab_size": 65536, "n_bytes": 1540504, "n_tokens": 412899, "n_chars": 1484970 }, "kplug.cc100-fr": { "vocab_size": 10261, "n_bytes": 1540504, "n_tokens": 638107, "n_chars": 1484970 }, "llama.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "llama2.cc100-fr": { "vocab_size": 32001, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "llama3.cc100-fr": { "vocab_size": 128256, "n_bytes": 1540504, "n_tokens": 412146, "n_chars": 1484970 }, "mistral_7b.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 476666, "n_chars": 1484970 }, "mixtral_8_7b.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 476666, "n_chars": 1484970 }, "mobilebert_uncased.cc100-fr": { "vocab_size": 30522, "n_bytes": 1540504, "n_tokens": 504075, "n_chars": 1484970 }, "moss.cc100-fr": { "vocab_size": 106072, "n_bytes": 1540504, "n_tokens": 515669, "n_chars": 1484970 }, "mt5_large.cc100-fr": { "vocab_size": 250100, "n_bytes": 1540504, "n_tokens": 470944, "n_chars": 1484970 }, "olmo_7b.cc100-fr": { "vocab_size": 50280, "n_bytes": 1540504, "n_tokens": 458961, "n_chars": 1484970 }, "orion_14b_chat.cc100-fr": { "vocab_size": 84608, "n_bytes": 1540504, "n_tokens": 564107, "n_chars": 1484970 }, "phi_1.cc100-fr": { "vocab_size": 50295, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "phi_2.cc100-fr": { "vocab_size": 50295, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "phi_3_mini.cc100-fr": { "vocab_size": 32011, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "pko_t5_large.cc100-fr": { "vocab_size": 50358, "n_bytes": 1540504, "n_tokens": 1044665, "n_chars": 1484970 }, "prompt_clue.cc100-fr": { "vocab_size": 32128, "n_bytes": 1540504, "n_tokens": 822012, "n_chars": 1484970 }, "qwen1_5_14b_chat.cc100-fr": { "vocab_size": 151646, "n_bytes": 1540504, "n_tokens": 413637, "n_chars": 1484970 }, "qwen_1_8b_chat.cc100-fr": { "vocab_size": 151851, "n_bytes": 1540504, "n_tokens": 413637, "n_chars": 1484970 }, "qwen_72b_chat.cc100-fr": { "vocab_size": 151851, "n_bytes": 1540504, "n_tokens": 413637, "n_chars": 1484970 }, "qwen_7b_chat.cc100-fr": { "vocab_size": 151851, "n_bytes": 1540504, "n_tokens": 413637, "n_chars": 1484970 }, "roberta_chinese_clue.cc100-fr": { "vocab_size": 8021, "n_bytes": 1540504, "n_tokens": 787363, "n_chars": 1484970 }, "skywork_13b_base.cc100-fr": { "vocab_size": 65519, "n_bytes": 1540504, "n_tokens": 457233, "n_chars": 1484970 }, "skywork_13b_math.cc100-fr": { "vocab_size": 65519, "n_bytes": 1540504, "n_tokens": 457233, "n_chars": 1484970 }, "solar_10_7b.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 476666, "n_chars": 1484970 }, "starchat_alpha.cc100-fr": { "vocab_size": 49156, "n_bytes": 1540504, "n_tokens": 509958, "n_chars": 1484970 }, "switch_c_2048.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476133, "n_chars": 1484970 }, "t5_base.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476133, "n_chars": 1484970 }, "t5_large.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476133, "n_chars": 1484970 }, "t5_small.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476133, "n_chars": 1484970 }, "text_davinci_003.cc100-fr": { "vocab_size": 50281, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "tigerbot_13b_chat_v2.cc100-fr": { "vocab_size": 60515, "n_bytes": 1540504, "n_tokens": 447372, "n_chars": 1484970 }, "tigerbot_70b_chat_v4_4k.cc100-fr": { "vocab_size": 65110, "n_bytes": 1540504, "n_tokens": 448567, "n_chars": 1484970 }, "wizardcoder_15b_v1.cc100-fr": { "vocab_size": 49153, "n_bytes": 1540504, "n_tokens": 509958, "n_chars": 1484970 }, "wizardcoder_python_7b_v1.cc100-fr": { "vocab_size": 32001, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "wizardlm_7b_v1.cc100-fr": { "vocab_size": 32001, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "wizardmath_70b_v1.cc100-fr": { "vocab_size": 32002, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "xlm_roberta.cc100-fr": { "vocab_size": 250002, "n_bytes": 1540504, "n_tokens": 405041, "n_chars": 1484970 }, "yi_34b.cc100-fr": { "vocab_size": 64000, "n_bytes": 1540504, "n_tokens": 533106, "n_chars": 1484970 }, "yi_6b.cc100-fr": { "vocab_size": 64000, "n_bytes": 1540504, "n_tokens": 533106, "n_chars": 1484970 }, "yi_vl34b.cc100-fr": { "vocab_size": 64000, "n_bytes": 1540504, "n_tokens": 532288, "n_chars": 1484970 }, "zephyr_7b_beta.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 476666, "n_chars": 1484970 }, "gpt_neox_japanese_2_7b.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 1121413, "n_chars": 1121360 }, "gpt_neox_japanese_2_7b.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1049033, "n_chars": 927311 }, "aya_101.cc100-ja": { "vocab_size": 250100, "n_bytes": 1774770, "n_tokens": 300542, "n_chars": 603065 }, "baichuan.cc100-ja": { "vocab_size": 64000, "n_bytes": 1774770, "n_tokens": 591656, "n_chars": 603065 }, "baichuan2.cc100-ja": { "vocab_size": 125696, "n_bytes": 1774770, "n_tokens": 554936, "n_chars": 603065 }, "bert_base_cased.cc100-ja": { "vocab_size": 28996, "n_bytes": 1774770, "n_tokens": 410492, "n_chars": 603065 }, "bert_base_chinese.cc100-ja": { "vocab_size": 21128, "n_bytes": 1774770, "n_tokens": 396831, "n_chars": 603065 }, "bert_base_uncased.cc100-ja": { "vocab_size": 30522, "n_bytes": 1774770, "n_tokens": 580634, "n_chars": 603065 }, "bloom.cc100-ja": { "vocab_size": 250680, "n_bytes": 1774770, "n_tokens": 523592, "n_chars": 603065 }, "byt5_small.cc100-ja": { "vocab_size": 384, "n_bytes": 1774770, "n_tokens": 1784770, "n_chars": 603065 }, "aya_101.cc100-ar": { "vocab_size": 250100, "n_bytes": 2813283, "n_tokens": 631736, "n_chars": 1560987 }, "baichuan.cc100-ar": { "vocab_size": 64000, "n_bytes": 2813283, "n_tokens": 1422976, "n_chars": 1560987 }, "baichuan2.cc100-ar": { "vocab_size": 125696, "n_bytes": 2813283, "n_tokens": 1337285, "n_chars": 1560987 }, "bert_base_cased.cc100-ar": { "vocab_size": 28996, "n_bytes": 2813283, "n_tokens": 1232449, "n_chars": 1560987 }, "bert_base_chinese.cc100-ar": { "vocab_size": 21128, "n_bytes": 2813283, "n_tokens": 536389, "n_chars": 1560987 }, "bert_base_uncased.cc100-ar": { "vocab_size": 30522, "n_bytes": 2813283, "n_tokens": 1269370, "n_chars": 1560987 }, "bloom.cc100-ar": { "vocab_size": 250680, "n_bytes": 2813283, "n_tokens": 427489, "n_chars": 1560987 }, "byt5_small.cc100-ar": { "vocab_size": 384, "n_bytes": 2813283, "n_tokens": 2823283, "n_chars": 1560987 }, "character_glm_6b.cc100-ar": { "vocab_size": 64789, "n_bytes": 2813283, "n_tokens": 1441847, "n_chars": 1560987 }, "chatglm2_6b.cc100-ar": { "vocab_size": 64787, "n_bytes": 2813283, "n_tokens": 1441847, "n_chars": 1560987 }, "chatglm3_6b.cc100-ar": { "vocab_size": 64796, "n_bytes": 2813283, "n_tokens": 1441847, "n_chars": 1560987 }, "chatglm_6b.cc100-ar": { "vocab_size": 150344, "n_bytes": 2813283, "n_tokens": 1097200, "n_chars": 1560987 }, "chatyuan_large_v2.cc100-ar": { "vocab_size": 32128, "n_bytes": 2813283, "n_tokens": 1006313, "n_chars": 1560987 }, "chinese_llama.cc100-ar": { "vocab_size": 49953, "n_bytes": 2813283, "n_tokens": 1421625, "n_chars": 1560987 }, "chinese_llama2.cc100-ar": { "vocab_size": 55296, "n_bytes": 2813283, "n_tokens": 1432081, "n_chars": 1560987 }, "code_davinci_002.cc100-ar": { "vocab_size": 50281, "n_bytes": 2813283, "n_tokens": 1558111, "n_chars": 1560987 }, "crystal_coder.cc100-ar": { "vocab_size": 32022, "n_bytes": 2813283, "n_tokens": 1422081, "n_chars": 1560987 }, "dbrx_instruct.cc100-ar": { "vocab_size": 100280, "n_bytes": 2813283, "n_tokens": 1105640, "n_chars": 1560987 }, "deepseek_coder_33b_instruct.cc100-ar": { "vocab_size": 32022, "n_bytes": 2813283, "n_tokens": 1958863, "n_chars": 1560987 }, "deepseek_llm_7b_base.cc100-ar": { "vocab_size": 100015, "n_bytes": 2813283, "n_tokens": 1426103, "n_chars": 1560987 }, "falcon_180b.cc100-ar": { "vocab_size": 65024, "n_bytes": 2813283, "n_tokens": 1597443, "n_chars": 1560987 }, "falcon_7b.cc100-ar": { "vocab_size": 65024, "n_bytes": 2813283, "n_tokens": 1597443, "n_chars": 1560987 }, "fastchat_t5_3b.cc100-ar": { "vocab_size": 32110, "n_bytes": 2813283, "n_tokens": 832267, "n_chars": 1560987 }, "flan_t5_base.cc100-ar": { "vocab_size": 32100, "n_bytes": 2813283, "n_tokens": 568957, "n_chars": 1560987 }, "gemma_7b.cc100-ar": { "vocab_size": 256000, "n_bytes": 2813283, "n_tokens": 573788, "n_chars": 1560987 }, "gpt2.cc100-ar": { "vocab_size": 50257, "n_bytes": 2813283, "n_tokens": 1558111, "n_chars": 1560987 }, "gpt2_chinese.cc100-ar": { "vocab_size": 21128, "n_bytes": 2813283, "n_tokens": 617677, "n_chars": 1560987 }, "gpt_35_turbo.cc100-ar": { "vocab_size": 100277, "n_bytes": 2813283, "n_tokens": 1105640, "n_chars": 1560987 }, "gpt_4.cc100-ar": { "vocab_size": 100277, "n_bytes": 2813283, "n_tokens": 1105640, "n_chars": 1560987 }, "gpt_neox_japanese_2_7b.cc100-ar": { "vocab_size": 32000, "n_bytes": 2813283, "n_tokens": 2809195, "n_chars": 1560987 }, "gpt_nexo_20b.cc100-ar": { "vocab_size": 50277, "n_bytes": 2813283, "n_tokens": 1106277, "n_chars": 1560987 }, "grok_1.cc100-ar": { "vocab_size": 131072, "n_bytes": 2813283, "n_tokens": 1392088, "n_chars": 1560987 }, "internlm2_chat_7b.cc100-ar": { "vocab_size": 92544, "n_bytes": 2813283, "n_tokens": 1635378, "n_chars": 1560987 }, "internlm2_math_7b.cc100-ar": { "vocab_size": 92544, "n_bytes": 2813283, "n_tokens": 1635378, "n_chars": 1560987 }, "internlm_chat_7b.cc100-ar": { "vocab_size": 103168, "n_bytes": 2813283, "n_tokens": 532046, "n_chars": 1560987 }, "internlm_xcomposer_7b.cc100-ar": { "vocab_size": 103168, "n_bytes": 2813283, "n_tokens": 532046, "n_chars": 1560987 }, "jamba_v0_1.cc100-ar": { "vocab_size": 65536, "n_bytes": 2813283, "n_tokens": 727886, "n_chars": 1560987 }, "kplug.cc100-ar": { "vocab_size": 10261, "n_bytes": 2813283, "n_tokens": 331987, "n_chars": 1560987 }, "llama.cc100-ar": { "vocab_size": 32000, "n_bytes": 2813283, "n_tokens": 1432081, "n_chars": 1560987 }, "llama2.cc100-ar": { "vocab_size": 32001, "n_bytes": 2813283, "n_tokens": 1432081, "n_chars": 1560987 }, "llama3.cc100-ar": { "vocab_size": 128256, "n_bytes": 2813283, "n_tokens": 615514, "n_chars": 1560987 }, "mistral_7b.cc100-ar": { "vocab_size": 32000, "n_bytes": 2813283, "n_tokens": 1406319, "n_chars": 1560987 }, "mixtral_8_7b.cc100-ar": { "vocab_size": 32000, "n_bytes": 2813283, "n_tokens": 1406319, "n_chars": 1560987 }, "mobilebert_uncased.cc100-ar": { "vocab_size": 30522, "n_bytes": 2813283, "n_tokens": 1269370, "n_chars": 1560987 }, "moss.cc100-ar": { "vocab_size": 106072, "n_bytes": 2813283, "n_tokens": 1557671, "n_chars": 1560987 }, "mt5_large.cc100-ar": { "vocab_size": 250100, "n_bytes": 2813283, "n_tokens": 631736, "n_chars": 1560987 }, "olmo_7b.cc100-ar": { "vocab_size": 50280, "n_bytes": 2813283, "n_tokens": 1106277, "n_chars": 1560987 }, "orion_14b_chat.cc100-ar": { "vocab_size": 84608, "n_bytes": 2813283, "n_tokens": 1531053, "n_chars": 1560987 }, "phi_1.cc100-ar": { "vocab_size": 50295, "n_bytes": 2813283, "n_tokens": 1558111, "n_chars": 1560987 }, "phi_2.cc100-ar": { "vocab_size": 50295, "n_bytes": 2813283, "n_tokens": 1558111, "n_chars": 1560987 }, "phi_3_mini.cc100-ar": { "vocab_size": 32011, "n_bytes": 2813283, "n_tokens": 1432081, "n_chars": 1560987 }, "pko_t5_large.cc100-ar": { "vocab_size": 50358, "n_bytes": 2813283, "n_tokens": 2815586, "n_chars": 1560987 }, "prompt_clue.cc100-ar": { "vocab_size": 32128, "n_bytes": 2813283, "n_tokens": 1006313, "n_chars": 1560987 }, "qwen1_5_14b_chat.cc100-ar": { "vocab_size": 151646, "n_bytes": 2813283, "n_tokens": 614959, "n_chars": 1560987 }, "qwen_1_8b_chat.cc100-ar": { "vocab_size": 151851, "n_bytes": 2813283, "n_tokens": 614959, "n_chars": 1560987 }, "qwen_72b_chat.cc100-ar": { "vocab_size": 151851, "n_bytes": 2813283, "n_tokens": 614959, "n_chars": 1560987 }, "qwen_7b_chat.cc100-ar": { "vocab_size": 151851, "n_bytes": 2813283, "n_tokens": 614959, "n_chars": 1560987 }, "roberta_chinese_clue.cc100-ar": { "vocab_size": 8021, "n_bytes": 2813283, "n_tokens": 621762, "n_chars": 1560987 }, "skywork_13b_base.cc100-ar": { "vocab_size": 65519, "n_bytes": 2813283, "n_tokens": 1432065, "n_chars": 1560987 }, "skywork_13b_math.cc100-ar": { "vocab_size": 65519, "n_bytes": 2813283, "n_tokens": 1432065, "n_chars": 1560987 }, "solar_10_7b.cc100-ar": { "vocab_size": 32000, "n_bytes": 2813283, "n_tokens": 1406319, "n_chars": 1560987 }, "starchat_alpha.cc100-ar": { "vocab_size": 49156, "n_bytes": 2813283, "n_tokens": 1195640, "n_chars": 1560987 }, "switch_c_2048.cc100-ar": { "vocab_size": 32100, "n_bytes": 2813283, "n_tokens": 568855, "n_chars": 1560987 }, "t5_base.cc100-ar": { "vocab_size": 32100, "n_bytes": 2813283, "n_tokens": 568855, "n_chars": 1560987 }, "t5_large.cc100-ar": { "vocab_size": 32100, "n_bytes": 2813283, "n_tokens": 568855, "n_chars": 1560987 }, "t5_small.cc100-ar": { "vocab_size": 32100, "n_bytes": 2813283, "n_tokens": 568855, "n_chars": 1560987 }, "text_davinci_003.cc100-ar": { "vocab_size": 50281, "n_bytes": 2813283, "n_tokens": 1558111, "n_chars": 1560987 }, "tigerbot_13b_chat_v2.cc100-ar": { "vocab_size": 60515, "n_bytes": 2813283, "n_tokens": 1422070, "n_chars": 1560987 }, "tigerbot_70b_chat_v4_4k.cc100-ar": { "vocab_size": 65110, "n_bytes": 2813283, "n_tokens": 1422073, "n_chars": 1560987 }, "wizardcoder_15b_v1.cc100-ar": { "vocab_size": 49153, "n_bytes": 2813283, "n_tokens": 1195640, "n_chars": 1560987 }, "wizardcoder_python_7b_v1.cc100-ar": { "vocab_size": 32001, "n_bytes": 2813283, "n_tokens": 1432081, "n_chars": 1560987 }, "wizardlm_7b_v1.cc100-ar": { "vocab_size": 32001, "n_bytes": 2813283, "n_tokens": 1432081, "n_chars": 1560987 }, "wizardmath_70b_v1.cc100-ar": { "vocab_size": 32002, "n_bytes": 2813283, "n_tokens": 1432081, "n_chars": 1560987 }, "xlm_roberta.cc100-ar": { "vocab_size": 250002, "n_bytes": 2813283, "n_tokens": 518287, "n_chars": 1560987 }, "yi_34b.cc100-ar": { "vocab_size": 64000, "n_bytes": 2813283, "n_tokens": 1795801, "n_chars": 1560987 }, "yi_6b.cc100-ar": { "vocab_size": 64000, "n_bytes": 2813283, "n_tokens": 1795801, "n_chars": 1560987 }, "yi_vl34b.cc100-ar": { "vocab_size": 64000, "n_bytes": 2813283, "n_tokens": 1803957, "n_chars": 1560987 }, "zephyr_7b_beta.cc100-ar": { "vocab_size": 32000, "n_bytes": 2813283, "n_tokens": 1406319, "n_chars": 1560987 }, "aya_101.cc100-de": { "vocab_size": 250100, "n_bytes": 1814876, "n_tokens": 480418, "n_chars": 1784021 }, "baichuan.cc100-de": { "vocab_size": 64000, "n_bytes": 1814876, "n_tokens": 680512, "n_chars": 1784021 }, "baichuan2.cc100-de": { "vocab_size": 125696, "n_bytes": 1814876, "n_tokens": 628063, "n_chars": 1784021 }, "bert_base_cased.cc100-de": { "vocab_size": 28996, "n_bytes": 1814876, "n_tokens": 731093, "n_chars": 1784021 }, "bert_base_chinese.cc100-de": { "vocab_size": 21128, "n_bytes": 1814876, "n_tokens": 561246, "n_chars": 1784021 }, "bert_base_uncased.cc100-de": { "vocab_size": 30522, "n_bytes": 1814876, "n_tokens": 646485, "n_chars": 1784021 }, "bloom.cc100-de": { "vocab_size": 250680, "n_bytes": 1814876, "n_tokens": 541170, "n_chars": 1784021 }, "byt5_small.cc100-de": { "vocab_size": 384, "n_bytes": 1814876, "n_tokens": 1824876, "n_chars": 1784021 }, "character_glm_6b.cc100-de": { "vocab_size": 64789, "n_bytes": 1814876, "n_tokens": 639822, "n_chars": 1784021 }, "chatglm2_6b.cc100-de": { "vocab_size": 64787, "n_bytes": 1814876, "n_tokens": 639757, "n_chars": 1784021 }, "chatglm3_6b.cc100-de": { "vocab_size": 64796, "n_bytes": 1814876, "n_tokens": 639822, "n_chars": 1784021 }, "chatglm_6b.cc100-de": { "vocab_size": 150344, "n_bytes": 1814876, "n_tokens": 589464, "n_chars": 1784021 }, "chatyuan_large_v2.cc100-de": { "vocab_size": 32128, "n_bytes": 1814876, "n_tokens": 970463, "n_chars": 1784021 }, "chinese_llama.cc100-de": { "vocab_size": 49953, "n_bytes": 1814876, "n_tokens": 523859, "n_chars": 1784021 }, "chinese_llama2.cc100-de": { "vocab_size": 55296, "n_bytes": 1814876, "n_tokens": 537318, "n_chars": 1784021 }, "code_davinci_002.cc100-de": { "vocab_size": 50281, "n_bytes": 1814876, "n_tokens": 684666, "n_chars": 1784021 }, "crystal_coder.cc100-de": { "vocab_size": 32022, "n_bytes": 1814876, "n_tokens": 527320, "n_chars": 1784021 }, "dbrx_instruct.cc100-de": { "vocab_size": 100280, "n_bytes": 1814876, "n_tokens": 500870, "n_chars": 1784021 }, "deepseek_coder_33b_instruct.cc100-de": { "vocab_size": 32022, "n_bytes": 1814876, "n_tokens": 745618, "n_chars": 1784021 }, "deepseek_llm_7b_base.cc100-de": { "vocab_size": 100015, "n_bytes": 1814876, "n_tokens": 642573, "n_chars": 1784021 }, "falcon_180b.cc100-de": { "vocab_size": 65024, "n_bytes": 1814876, "n_tokens": 497054, "n_chars": 1784021 }, "falcon_7b.cc100-de": { "vocab_size": 65024, "n_bytes": 1814876, "n_tokens": 497054, "n_chars": 1784021 }, "fastchat_t5_3b.cc100-de": { "vocab_size": 32110, "n_bytes": 1814876, "n_tokens": 736989, "n_chars": 1784021 }, "flan_t5_base.cc100-de": { "vocab_size": 32100, "n_bytes": 1814876, "n_tokens": 480254, "n_chars": 1784021 }, "gemma_7b.cc100-de": { "vocab_size": 256000, "n_bytes": 1814876, "n_tokens": 416876, "n_chars": 1784021 }, "gpt2.cc100-de": { "vocab_size": 50257, "n_bytes": 1814876, "n_tokens": 684669, "n_chars": 1784021 }, "gpt2_chinese.cc100-de": { "vocab_size": 21128, "n_bytes": 1814876, "n_tokens": 786497, "n_chars": 1784021 }, "gpt_35_turbo.cc100-de": { "vocab_size": 100277, "n_bytes": 1814876, "n_tokens": 500870, "n_chars": 1784021 }, "gpt_4.cc100-de": { "vocab_size": 100277, "n_bytes": 1814876, "n_tokens": 500870, "n_chars": 1784021 }, "gpt_neox_japanese_2_7b.cc100-de": { "vocab_size": 32000, "n_bytes": 1814876, "n_tokens": 1807780, "n_chars": 1784021 }, "gpt_nexo_20b.cc100-de": { "vocab_size": 50277, "n_bytes": 1814876, "n_tokens": 583628, "n_chars": 1784021 }, "grok_1.cc100-de": { "vocab_size": 131072, "n_bytes": 1814876, "n_tokens": 505220, "n_chars": 1784021 }, "internlm2_chat_7b.cc100-de": { "vocab_size": 92544, "n_bytes": 1814876, "n_tokens": 583917, "n_chars": 1784021 }, "internlm2_math_7b.cc100-de": { "vocab_size": 92544, "n_bytes": 1814876, "n_tokens": 583917, "n_chars": 1784021 }, "internlm_chat_7b.cc100-de": { "vocab_size": 103168, "n_bytes": 1814876, "n_tokens": 580489, "n_chars": 1784021 }, "internlm_xcomposer_7b.cc100-de": { "vocab_size": 103168, "n_bytes": 1814876, "n_tokens": 580489, "n_chars": 1784021 }, "jamba_v0_1.cc100-de": { "vocab_size": 65536, "n_bytes": 1814876, "n_tokens": 535856, "n_chars": 1784021 }, "kplug.cc100-de": { "vocab_size": 10261, "n_bytes": 1814876, "n_tokens": 789053, "n_chars": 1784021 }, "llama.cc100-de": { "vocab_size": 32000, "n_bytes": 1814876, "n_tokens": 537320, "n_chars": 1784021 }, "llama2.cc100-de": { "vocab_size": 32001, "n_bytes": 1814876, "n_tokens": 537320, "n_chars": 1784021 }, "llama3.cc100-de": { "vocab_size": 128256, "n_bytes": 1814876, "n_tokens": 499766, "n_chars": 1784021 }, "mistral_7b.cc100-de": { "vocab_size": 32000, "n_bytes": 1814876, "n_tokens": 577526, "n_chars": 1784021 }, "mixtral_8_7b.cc100-de": { "vocab_size": 32000, "n_bytes": 1814876, "n_tokens": 577526, "n_chars": 1784021 }, "mobilebert_uncased.cc100-de": { "vocab_size": 30522, "n_bytes": 1814876, "n_tokens": 646485, "n_chars": 1784021 }, "moss.cc100-de": { "vocab_size": 106072, "n_bytes": 1814876, "n_tokens": 683401, "n_chars": 1784021 }, "mt5_large.cc100-de": { "vocab_size": 250100, "n_bytes": 1814876, "n_tokens": 480418, "n_chars": 1784021 }, "olmo_7b.cc100-de": { "vocab_size": 50280, "n_bytes": 1814876, "n_tokens": 583628, "n_chars": 1784021 }, "orion_14b_chat.cc100-de": { "vocab_size": 84608, "n_bytes": 1814876, "n_tokens": 744404, "n_chars": 1784021 }, "phi_1.cc100-de": { "vocab_size": 50295, "n_bytes": 1814876, "n_tokens": 684665, "n_chars": 1784021 }, "phi_2.cc100-de": { "vocab_size": 50295, "n_bytes": 1814876, "n_tokens": 684665, "n_chars": 1784021 }, "phi_3_mini.cc100-de": { "vocab_size": 32011, "n_bytes": 1814876, "n_tokens": 537320, "n_chars": 1784021 }, "pko_t5_large.cc100-de": { "vocab_size": 50358, "n_bytes": 1814876, "n_tokens": 1254350, "n_chars": 1784021 }, "prompt_clue.cc100-de": { "vocab_size": 32128, "n_bytes": 1814876, "n_tokens": 970463, "n_chars": 1784021 }, "qwen1_5_14b_chat.cc100-de": { "vocab_size": 151646, "n_bytes": 1814876, "n_tokens": 503561, "n_chars": 1784021 }, "qwen_1_8b_chat.cc100-de": { "vocab_size": 151851, "n_bytes": 1814876, "n_tokens": 503561, "n_chars": 1784021 }, "qwen_72b_chat.cc100-de": { "vocab_size": 151851, "n_bytes": 1814876, "n_tokens": 503561, "n_chars": 1784021 }, "qwen_7b_chat.cc100-de": { "vocab_size": 151851, "n_bytes": 1814876, "n_tokens": 503561, "n_chars": 1784021 }, "roberta_chinese_clue.cc100-de": { "vocab_size": 8021, "n_bytes": 1814876, "n_tokens": 915612, "n_chars": 1784021 }, "skywork_13b_base.cc100-de": { "vocab_size": 65519, "n_bytes": 1814876, "n_tokens": 537308, "n_chars": 1784021 }, "skywork_13b_math.cc100-de": { "vocab_size": 65519, "n_bytes": 1814876, "n_tokens": 537308, "n_chars": 1784021 }, "solar_10_7b.cc100-de": { "vocab_size": 32000, "n_bytes": 1814876, "n_tokens": 577526, "n_chars": 1784021 }, "starchat_alpha.cc100-de": { "vocab_size": 49156, "n_bytes": 1814876, "n_tokens": 620541, "n_chars": 1784021 }, "switch_c_2048.cc100-de": { "vocab_size": 32100, "n_bytes": 1814876, "n_tokens": 480254, "n_chars": 1784021 }, "t5_base.cc100-de": { "vocab_size": 32100, "n_bytes": 1814876, "n_tokens": 480254, "n_chars": 1784021 }, "t5_large.cc100-de": { "vocab_size": 32100, "n_bytes": 1814876, "n_tokens": 480254, "n_chars": 1784021 }, "t5_small.cc100-de": { "vocab_size": 32100, "n_bytes": 1814876, "n_tokens": 480254, "n_chars": 1784021 }, "text_davinci_003.cc100-de": { "vocab_size": 50281, "n_bytes": 1814876, "n_tokens": 684666, "n_chars": 1784021 }, "tigerbot_13b_chat_v2.cc100-de": { "vocab_size": 60515, "n_bytes": 1814876, "n_tokens": 528918, "n_chars": 1784021 }, "tigerbot_70b_chat_v4_4k.cc100-de": { "vocab_size": 65110, "n_bytes": 1814876, "n_tokens": 529170, "n_chars": 1784021 }, "wizardcoder_15b_v1.cc100-de": { "vocab_size": 49153, "n_bytes": 1814876, "n_tokens": 620541, "n_chars": 1784021 }, "wizardcoder_python_7b_v1.cc100-de": { "vocab_size": 32001, "n_bytes": 1814876, "n_tokens": 537320, "n_chars": 1784021 }, "wizardlm_7b_v1.cc100-de": { "vocab_size": 32001, "n_bytes": 1814876, "n_tokens": 537320, "n_chars": 1784021 }, "wizardmath_70b_v1.cc100-de": { "vocab_size": 32002, "n_bytes": 1814876, "n_tokens": 537320, "n_chars": 1784021 }, "xlm_roberta.cc100-de": { "vocab_size": 250002, "n_bytes": 1814876, "n_tokens": 432571, "n_chars": 1784021 }, "yi_34b.cc100-de": { "vocab_size": 64000, "n_bytes": 1814876, "n_tokens": 698366, "n_chars": 1784021 }, "yi_6b.cc100-de": { "vocab_size": 64000, "n_bytes": 1814876, "n_tokens": 698366, "n_chars": 1784021 }, "yi_vl34b.cc100-de": { "vocab_size": 64000, "n_bytes": 1814876, "n_tokens": 697065, "n_chars": 1784021 }, "zephyr_7b_beta.cc100-de": { "vocab_size": 32000, "n_bytes": 1814876, "n_tokens": 577526, "n_chars": 1784021 }, "gpt_neox_japanese_2_7b.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 1658946, "n_chars": 1630297 }, "gpt_neox_japanese_2_7b.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 1524129, "n_chars": 1484970 }, "character_glm_6b.cc100-ja": { "vocab_size": 64789, "n_bytes": 1774770, "n_tokens": 601380, "n_chars": 603065 }, "chatglm2_6b.cc100-ja": { "vocab_size": 64787, "n_bytes": 1774770, "n_tokens": 601380, "n_chars": 603065 }, "chatglm3_6b.cc100-ja": { "vocab_size": 64796, "n_bytes": 1774770, "n_tokens": 601380, "n_chars": 603065 }, "chatglm_6b.cc100-ja": { "vocab_size": 150344, "n_bytes": 1774770, "n_tokens": 489930, "n_chars": 603065 }, "chatyuan_large_v2.cc100-ja": { "vocab_size": 32128, "n_bytes": 1774770, "n_tokens": 575118, "n_chars": 603065 }, "chinese_llama.cc100-ja": { "vocab_size": 49953, "n_bytes": 1774770, "n_tokens": 614177, "n_chars": 603065 }, "chinese_llama2.cc100-ja": { "vocab_size": 55296, "n_bytes": 1774770, "n_tokens": 624362, "n_chars": 603065 }, "code_davinci_002.cc100-ja": { "vocab_size": 50281, "n_bytes": 1774770, "n_tokens": 844362, "n_chars": 603065 }, "crystal_coder.cc100-ja": { "vocab_size": 32022, "n_bytes": 1774770, "n_tokens": 718461, "n_chars": 603065 }, "dbrx_instruct.cc100-ja": { "vocab_size": 100280, "n_bytes": 1774770, "n_tokens": 630348, "n_chars": 603065 }, "deepseek_coder_33b_instruct.cc100-ja": { "vocab_size": 32022, "n_bytes": 1774770, "n_tokens": 1018060, "n_chars": 603065 }, "deepseek_llm_7b_base.cc100-ja": { "vocab_size": 100015, "n_bytes": 1774770, "n_tokens": 761467, "n_chars": 603065 }, "falcon_180b.cc100-ja": { "vocab_size": 65024, "n_bytes": 1774770, "n_tokens": 842458, "n_chars": 603065 }, "falcon_7b.cc100-ja": { "vocab_size": 65024, "n_bytes": 1774770, "n_tokens": 842458, "n_chars": 603065 }, "fastchat_t5_3b.cc100-ja": { "vocab_size": 32110, "n_bytes": 1774770, "n_tokens": 53915, "n_chars": 603065 }, "flan_t5_base.cc100-ja": { "vocab_size": 32100, "n_bytes": 1774770, "n_tokens": 51999, "n_chars": 603065 }, "gemma_7b.cc100-ja": { "vocab_size": 256000, "n_bytes": 1774770, "n_tokens": 317873, "n_chars": 603065 }, "gpt2.cc100-ja": { "vocab_size": 50257, "n_bytes": 1774770, "n_tokens": 844362, "n_chars": 603065 }, "gpt2_chinese.cc100-ja": { "vocab_size": 21128, "n_bytes": 1774770, "n_tokens": 503085, "n_chars": 603065 }, "gpt_35_turbo.cc100-ja": { "vocab_size": 100277, "n_bytes": 1774770, "n_tokens": 630348, "n_chars": 603065 }, "gpt_4.cc100-ja": { "vocab_size": 100277, "n_bytes": 1774770, "n_tokens": 630348, "n_chars": 603065 }, "gpt_neox_japanese_2_7b.cc100-ja": { "vocab_size": 32000, "n_bytes": 1774770, "n_tokens": 410803, "n_chars": 603065 }, "gpt_nexo_20b.cc100-ja": { "vocab_size": 50277, "n_bytes": 1774770, "n_tokens": 605168, "n_chars": 603065 }, "grok_1.cc100-ja": { "vocab_size": 131072, "n_bytes": 1774770, "n_tokens": 497590, "n_chars": 603065 }, "internlm2_chat_7b.cc100-ja": { "vocab_size": 92544, "n_bytes": 1774770, "n_tokens": 595803, "n_chars": 603065 }, "internlm2_math_7b.cc100-ja": { "vocab_size": 92544, "n_bytes": 1774770, "n_tokens": 595803, "n_chars": 603065 }, "internlm_chat_7b.cc100-ja": { "vocab_size": 103168, "n_bytes": 1774770, "n_tokens": 448212, "n_chars": 603065 }, "internlm_xcomposer_7b.cc100-ja": { "vocab_size": 103168, "n_bytes": 1774770, "n_tokens": 448212, "n_chars": 603065 }, "jamba_v0_1.cc100-ja": { "vocab_size": 65536, "n_bytes": 1774770, "n_tokens": 683256, "n_chars": 603065 }, "kplug.cc100-ja": { "vocab_size": 10261, "n_bytes": 1774770, "n_tokens": 338023, "n_chars": 603065 }, "llama.cc100-ja": { "vocab_size": 32000, "n_bytes": 1774770, "n_tokens": 728461, "n_chars": 603065 }, "llama2.cc100-ja": { "vocab_size": 32001, "n_bytes": 1774770, "n_tokens": 728461, "n_chars": 603065 }, "llama3.cc100-ja": { "vocab_size": 128256, "n_bytes": 1774770, "n_tokens": 414715, "n_chars": 603065 }, "mistral_7b.cc100-ja": { "vocab_size": 32000, "n_bytes": 1774770, "n_tokens": 685134, "n_chars": 603065 }, "mixtral_8_7b.cc100-ja": { "vocab_size": 32000, "n_bytes": 1774770, "n_tokens": 685134, "n_chars": 603065 }, "mobilebert_uncased.cc100-ja": { "vocab_size": 30522, "n_bytes": 1774770, "n_tokens": 580634, "n_chars": 603065 }, "moss.cc100-ja": { "vocab_size": 106072, "n_bytes": 1774770, "n_tokens": 600011, "n_chars": 603065 }, "mt5_large.cc100-ja": { "vocab_size": 250100, "n_bytes": 1774770, "n_tokens": 300542, "n_chars": 603065 }, "olmo_7b.cc100-ja": { "vocab_size": 50280, "n_bytes": 1774770, "n_tokens": 605168, "n_chars": 603065 }, "orion_14b_chat.cc100-ja": { "vocab_size": 84608, "n_bytes": 1774770, "n_tokens": 324956, "n_chars": 603065 }, "phi_1.cc100-ja": { "vocab_size": 50295, "n_bytes": 1774770, "n_tokens": 844362, "n_chars": 603065 }, "phi_2.cc100-ja": { "vocab_size": 50295, "n_bytes": 1774770, "n_tokens": 844362, "n_chars": 603065 }, "phi_3_mini.cc100-ja": { "vocab_size": 32011, "n_bytes": 1774770, "n_tokens": 728461, "n_chars": 603065 }, "pko_t5_large.cc100-ja": { "vocab_size": 50358, "n_bytes": 1774770, "n_tokens": 1766950, "n_chars": 603065 }, "prompt_clue.cc100-ja": { "vocab_size": 32128, "n_bytes": 1774770, "n_tokens": 575118, "n_chars": 603065 }, "qwen1_5_14b_chat.cc100-ja": { "vocab_size": 151646, "n_bytes": 1774770, "n_tokens": 377144, "n_chars": 603065 }, "qwen_1_8b_chat.cc100-ja": { "vocab_size": 151851, "n_bytes": 1774770, "n_tokens": 377144, "n_chars": 603065 }, "qwen_72b_chat.cc100-ja": { "vocab_size": 151851, "n_bytes": 1774770, "n_tokens": 377144, "n_chars": 603065 }, "qwen_7b_chat.cc100-ja": { "vocab_size": 151851, "n_bytes": 1774770, "n_tokens": 377144, "n_chars": 603065 }, "roberta_chinese_clue.cc100-ja": { "vocab_size": 8021, "n_bytes": 1774770, "n_tokens": 339411, "n_chars": 603065 }, "skywork_13b_base.cc100-ja": { "vocab_size": 65519, "n_bytes": 1774770, "n_tokens": 603613, "n_chars": 603065 }, "skywork_13b_math.cc100-ja": { "vocab_size": 65519, "n_bytes": 1774770, "n_tokens": 603613, "n_chars": 603065 }, "solar_10_7b.cc100-ja": { "vocab_size": 32000, "n_bytes": 1774770, "n_tokens": 685134, "n_chars": 603065 }, "starchat_alpha.cc100-ja": { "vocab_size": 49156, "n_bytes": 1774770, "n_tokens": 546876, "n_chars": 603065 }, "switch_c_2048.cc100-ja": { "vocab_size": 32100, "n_bytes": 1774770, "n_tokens": 51947, "n_chars": 603065 }, "t5_base.cc100-ja": { "vocab_size": 32100, "n_bytes": 1774770, "n_tokens": 51947, "n_chars": 603065 }, "t5_large.cc100-ja": { "vocab_size": 32100, "n_bytes": 1774770, "n_tokens": 51947, "n_chars": 603065 }, "t5_small.cc100-ja": { "vocab_size": 32100, "n_bytes": 1774770, "n_tokens": 51947, "n_chars": 603065 }, "text_davinci_003.cc100-ja": { "vocab_size": 50281, "n_bytes": 1774770, "n_tokens": 844362, "n_chars": 603065 }, "tigerbot_13b_chat_v2.cc100-ja": { "vocab_size": 60515, "n_bytes": 1774770, "n_tokens": 567792, "n_chars": 603065 }, "tigerbot_70b_chat_v4_4k.cc100-ja": { "vocab_size": 65110, "n_bytes": 1774770, "n_tokens": 406571, "n_chars": 603065 }, "wizardcoder_15b_v1.cc100-ja": { "vocab_size": 49153, "n_bytes": 1774770, "n_tokens": 546876, "n_chars": 603065 }, "wizardcoder_python_7b_v1.cc100-ja": { "vocab_size": 32001, "n_bytes": 1774770, "n_tokens": 728461, "n_chars": 603065 }, "wizardlm_7b_v1.cc100-ja": { "vocab_size": 32001, "n_bytes": 1774770, "n_tokens": 728461, "n_chars": 603065 }, "wizardmath_70b_v1.cc100-ja": { "vocab_size": 32002, "n_bytes": 1774770, "n_tokens": 728461, "n_chars": 603065 }, "xlm_roberta.cc100-ja": { "vocab_size": 250002, "n_bytes": 1774770, "n_tokens": 344820, "n_chars": 603065 }, "yi_34b.cc100-ja": { "vocab_size": 64000, "n_bytes": 1774770, "n_tokens": 740791, "n_chars": 603065 }, "yi_6b.cc100-ja": { "vocab_size": 64000, "n_bytes": 1774770, "n_tokens": 740791, "n_chars": 603065 }, "yi_vl34b.cc100-ja": { "vocab_size": 64000, "n_bytes": 1774770, "n_tokens": 749927, "n_chars": 603065 }, "zephyr_7b_beta.cc100-ja": { "vocab_size": 32000, "n_bytes": 1774770, "n_tokens": 685134, "n_chars": 603065 }, "llama_3_chinese_8b.cc100-ar": { "vocab_size": 128256, "n_bytes": 2813283, "n_tokens": 625514, "n_chars": 1560987 }, "llama_3_chinese_8b.cc100-de": { "vocab_size": 128256, "n_bytes": 1814876, "n_tokens": 509766, "n_chars": 1784021 }, "llama_3_chinese_8b.cc100-en": { "vocab_size": 128256, "n_bytes": 1124813, "n_tokens": 264944, "n_chars": 1121360 }, "llama_3_chinese_8b.cc100-es": { "vocab_size": 128256, "n_bytes": 1664455, "n_tokens": 443289, "n_chars": 1630297 }, "aya_101.cc100-fa": { "vocab_size": 250100, "n_bytes": 2054052, "n_tokens": 429922, "n_chars": 1145876 }, "baichuan.cc100-fa": { "vocab_size": 64000, "n_bytes": 2054052, "n_tokens": 1142057, "n_chars": 1145876 }, "baichuan2.cc100-fa": { "vocab_size": 125696, "n_bytes": 2054052, "n_tokens": 1052077, "n_chars": 1145876 }, "bert_base_cased.cc100-fa": { "vocab_size": 28996, "n_bytes": 2054052, "n_tokens": 903078, "n_chars": 1145876 }, "bert_base_chinese.cc100-fa": { "vocab_size": 21128, "n_bytes": 2054052, "n_tokens": 396414, "n_chars": 1145876 }, "bert_base_uncased.cc100-fa": { "vocab_size": 30522, "n_bytes": 2054052, "n_tokens": 910783, "n_chars": 1145876 }, "bloom.cc100-fa": { "vocab_size": 250680, "n_bytes": 2054052, "n_tokens": 434406, "n_chars": 1145876 }, "byt5_small.cc100-fa": { "vocab_size": 384, "n_bytes": 2054052, "n_tokens": 2064052, "n_chars": 1145876 }, "character_glm_6b.cc100-fa": { "vocab_size": 64789, "n_bytes": 2054052, "n_tokens": 1165051, "n_chars": 1145876 }, "chatglm2_6b.cc100-fa": { "vocab_size": 64787, "n_bytes": 2054052, "n_tokens": 1165051, "n_chars": 1145876 }, "chatglm3_6b.cc100-fa": { "vocab_size": 64796, "n_bytes": 2054052, "n_tokens": 1165051, "n_chars": 1145876 }, "chatglm_6b.cc100-fa": { "vocab_size": 150344, "n_bytes": 2054052, "n_tokens": 910808, "n_chars": 1145876 }, "chatyuan_large_v2.cc100-fa": { "vocab_size": 32128, "n_bytes": 2054052, "n_tokens": 740377, "n_chars": 1145876 }, "chinese_llama.cc100-fa": { "vocab_size": 49953, "n_bytes": 2054052, "n_tokens": 1150750, "n_chars": 1145876 }, "chinese_llama2.cc100-fa": { "vocab_size": 55296, "n_bytes": 2054052, "n_tokens": 1155078, "n_chars": 1145876 }, "code_davinci_002.cc100-fa": { "vocab_size": 50281, "n_bytes": 2054052, "n_tokens": 1292300, "n_chars": 1145876 }, "crystal_coder.cc100-fa": { "vocab_size": 32022, "n_bytes": 2054052, "n_tokens": 1145076, "n_chars": 1145876 }, "dbrx_instruct.cc100-fa": { "vocab_size": 100280, "n_bytes": 2054052, "n_tokens": 818067, "n_chars": 1145876 }, "deepseek_coder_33b_instruct.cc100-fa": { "vocab_size": 32022, "n_bytes": 2054052, "n_tokens": 1326109, "n_chars": 1145876 }, "deepseek_llm_7b_base.cc100-fa": { "vocab_size": 100015, "n_bytes": 2054052, "n_tokens": 973451, "n_chars": 1145876 }, "falcon_180b.cc100-fa": { "vocab_size": 65024, "n_bytes": 2054052, "n_tokens": 1246580, "n_chars": 1145876 }, "falcon_7b.cc100-fa": { "vocab_size": 65024, "n_bytes": 2054052, "n_tokens": 1246580, "n_chars": 1145876 }, "fastchat_t5_3b.cc100-fa": { "vocab_size": 32110, "n_bytes": 2054052, "n_tokens": 712443, "n_chars": 1145876 }, "flan_t5_base.cc100-fa": { "vocab_size": 32100, "n_bytes": 2054052, "n_tokens": 493779, "n_chars": 1145876 }, "gemma_7b.cc100-fa": { "vocab_size": 256000, "n_bytes": 2054052, "n_tokens": 373762, "n_chars": 1145876 }, "gpt2.cc100-fa": { "vocab_size": 50257, "n_bytes": 2054052, "n_tokens": 1292300, "n_chars": 1145876 }, "gpt2_chinese.cc100-fa": { "vocab_size": 21128, "n_bytes": 2054052, "n_tokens": 406174, "n_chars": 1145876 }, "gpt_35_turbo.cc100-fa": { "vocab_size": 100277, "n_bytes": 2054052, "n_tokens": 818067, "n_chars": 1145876 }, "gpt_4.cc100-fa": { "vocab_size": 100277, "n_bytes": 2054052, "n_tokens": 818067, "n_chars": 1145876 }, "gpt_neox_japanese_2_7b.cc100-fa": { "vocab_size": 32000, "n_bytes": 2054052, "n_tokens": 2036715, "n_chars": 1145876 }, "gpt_nexo_20b.cc100-fa": { "vocab_size": 50277, "n_bytes": 2054052, "n_tokens": 866434, "n_chars": 1145876 }, "grok_1.cc100-fa": { "vocab_size": 131072, "n_bytes": 2054052, "n_tokens": 1073281, "n_chars": 1145876 }, "internlm2_chat_7b.cc100-fa": { "vocab_size": 92544, "n_bytes": 2054052, "n_tokens": 1195032, "n_chars": 1145876 }, "internlm2_math_7b.cc100-fa": { "vocab_size": 92544, "n_bytes": 2054052, "n_tokens": 1195032, "n_chars": 1145876 }, "internlm_chat_7b.cc100-fa": { "vocab_size": 103168, "n_bytes": 2054052, "n_tokens": 640945, "n_chars": 1145876 }, "internlm_xcomposer_7b.cc100-fa": { "vocab_size": 103168, "n_bytes": 2054052, "n_tokens": 640945, "n_chars": 1145876 }, "jamba_v0_1.cc100-fa": { "vocab_size": 65536, "n_bytes": 2054052, "n_tokens": 732550, "n_chars": 1145876 }, "kplug.cc100-fa": { "vocab_size": 10261, "n_bytes": 2054052, "n_tokens": 274671, "n_chars": 1145876 }, "llama.cc100-fa": { "vocab_size": 32000, "n_bytes": 2054052, "n_tokens": 1155076, "n_chars": 1145876 }, "llama2.cc100-fa": { "vocab_size": 32001, "n_bytes": 2054052, "n_tokens": 1155076, "n_chars": 1145876 }, "llama3.cc100-fa": { "vocab_size": 128256, "n_bytes": 2054052, "n_tokens": 387448, "n_chars": 1145876 }, "llama_3_chinese_8b.cc100-fa": { "vocab_size": 128256, "n_bytes": 2054052, "n_tokens": 397448, "n_chars": 1145876 }, "mistral_7b.cc100-fa": { "vocab_size": 32000, "n_bytes": 2054052, "n_tokens": 1133278, "n_chars": 1145876 }, "mixtral_8_7b.cc100-fa": { "vocab_size": 32000, "n_bytes": 2054052, "n_tokens": 1133278, "n_chars": 1145876 }, "mobilebert_uncased.cc100-fa": { "vocab_size": 30522, "n_bytes": 2054052, "n_tokens": 910783, "n_chars": 1145876 }, "moss.cc100-fa": { "vocab_size": 106072, "n_bytes": 2054052, "n_tokens": 1285426, "n_chars": 1145876 }, "mt5_large.cc100-fa": { "vocab_size": 250100, "n_bytes": 2054052, "n_tokens": 429922, "n_chars": 1145876 }, "olmo_7b.cc100-fa": { "vocab_size": 50280, "n_bytes": 2054052, "n_tokens": 866434, "n_chars": 1145876 }, "orion_14b_chat.cc100-fa": { "vocab_size": 84608, "n_bytes": 2054052, "n_tokens": 1131108, "n_chars": 1145876 }, "phi_1.cc100-fa": { "vocab_size": 50295, "n_bytes": 2054052, "n_tokens": 1292300, "n_chars": 1145876 }, "phi_2.cc100-fa": { "vocab_size": 50295, "n_bytes": 2054052, "n_tokens": 1292300, "n_chars": 1145876 }, "phi_3_mini.cc100-fa": { "vocab_size": 32011, "n_bytes": 2054052, "n_tokens": 1155076, "n_chars": 1145876 }, "pko_t5_large.cc100-fa": { "vocab_size": 50358, "n_bytes": 2054052, "n_tokens": 2061040, "n_chars": 1145876 }, "prompt_clue.cc100-fa": { "vocab_size": 32128, "n_bytes": 2054052, "n_tokens": 740377, "n_chars": 1145876 }, "qwen1_5_14b_chat.cc100-fa": { "vocab_size": 151646, "n_bytes": 2054052, "n_tokens": 643421, "n_chars": 1145876 }, "qwen_1_8b_chat.cc100-fa": { "vocab_size": 151851, "n_bytes": 2054052, "n_tokens": 643421, "n_chars": 1145876 }, "qwen_72b_chat.cc100-fa": { "vocab_size": 151851, "n_bytes": 2054052, "n_tokens": 643421, "n_chars": 1145876 }, "qwen_7b_chat.cc100-fa": { "vocab_size": 151851, "n_bytes": 2054052, "n_tokens": 643421, "n_chars": 1145876 }, "roberta_chinese_clue.cc100-fa": { "vocab_size": 8021, "n_bytes": 2054052, "n_tokens": 407763, "n_chars": 1145876 }, "skywork_13b_base.cc100-fa": { "vocab_size": 65519, "n_bytes": 2054052, "n_tokens": 1155072, "n_chars": 1145876 }, "skywork_13b_math.cc100-fa": { "vocab_size": 65519, "n_bytes": 2054052, "n_tokens": 1155072, "n_chars": 1145876 }, "solar_10_7b.cc100-fa": { "vocab_size": 32000, "n_bytes": 2054052, "n_tokens": 1133278, "n_chars": 1145876 }, "starchat_alpha.cc100-fa": { "vocab_size": 49156, "n_bytes": 2054052, "n_tokens": 851630, "n_chars": 1145876 }, "switch_c_2048.cc100-fa": { "vocab_size": 32100, "n_bytes": 2054052, "n_tokens": 493767, "n_chars": 1145876 }, "t5_base.cc100-fa": { "vocab_size": 32100, "n_bytes": 2054052, "n_tokens": 493767, "n_chars": 1145876 }, "t5_large.cc100-fa": { "vocab_size": 32100, "n_bytes": 2054052, "n_tokens": 493767, "n_chars": 1145876 }, "t5_small.cc100-fa": { "vocab_size": 32100, "n_bytes": 2054052, "n_tokens": 493767, "n_chars": 1145876 }, "text_davinci_003.cc100-fa": { "vocab_size": 50281, "n_bytes": 2054052, "n_tokens": 1292300, "n_chars": 1145876 }, "tigerbot_13b_chat_v2.cc100-fa": { "vocab_size": 60515, "n_bytes": 2054052, "n_tokens": 1145046, "n_chars": 1145876 }, "tigerbot_70b_chat_v4_4k.cc100-fa": { "vocab_size": 65110, "n_bytes": 2054052, "n_tokens": 1145048, "n_chars": 1145876 }, "wizardcoder_15b_v1.cc100-fa": { "vocab_size": 49153, "n_bytes": 2054052, "n_tokens": 851630, "n_chars": 1145876 }, "wizardcoder_python_7b_v1.cc100-fa": { "vocab_size": 32001, "n_bytes": 2054052, "n_tokens": 1155076, "n_chars": 1145876 }, "wizardlm_7b_v1.cc100-fa": { "vocab_size": 32001, "n_bytes": 2054052, "n_tokens": 1155076, "n_chars": 1145876 }, "wizardmath_70b_v1.cc100-fa": { "vocab_size": 32002, "n_bytes": 2054052, "n_tokens": 1155076, "n_chars": 1145876 }, "xlm_roberta.cc100-fa": { "vocab_size": 250002, "n_bytes": 2054052, "n_tokens": 330926, "n_chars": 1145876 }, "yi_34b.cc100-fa": { "vocab_size": 64000, "n_bytes": 2054052, "n_tokens": 1337264, "n_chars": 1145876 }, "yi_6b.cc100-fa": { "vocab_size": 64000, "n_bytes": 2054052, "n_tokens": 1337264, "n_chars": 1145876 }, "yi_vl34b.cc100-fa": { "vocab_size": 64000, "n_bytes": 2054052, "n_tokens": 1346819, "n_chars": 1145876 }, "zephyr_7b_beta.cc100-fa": { "vocab_size": 32000, "n_bytes": 2054052, "n_tokens": 1133278, "n_chars": 1145876 }, "llama_3_chinese_8b.cc100-fr": { "vocab_size": 128256, "n_bytes": 1540504, "n_tokens": 422146, "n_chars": 1484970 }, "llama_3_chinese_8b.cc100-ja": { "vocab_size": 128256, "n_bytes": 1774770, "n_tokens": 424715, "n_chars": 603065 }, "aya_101.cc100-ko": { "vocab_size": 250100, "n_bytes": 1524839, "n_tokens": 434586, "n_chars": 655190 }, "baichuan.cc100-ko": { "vocab_size": 64000, "n_bytes": 1524839, "n_tokens": 639258, "n_chars": 655190 }, "baichuan2.cc100-ko": { "vocab_size": 125696, "n_bytes": 1524839, "n_tokens": 623358, "n_chars": 655190 }, "bert_base_cased.cc100-ko": { "vocab_size": 28996, "n_bytes": 1524839, "n_tokens": 222828, "n_chars": 655190 }, "bert_base_chinese.cc100-ko": { "vocab_size": 21128, "n_bytes": 1524839, "n_tokens": 219752, "n_chars": 655190 }, "bert_base_uncased.cc100-ko": { "vocab_size": 30522, "n_bytes": 1524839, "n_tokens": 904756, "n_chars": 655190 }, "bloom.cc100-ko": { "vocab_size": 250680, "n_bytes": 1524839, "n_tokens": 742111, "n_chars": 655190 }, "byt5_small.cc100-ko": { "vocab_size": 384, "n_bytes": 1524839, "n_tokens": 1534839, "n_chars": 655190 }, "character_glm_6b.cc100-ko": { "vocab_size": 64789, "n_bytes": 1524839, "n_tokens": 672160, "n_chars": 655190 }, "chatglm2_6b.cc100-ko": { "vocab_size": 64787, "n_bytes": 1524839, "n_tokens": 672156, "n_chars": 655190 }, "chatglm3_6b.cc100-ko": { "vocab_size": 64796, "n_bytes": 1524839, "n_tokens": 672160, "n_chars": 655190 }, "chatglm_6b.cc100-ko": { "vocab_size": 150344, "n_bytes": 1524839, "n_tokens": 939630, "n_chars": 655190 }, "chatyuan_large_v2.cc100-ko": { "vocab_size": 32128, "n_bytes": 1524839, "n_tokens": 354411, "n_chars": 655190 }, "chinese_llama.cc100-ko": { "vocab_size": 49953, "n_bytes": 1524839, "n_tokens": 913553, "n_chars": 655190 }, "chinese_llama2.cc100-ko": { "vocab_size": 55296, "n_bytes": 1524839, "n_tokens": 963427, "n_chars": 655190 }, "code_davinci_002.cc100-ko": { "vocab_size": 50281, "n_bytes": 1524839, "n_tokens": 1308993, "n_chars": 655190 }, "crystal_coder.cc100-ko": { "vocab_size": 32022, "n_bytes": 1524839, "n_tokens": 954428, "n_chars": 655190 }, "dbrx_instruct.cc100-ko": { "vocab_size": 100280, "n_bytes": 1524839, "n_tokens": 652277, "n_chars": 655190 }, "deepseek_coder_33b_instruct.cc100-ko": { "vocab_size": 32022, "n_bytes": 1524839, "n_tokens": 1454805, "n_chars": 655190 }, "deepseek_llm_7b_base.cc100-ko": { "vocab_size": 100015, "n_bytes": 1524839, "n_tokens": 1081983, "n_chars": 655190 }, "falcon_180b.cc100-ko": { "vocab_size": 65024, "n_bytes": 1524839, "n_tokens": 1330568, "n_chars": 655190 }, "falcon_7b.cc100-ko": { "vocab_size": 65024, "n_bytes": 1524839, "n_tokens": 1330568, "n_chars": 655190 }, "fastchat_t5_3b.cc100-ko": { "vocab_size": 32110, "n_bytes": 1524839, "n_tokens": 484953, "n_chars": 655190 }, "flan_t5_base.cc100-ko": { "vocab_size": 32100, "n_bytes": 1524839, "n_tokens": 344457, "n_chars": 655190 }, "gemma_7b.cc100-ko": { "vocab_size": 256000, "n_bytes": 1524839, "n_tokens": 464410, "n_chars": 655190 }, "gpt2.cc100-ko": { "vocab_size": 50257, "n_bytes": 1524839, "n_tokens": 1309029, "n_chars": 655190 }, "gpt2_chinese.cc100-ko": { "vocab_size": 21128, "n_bytes": 1524839, "n_tokens": 1055974, "n_chars": 655190 }, "gpt_35_turbo.cc100-ko": { "vocab_size": 100277, "n_bytes": 1524839, "n_tokens": 652277, "n_chars": 655190 }, "gpt_4.cc100-ko": { "vocab_size": 100277, "n_bytes": 1524839, "n_tokens": 652277, "n_chars": 655190 }, "gpt_neox_japanese_2_7b.cc100-ko": { "vocab_size": 32000, "n_bytes": 1524839, "n_tokens": 1512832, "n_chars": 655190 }, "gpt_nexo_20b.cc100-ko": { "vocab_size": 50277, "n_bytes": 1524839, "n_tokens": 973288, "n_chars": 655190 }, "grok_1.cc100-ko": { "vocab_size": 131072, "n_bytes": 1524839, "n_tokens": 1152005, "n_chars": 655190 }, "internlm2_chat_7b.cc100-ko": { "vocab_size": 92544, "n_bytes": 1524839, "n_tokens": 1008524, "n_chars": 655190 }, "internlm2_math_7b.cc100-ko": { "vocab_size": 92544, "n_bytes": 1524839, "n_tokens": 1008524, "n_chars": 655190 }, "internlm_chat_7b.cc100-ko": { "vocab_size": 103168, "n_bytes": 1524839, "n_tokens": 839609, "n_chars": 655190 }, "internlm_xcomposer_7b.cc100-ko": { "vocab_size": 103168, "n_bytes": 1524839, "n_tokens": 839609, "n_chars": 655190 }, "jamba_v0_1.cc100-ko": { "vocab_size": 65536, "n_bytes": 1524839, "n_tokens": 715688, "n_chars": 655190 }, "kplug.cc100-ko": { "vocab_size": 10261, "n_bytes": 1524839, "n_tokens": 222771, "n_chars": 655190 }, "llama.cc100-ko": { "vocab_size": 32000, "n_bytes": 1524839, "n_tokens": 964428, "n_chars": 655190 }, "llama2.cc100-ko": { "vocab_size": 32001, "n_bytes": 1524839, "n_tokens": 964428, "n_chars": 655190 }, "llama3.cc100-ko": { "vocab_size": 128256, "n_bytes": 1524839, "n_tokens": 412595, "n_chars": 655190 }, "llama_3_chinese_8b.cc100-ko": { "vocab_size": 128256, "n_bytes": 1524839, "n_tokens": 422595, "n_chars": 655190 }, "mistral_7b.cc100-ko": { "vocab_size": 32000, "n_bytes": 1524839, "n_tokens": 728766, "n_chars": 655190 }, "mixtral_8_7b.cc100-ko": { "vocab_size": 32000, "n_bytes": 1524839, "n_tokens": 728766, "n_chars": 655190 }, "mobilebert_uncased.cc100-ko": { "vocab_size": 30522, "n_bytes": 1524839, "n_tokens": 904756, "n_chars": 655190 }, "moss.cc100-ko": { "vocab_size": 106072, "n_bytes": 1524839, "n_tokens": 1305249, "n_chars": 655190 }, "mt5_large.cc100-ko": { "vocab_size": 250100, "n_bytes": 1524839, "n_tokens": 434586, "n_chars": 655190 }, "olmo_7b.cc100-ko": { "vocab_size": 50280, "n_bytes": 1524839, "n_tokens": 973288, "n_chars": 655190 }, "orion_14b_chat.cc100-ko": { "vocab_size": 84608, "n_bytes": 1524839, "n_tokens": 351149, "n_chars": 655190 }, "phi_1.cc100-ko": { "vocab_size": 50295, "n_bytes": 1524839, "n_tokens": 1308988, "n_chars": 655190 }, "phi_2.cc100-ko": { "vocab_size": 50295, "n_bytes": 1524839, "n_tokens": 1308988, "n_chars": 655190 }, "phi_3_mini.cc100-ko": { "vocab_size": 32011, "n_bytes": 1524839, "n_tokens": 964428, "n_chars": 655190 }, "pko_t5_large.cc100-ko": { "vocab_size": 50358, "n_bytes": 1524839, "n_tokens": 471643, "n_chars": 655190 }, "prompt_clue.cc100-ko": { "vocab_size": 32128, "n_bytes": 1524839, "n_tokens": 354411, "n_chars": 655190 }, "qwen1_5_14b_chat.cc100-ko": { "vocab_size": 151646, "n_bytes": 1524839, "n_tokens": 457492, "n_chars": 655190 }, "qwen_1_8b_chat.cc100-ko": { "vocab_size": 151851, "n_bytes": 1524839, "n_tokens": 457492, "n_chars": 655190 }, "qwen_72b_chat.cc100-ko": { "vocab_size": 151851, "n_bytes": 1524839, "n_tokens": 457492, "n_chars": 655190 }, "qwen_7b_chat.cc100-ko": { "vocab_size": 151851, "n_bytes": 1524839, "n_tokens": 457492, "n_chars": 655190 }, "roberta_chinese_clue.cc100-ko": { "vocab_size": 8021, "n_bytes": 1524839, "n_tokens": 226812, "n_chars": 655190 }, "skywork_13b_base.cc100-ko": { "vocab_size": 65519, "n_bytes": 1524839, "n_tokens": 962744, "n_chars": 655190 }, "skywork_13b_math.cc100-ko": { "vocab_size": 65519, "n_bytes": 1524839, "n_tokens": 962744, "n_chars": 655190 }, "solar_10_7b.cc100-ko": { "vocab_size": 32000, "n_bytes": 1524839, "n_tokens": 728766, "n_chars": 655190 }, "starchat_alpha.cc100-ko": { "vocab_size": 49156, "n_bytes": 1524839, "n_tokens": 580873, "n_chars": 655190 }, "switch_c_2048.cc100-ko": { "vocab_size": 32100, "n_bytes": 1524839, "n_tokens": 344457, "n_chars": 655190 }, "t5_base.cc100-ko": { "vocab_size": 32100, "n_bytes": 1524839, "n_tokens": 344457, "n_chars": 655190 }, "t5_large.cc100-ko": { "vocab_size": 32100, "n_bytes": 1524839, "n_tokens": 344457, "n_chars": 655190 }, "t5_small.cc100-ko": { "vocab_size": 32100, "n_bytes": 1524839, "n_tokens": 344457, "n_chars": 655190 }, "text_davinci_003.cc100-ko": { "vocab_size": 50281, "n_bytes": 1524839, "n_tokens": 1308993, "n_chars": 655190 }, "tigerbot_13b_chat_v2.cc100-ko": { "vocab_size": 60515, "n_bytes": 1524839, "n_tokens": 793053, "n_chars": 655190 }, "tigerbot_70b_chat_v4_4k.cc100-ko": { "vocab_size": 65110, "n_bytes": 1524839, "n_tokens": 484082, "n_chars": 655190 }, "wizardcoder_15b_v1.cc100-ko": { "vocab_size": 49153, "n_bytes": 1524839, "n_tokens": 580873, "n_chars": 655190 }, "wizardcoder_python_7b_v1.cc100-ko": { "vocab_size": 32001, "n_bytes": 1524839, "n_tokens": 964428, "n_chars": 655190 }, "wizardlm_7b_v1.cc100-ko": { "vocab_size": 32001, "n_bytes": 1524839, "n_tokens": 964428, "n_chars": 655190 }, "wizardmath_70b_v1.cc100-ko": { "vocab_size": 32002, "n_bytes": 1524839, "n_tokens": 964428, "n_chars": 655190 }, "xlm_roberta.cc100-ko": { "vocab_size": 250002, "n_bytes": 1524839, "n_tokens": 374571, "n_chars": 655190 }, "yi_34b.cc100-ko": { "vocab_size": 64000, "n_bytes": 1524839, "n_tokens": 1203134, "n_chars": 655190 }, "yi_6b.cc100-ko": { "vocab_size": 64000, "n_bytes": 1524839, "n_tokens": 1203134, "n_chars": 655190 }, "yi_vl34b.cc100-ko": { "vocab_size": 64000, "n_bytes": 1524839, "n_tokens": 1210021, "n_chars": 655190 }, "zephyr_7b_beta.cc100-ko": { "vocab_size": 32000, "n_bytes": 1524839, "n_tokens": 728766, "n_chars": 655190 }, "llama_3_chinese_8b.cc100-zh-Hans": { "vocab_size": 128256, "n_bytes": 2633047, "n_tokens": 757405, "n_chars": 927311 } }