{ "amber.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "aya_101.cc100-en": { "vocab_size": 250100, "n_bytes": 1124813, "n_tokens": 317881, "n_chars": 1121360 }, "baichuan.cc100-en": { "vocab_size": 64000, "n_bytes": 1124813, "n_tokens": 280108, "n_chars": 1121360 }, "baichuan2.cc100-en": { "vocab_size": 125696, "n_bytes": 1124813, "n_tokens": 269011, "n_chars": 1121360 }, "bert_base_cased.cc100-en": { "vocab_size": 28996, "n_bytes": 1124813, "n_tokens": 288022, "n_chars": 1121360 }, "bert_base_chinese.cc100-en": { "vocab_size": 21128, "n_bytes": 1124813, "n_tokens": 377068, "n_chars": 1121360 }, "bert_base_uncased.cc100-en": { "vocab_size": 30522, "n_bytes": 1124813, "n_tokens": 280575, "n_chars": 1121360 }, "bloom.cc100-en": { "vocab_size": 250680, "n_bytes": 1124813, "n_tokens": 257405, "n_chars": 1121360 }, "byt5_small.cc100-en": { "vocab_size": 384, "n_bytes": 1124813, "n_tokens": 1134813, "n_chars": 1121360 }, "character_glm_6b.cc100-en": { "vocab_size": 64789, "n_bytes": 1124813, "n_tokens": 289347, "n_chars": 1121360 }, "chatglm2_6b.cc100-en": { "vocab_size": 64787, "n_bytes": 1124813, "n_tokens": 289329, "n_chars": 1121360 }, "chatglm3_6b.cc100-en": { "vocab_size": 64796, "n_bytes": 1124813, "n_tokens": 289347, "n_chars": 1121360 }, "chatglm_6b.cc100-en": { "vocab_size": 150344, "n_bytes": 1124813, "n_tokens": 284761, "n_chars": 1121360 }, "chatyuan_large_v2.cc100-en": { "vocab_size": 32128, "n_bytes": 1124813, "n_tokens": 536033, "n_chars": 1121360 }, "chinese_llama.cc100-en": { "vocab_size": 49953, "n_bytes": 1124813, "n_tokens": 291514, "n_chars": 1121360 }, "chinese_llama2.cc100-en": { "vocab_size": 55296, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "code_davinci_002.cc100-en": { "vocab_size": 50281, "n_bytes": 1124813, "n_tokens": 258403, "n_chars": 1121360 }, "crystal_coder.cc100-en": { "vocab_size": 32022, "n_bytes": 1124813, "n_tokens": 284627, "n_chars": 1121360 }, "dbrx_instruct.cc100-en": { "vocab_size": 100280, "n_bytes": 1124813, "n_tokens": 254985, "n_chars": 1121360 }, "deepseek_coder_33b_instruct.cc100-en": { "vocab_size": 32022, "n_bytes": 1124813, "n_tokens": 287408, "n_chars": 1121360 }, "deepseek_llm_7b_base.cc100-en": { "vocab_size": 100015, "n_bytes": 1124813, "n_tokens": 272324, "n_chars": 1121360 }, "falcon_180b.cc100-en": { "vocab_size": 65024, "n_bytes": 1124813, "n_tokens": 262509, "n_chars": 1121360 }, "falcon_7b.cc100-en": { "vocab_size": 65024, "n_bytes": 1124813, "n_tokens": 262509, "n_chars": 1121360 }, "fastchat_t5_3b.cc100-en": { "vocab_size": 32110, "n_bytes": 1124813, "n_tokens": 484941, "n_chars": 1121360 }, "flan_t5_base.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "gemma_7b.cc100-en": { "vocab_size": 256000, "n_bytes": 1124813, "n_tokens": 268010, "n_chars": 1121360 }, "gpt2.cc100-en": { "vocab_size": 50257, "n_bytes": 1124813, "n_tokens": 258428, "n_chars": 1121360 }, "gpt2_chinese.cc100-en": { "vocab_size": 21128, "n_bytes": 1124813, "n_tokens": 392641, "n_chars": 1121360 }, "gpt_35_turbo.cc100-en": { "vocab_size": 100277, "n_bytes": 1124813, "n_tokens": 254985, "n_chars": 1121360 }, "gpt_4.cc100-en": { "vocab_size": 100277, "n_bytes": 1124813, "n_tokens": 254985, "n_chars": 1121360 }, "gpt_nexo_20b.cc100-en": { "vocab_size": 50277, "n_bytes": 1124813, "n_tokens": 259357, "n_chars": 1121360 }, "grok_1.cc100-en": { "vocab_size": 131072, "n_bytes": 1124813, "n_tokens": 258048, "n_chars": 1121360 }, "internlm2_chat_7b.cc100-en": { "vocab_size": 92544, "n_bytes": 1124813, "n_tokens": 271583, "n_chars": 1121360 }, "internlm2_math_7b.cc100-en": { "vocab_size": 92544, "n_bytes": 1124813, "n_tokens": 271583, "n_chars": 1121360 }, "internlm_chat_7b.cc100-en": { "vocab_size": 103168, "n_bytes": 1124813, "n_tokens": 271293, "n_chars": 1121360 }, "internlm_xcomposer_7b.cc100-en": { "vocab_size": 103168, "n_bytes": 1124813, "n_tokens": 271293, "n_chars": 1121360 }, "jamba_v0_1.cc100-en": { "vocab_size": 65536, "n_bytes": 1124813, "n_tokens": 274242, "n_chars": 1121360 }, "kplug.cc100-en": { "vocab_size": 10261, "n_bytes": 1124813, "n_tokens": 393564, "n_chars": 1121360 }, "llama.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "llama2.cc100-en": { "vocab_size": 32001, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "llama3.cc100-en": { "vocab_size": 128256, "n_bytes": 1124813, "n_tokens": 254944, "n_chars": 1121360 }, "mistral_7b.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 285801, "n_chars": 1121360 }, "mixtral_8_7b.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 285801, "n_chars": 1121360 }, "mobilebert_uncased.cc100-en": { "vocab_size": 30522, "n_bytes": 1124813, "n_tokens": 280575, "n_chars": 1121360 }, "moss.cc100-en": { "vocab_size": 106072, "n_bytes": 1124813, "n_tokens": 257070, "n_chars": 1121360 }, "mt5_large.cc100-en": { "vocab_size": 250100, "n_bytes": 1124813, "n_tokens": 317881, "n_chars": 1121360 }, "olmo_7b.cc100-en": { "vocab_size": 50280, "n_bytes": 1124813, "n_tokens": 259357, "n_chars": 1121360 }, "orion_14b_chat.cc100-en": { "vocab_size": 84608, "n_bytes": 1124813, "n_tokens": 265948, "n_chars": 1121360 }, "phi_1.cc100-en": { "vocab_size": 50295, "n_bytes": 1124813, "n_tokens": 258409, "n_chars": 1121360 }, "phi_2.cc100-en": { "vocab_size": 50295, "n_bytes": 1124813, "n_tokens": 258409, "n_chars": 1121360 }, "phi_3_mini.cc100-en": { "vocab_size": 32011, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "pko_t5_large.cc100-en": { "vocab_size": 50358, "n_bytes": 1124813, "n_tokens": 658985, "n_chars": 1121360 }, "prompt_clue.cc100-en": { "vocab_size": 32128, "n_bytes": 1124813, "n_tokens": 536033, "n_chars": 1121360 }, "qwen1_5_14b_chat.cc100-en": { "vocab_size": 151646, "n_bytes": 1124813, "n_tokens": 257983, "n_chars": 1121360 }, "qwen_1_8b_chat.cc100-en": { "vocab_size": 151851, "n_bytes": 1124813, "n_tokens": 257983, "n_chars": 1121360 }, "qwen_72b_chat.cc100-en": { "vocab_size": 151851, "n_bytes": 1124813, "n_tokens": 257983, "n_chars": 1121360 }, "qwen_7b_chat.cc100-en": { "vocab_size": 151851, "n_bytes": 1124813, "n_tokens": 257983, "n_chars": 1121360 }, "roberta_chinese_clue.cc100-en": { "vocab_size": 8021, "n_bytes": 1124813, "n_tokens": 583058, "n_chars": 1121360 }, "skywork_13b_base.cc100-en": { "vocab_size": 65519, "n_bytes": 1124813, "n_tokens": 294617, "n_chars": 1121360 }, "skywork_13b_math.cc100-en": { "vocab_size": 65519, "n_bytes": 1124813, "n_tokens": 294617, "n_chars": 1121360 }, "solar_10_7b.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 285801, "n_chars": 1121360 }, "starchat_alpha.cc100-en": { "vocab_size": 49156, "n_bytes": 1124813, "n_tokens": 288965, "n_chars": 1121360 }, "switch_c_2048.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "t5_base.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "t5_large.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "t5_small.cc100-en": { "vocab_size": 32100, "n_bytes": 1124813, "n_tokens": 290104, "n_chars": 1121360 }, "text_davinci_003.cc100-en": { "vocab_size": 50281, "n_bytes": 1124813, "n_tokens": 258403, "n_chars": 1121360 }, "tigerbot_13b_chat_v2.cc100-en": { "vocab_size": 60515, "n_bytes": 1124813, "n_tokens": 285652, "n_chars": 1121360 }, "tigerbot_70b_chat_v4_4k.cc100-en": { "vocab_size": 65110, "n_bytes": 1124813, "n_tokens": 286946, "n_chars": 1121360 }, "wizardcoder_15b_v1.cc100-en": { "vocab_size": 49153, "n_bytes": 1124813, "n_tokens": 288965, "n_chars": 1121360 }, "wizardcoder_python_7b_v1.cc100-en": { "vocab_size": 32001, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "wizardlm_7b_v1.cc100-en": { "vocab_size": 32001, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "wizardmath_70b_v1.cc100-en": { "vocab_size": 32002, "n_bytes": 1124813, "n_tokens": 294627, "n_chars": 1121360 }, "xlm_roberta.cc100-en": { "vocab_size": 250002, "n_bytes": 1124813, "n_tokens": 300026, "n_chars": 1121360 }, "yi_34b.cc100-en": { "vocab_size": 64000, "n_bytes": 1124813, "n_tokens": 270400, "n_chars": 1121360 }, "yi_6b.cc100-en": { "vocab_size": 64000, "n_bytes": 1124813, "n_tokens": 270400, "n_chars": 1121360 }, "yi_vl34b.cc100-en": { "vocab_size": 64000, "n_bytes": 1124813, "n_tokens": 269738, "n_chars": 1121360 }, "zephyr_7b_beta.cc100-en": { "vocab_size": 32000, "n_bytes": 1124813, "n_tokens": 285801, "n_chars": 1121360 }, "amber.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "aya_101.cc100-zh-Hans": { "vocab_size": 250100, "n_bytes": 2633047, "n_tokens": 631182, "n_chars": 927311 }, "baichuan.cc100-zh-Hans": { "vocab_size": 64000, "n_bytes": 2633047, "n_tokens": 626117, "n_chars": 927311 }, "baichuan2.cc100-zh-Hans": { "vocab_size": 125696, "n_bytes": 2633047, "n_tokens": 541464, "n_chars": 927311 }, "bert_base_cased.cc100-zh-Hans": { "vocab_size": 28996, "n_bytes": 2633047, "n_tokens": 899709, "n_chars": 927311 }, "bert_base_chinese.cc100-zh-Hans": { "vocab_size": 21128, "n_bytes": 2633047, "n_tokens": 896599, "n_chars": 927311 }, "bert_base_uncased.cc100-zh-Hans": { "vocab_size": 30522, "n_bytes": 2633047, "n_tokens": 898554, "n_chars": 927311 }, "bloom.cc100-zh-Hans": { "vocab_size": 250680, "n_bytes": 2633047, "n_tokens": 573008, "n_chars": 927311 }, "byt5_small.cc100-zh-Hans": { "vocab_size": 384, "n_bytes": 2633047, "n_tokens": 2643047, "n_chars": 927311 }, "character_glm_6b.cc100-zh-Hans": { "vocab_size": 64789, "n_bytes": 2633047, "n_tokens": 583646, "n_chars": 927311 }, "chatglm2_6b.cc100-zh-Hans": { "vocab_size": 64787, "n_bytes": 2633047, "n_tokens": 583646, "n_chars": 927311 }, "chatglm3_6b.cc100-zh-Hans": { "vocab_size": 64796, "n_bytes": 2633047, "n_tokens": 583646, "n_chars": 927311 }, "chatglm_6b.cc100-zh-Hans": { "vocab_size": 150344, "n_bytes": 2633047, "n_tokens": 527384, "n_chars": 927311 }, "chatyuan_large_v2.cc100-zh-Hans": { "vocab_size": 32128, "n_bytes": 2633047, "n_tokens": 564905, "n_chars": 927311 }, "chinese_llama.cc100-zh-Hans": { "vocab_size": 49953, "n_bytes": 2633047, "n_tokens": 623219, "n_chars": 927311 }, "chinese_llama2.cc100-zh-Hans": { "vocab_size": 55296, "n_bytes": 2633047, "n_tokens": 625766, "n_chars": 927311 }, "code_davinci_002.cc100-zh-Hans": { "vocab_size": 50281, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "crystal_coder.cc100-zh-Hans": { "vocab_size": 32022, "n_bytes": 2633047, "n_tokens": 1320093, "n_chars": 927311 }, "dbrx_instruct.cc100-zh-Hans": { "vocab_size": 100280, "n_bytes": 2633047, "n_tokens": 1084939, "n_chars": 927311 }, "deepseek_coder_33b_instruct.cc100-zh-Hans": { "vocab_size": 32022, "n_bytes": 2633047, "n_tokens": 720577, "n_chars": 927311 }, "deepseek_llm_7b_base.cc100-zh-Hans": { "vocab_size": 100015, "n_bytes": 2633047, "n_tokens": 605081, "n_chars": 927311 }, "falcon_180b.cc100-zh-Hans": { "vocab_size": 65024, "n_bytes": 2633047, "n_tokens": 1124681, "n_chars": 927311 }, "falcon_7b.cc100-zh-Hans": { "vocab_size": 65024, "n_bytes": 2633047, "n_tokens": 1124681, "n_chars": 927311 }, "fastchat_t5_3b.cc100-zh-Hans": { "vocab_size": 32110, "n_bytes": 2633047, "n_tokens": 178974, "n_chars": 927311 }, "flan_t5_base.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173520, "n_chars": 927311 }, "gemma_7b.cc100-zh-Hans": { "vocab_size": 256000, "n_bytes": 2633047, "n_tokens": 641795, "n_chars": 927311 }, "gpt2.cc100-zh-Hans": { "vocab_size": 50257, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "gpt2_chinese.cc100-zh-Hans": { "vocab_size": 21128, "n_bytes": 2633047, "n_tokens": 899506, "n_chars": 927311 }, "gpt_35_turbo.cc100-zh-Hans": { "vocab_size": 100277, "n_bytes": 2633047, "n_tokens": 1084939, "n_chars": 927311 }, "gpt_4.cc100-zh-Hans": { "vocab_size": 100277, "n_bytes": 2633047, "n_tokens": 1084939, "n_chars": 927311 }, "gpt_nexo_20b.cc100-zh-Hans": { "vocab_size": 50277, "n_bytes": 2633047, "n_tokens": 1220529, "n_chars": 927311 }, "grok_1.cc100-zh-Hans": { "vocab_size": 131072, "n_bytes": 2633047, "n_tokens": 1414508, "n_chars": 927311 }, "internlm2_chat_7b.cc100-zh-Hans": { "vocab_size": 92544, "n_bytes": 2633047, "n_tokens": 579976, "n_chars": 927311 }, "internlm2_math_7b.cc100-zh-Hans": { "vocab_size": 92544, "n_bytes": 2633047, "n_tokens": 579976, "n_chars": 927311 }, "internlm_chat_7b.cc100-zh-Hans": { "vocab_size": 103168, "n_bytes": 2633047, "n_tokens": 579109, "n_chars": 927311 }, "internlm_xcomposer_7b.cc100-zh-Hans": { "vocab_size": 103168, "n_bytes": 2633047, "n_tokens": 579109, "n_chars": 927311 }, "jamba_v0_1.cc100-zh-Hans": { "vocab_size": 65536, "n_bytes": 2633047, "n_tokens": 1067054, "n_chars": 927311 }, "kplug.cc100-zh-Hans": { "vocab_size": 10261, "n_bytes": 2633047, "n_tokens": 902451, "n_chars": 927311 }, "llama.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "llama2.cc100-zh-Hans": { "vocab_size": 32001, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "llama3.cc100-zh-Hans": { "vocab_size": 128256, "n_bytes": 2633047, "n_tokens": 747405, "n_chars": 927311 }, "mistral_7b.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1041023, "n_chars": 927311 }, "mixtral_8_7b.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1041023, "n_chars": 927311 }, "mobilebert_uncased.cc100-zh-Hans": { "vocab_size": 30522, "n_bytes": 2633047, "n_tokens": 898554, "n_chars": 927311 }, "moss.cc100-zh-Hans": { "vocab_size": 106072, "n_bytes": 2633047, "n_tokens": 557455, "n_chars": 927311 }, "mt5_large.cc100-zh-Hans": { "vocab_size": 250100, "n_bytes": 2633047, "n_tokens": 631182, "n_chars": 927311 }, "olmo_7b.cc100-zh-Hans": { "vocab_size": 50280, "n_bytes": 2633047, "n_tokens": 1220529, "n_chars": 927311 }, "orion_14b_chat.cc100-zh-Hans": { "vocab_size": 84608, "n_bytes": 2633047, "n_tokens": 529926, "n_chars": 927311 }, "phi_1.cc100-zh-Hans": { "vocab_size": 50295, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "phi_2.cc100-zh-Hans": { "vocab_size": 50295, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "phi_3_mini.cc100-zh-Hans": { "vocab_size": 32011, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "pko_t5_large.cc100-zh-Hans": { "vocab_size": 50358, "n_bytes": 2633047, "n_tokens": 2533519, "n_chars": 927311 }, "prompt_clue.cc100-zh-Hans": { "vocab_size": 32128, "n_bytes": 2633047, "n_tokens": 564905, "n_chars": 927311 }, "qwen1_5_14b_chat.cc100-zh-Hans": { "vocab_size": 151646, "n_bytes": 2633047, "n_tokens": 589211, "n_chars": 927311 }, "qwen_1_8b_chat.cc100-zh-Hans": { "vocab_size": 151851, "n_bytes": 2633047, "n_tokens": 589211, "n_chars": 927311 }, "qwen_72b_chat.cc100-zh-Hans": { "vocab_size": 151851, "n_bytes": 2633047, "n_tokens": 589211, "n_chars": 927311 }, "qwen_7b_chat.cc100-zh-Hans": { "vocab_size": 151851, "n_bytes": 2633047, "n_tokens": 589211, "n_chars": 927311 }, "roberta_chinese_clue.cc100-zh-Hans": { "vocab_size": 8021, "n_bytes": 2633047, "n_tokens": 907144, "n_chars": 927311 }, "skywork_13b_base.cc100-zh-Hans": { "vocab_size": 65519, "n_bytes": 2633047, "n_tokens": 663923, "n_chars": 927311 }, "skywork_13b_math.cc100-zh-Hans": { "vocab_size": 65519, "n_bytes": 2633047, "n_tokens": 663923, "n_chars": 927311 }, "solar_10_7b.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1041023, "n_chars": 927311 }, "starchat_alpha.cc100-zh-Hans": { "vocab_size": 49156, "n_bytes": 2633047, "n_tokens": 882018, "n_chars": 927311 }, "switch_c_2048.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173519, "n_chars": 927311 }, "t5_base.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173519, "n_chars": 927311 }, "t5_large.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173519, "n_chars": 927311 }, "t5_small.cc100-zh-Hans": { "vocab_size": 32100, "n_bytes": 2633047, "n_tokens": 173519, "n_chars": 927311 }, "text_davinci_003.cc100-zh-Hans": { "vocab_size": 50281, "n_bytes": 2633047, "n_tokens": 1876809, "n_chars": 927311 }, "tigerbot_13b_chat_v2.cc100-zh-Hans": { "vocab_size": 60515, "n_bytes": 2633047, "n_tokens": 577385, "n_chars": 927311 }, "tigerbot_70b_chat_v4_4k.cc100-zh-Hans": { "vocab_size": 65110, "n_bytes": 2633047, "n_tokens": 577211, "n_chars": 927311 }, "wizardcoder_15b_v1.cc100-zh-Hans": { "vocab_size": 49153, "n_bytes": 2633047, "n_tokens": 882018, "n_chars": 927311 }, "wizardcoder_python_7b_v1.cc100-zh-Hans": { "vocab_size": 32001, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "wizardlm_7b_v1.cc100-zh-Hans": { "vocab_size": 32001, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "wizardmath_70b_v1.cc100-zh-Hans": { "vocab_size": 32002, "n_bytes": 2633047, "n_tokens": 1330093, "n_chars": 927311 }, "xlm_roberta.cc100-zh-Hans": { "vocab_size": 250002, "n_bytes": 2633047, "n_tokens": 619844, "n_chars": 927311 }, "yi_34b.cc100-zh-Hans": { "vocab_size": 64000, "n_bytes": 2633047, "n_tokens": 588729, "n_chars": 927311 }, "yi_6b.cc100-zh-Hans": { "vocab_size": 64000, "n_bytes": 2633047, "n_tokens": 588729, "n_chars": 927311 }, "yi_vl34b.cc100-zh-Hans": { "vocab_size": 64000, "n_bytes": 2633047, "n_tokens": 596166, "n_chars": 927311 }, "zephyr_7b_beta.cc100-zh-Hans": { "vocab_size": 32000, "n_bytes": 2633047, "n_tokens": 1041023, "n_chars": 927311 }, "amber.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "aya_101.cc100-es": { "vocab_size": 250100, "n_bytes": 1664455, "n_tokens": 472231, "n_chars": 1630297 }, "baichuan.cc100-es": { "vocab_size": 64000, "n_bytes": 1664455, "n_tokens": 585804, "n_chars": 1630297 }, "baichuan2.cc100-es": { "vocab_size": 125696, "n_bytes": 1664455, "n_tokens": 551326, "n_chars": 1630297 }, "bert_base_cased.cc100-es": { "vocab_size": 28996, "n_bytes": 1664455, "n_tokens": 630231, "n_chars": 1630297 }, "bert_base_chinese.cc100-es": { "vocab_size": 21128, "n_bytes": 1664455, "n_tokens": 609419, "n_chars": 1630297 }, "bert_base_uncased.cc100-es": { "vocab_size": 30522, "n_bytes": 1664455, "n_tokens": 558042, "n_chars": 1630297 }, "bloom.cc100-es": { "vocab_size": 250680, "n_bytes": 1664455, "n_tokens": 350793, "n_chars": 1630297 }, "byt5_small.cc100-es": { "vocab_size": 384, "n_bytes": 1664455, "n_tokens": 1674455, "n_chars": 1630297 }, "character_glm_6b.cc100-es": { "vocab_size": 64789, "n_bytes": 1664455, "n_tokens": 566501, "n_chars": 1630297 }, "chatglm2_6b.cc100-es": { "vocab_size": 64787, "n_bytes": 1664455, "n_tokens": 566476, "n_chars": 1630297 }, "chatglm3_6b.cc100-es": { "vocab_size": 64796, "n_bytes": 1664455, "n_tokens": 566501, "n_chars": 1630297 }, "chatglm_6b.cc100-es": { "vocab_size": 150344, "n_bytes": 1664455, "n_tokens": 514848, "n_chars": 1630297 }, "chatyuan_large_v2.cc100-es": { "vocab_size": 32128, "n_bytes": 1664455, "n_tokens": 889530, "n_chars": 1630297 }, "chinese_llama.cc100-es": { "vocab_size": 49953, "n_bytes": 1664455, "n_tokens": 486672, "n_chars": 1630297 }, "chinese_llama2.cc100-es": { "vocab_size": 55296, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "code_davinci_002.cc100-es": { "vocab_size": 50281, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "crystal_coder.cc100-es": { "vocab_size": 32022, "n_bytes": 1664455, "n_tokens": 482235, "n_chars": 1630297 }, "dbrx_instruct.cc100-es": { "vocab_size": 100280, "n_bytes": 1664455, "n_tokens": 433875, "n_chars": 1630297 }, "deepseek_coder_33b_instruct.cc100-es": { "vocab_size": 32022, "n_bytes": 1664455, "n_tokens": 523884, "n_chars": 1630297 }, "deepseek_llm_7b_base.cc100-es": { "vocab_size": 100015, "n_bytes": 1664455, "n_tokens": 480877, "n_chars": 1630297 }, "falcon_180b.cc100-es": { "vocab_size": 65024, "n_bytes": 1664455, "n_tokens": 442138, "n_chars": 1630297 }, "falcon_7b.cc100-es": { "vocab_size": 65024, "n_bytes": 1664455, "n_tokens": 442138, "n_chars": 1630297 }, "fastchat_t5_3b.cc100-es": { "vocab_size": 32110, "n_bytes": 1664455, "n_tokens": 970105, "n_chars": 1630297 }, "flan_t5_base.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706405, "n_chars": 1630297 }, "gemma_7b.cc100-es": { "vocab_size": 256000, "n_bytes": 1664455, "n_tokens": 371321, "n_chars": 1630297 }, "gpt2.cc100-es": { "vocab_size": 50257, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "gpt2_chinese.cc100-es": { "vocab_size": 21128, "n_bytes": 1664455, "n_tokens": 703390, "n_chars": 1630297 }, "gpt_35_turbo.cc100-es": { "vocab_size": 100277, "n_bytes": 1664455, "n_tokens": 433875, "n_chars": 1630297 }, "gpt_4.cc100-es": { "vocab_size": 100277, "n_bytes": 1664455, "n_tokens": 433875, "n_chars": 1630297 }, "gpt_nexo_20b.cc100-es": { "vocab_size": 50277, "n_bytes": 1664455, "n_tokens": 494577, "n_chars": 1630297 }, "grok_1.cc100-es": { "vocab_size": 131072, "n_bytes": 1664455, "n_tokens": 449392, "n_chars": 1630297 }, "internlm2_chat_7b.cc100-es": { "vocab_size": 92544, "n_bytes": 1664455, "n_tokens": 518871, "n_chars": 1630297 }, "internlm2_math_7b.cc100-es": { "vocab_size": 92544, "n_bytes": 1664455, "n_tokens": 518871, "n_chars": 1630297 }, "internlm_chat_7b.cc100-es": { "vocab_size": 103168, "n_bytes": 1664455, "n_tokens": 516572, "n_chars": 1630297 }, "internlm_xcomposer_7b.cc100-es": { "vocab_size": 103168, "n_bytes": 1664455, "n_tokens": 516572, "n_chars": 1630297 }, "jamba_v0_1.cc100-es": { "vocab_size": 65536, "n_bytes": 1664455, "n_tokens": 420883, "n_chars": 1630297 }, "kplug.cc100-es": { "vocab_size": 10261, "n_bytes": 1664455, "n_tokens": 704804, "n_chars": 1630297 }, "llama.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "llama2.cc100-es": { "vocab_size": 32001, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "llama3.cc100-es": { "vocab_size": 128256, "n_bytes": 1664455, "n_tokens": 433289, "n_chars": 1630297 }, "mistral_7b.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 513915, "n_chars": 1630297 }, "mixtral_8_7b.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 513915, "n_chars": 1630297 }, "mobilebert_uncased.cc100-es": { "vocab_size": 30522, "n_bytes": 1664455, "n_tokens": 558042, "n_chars": 1630297 }, "moss.cc100-es": { "vocab_size": 106072, "n_bytes": 1664455, "n_tokens": 568539, "n_chars": 1630297 }, "mt5_large.cc100-es": { "vocab_size": 250100, "n_bytes": 1664455, "n_tokens": 472231, "n_chars": 1630297 }, "olmo_7b.cc100-es": { "vocab_size": 50280, "n_bytes": 1664455, "n_tokens": 494577, "n_chars": 1630297 }, "orion_14b_chat.cc100-es": { "vocab_size": 84608, "n_bytes": 1664455, "n_tokens": 628571, "n_chars": 1630297 }, "phi_1.cc100-es": { "vocab_size": 50295, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "phi_2.cc100-es": { "vocab_size": 50295, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "phi_3_mini.cc100-es": { "vocab_size": 32011, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "pko_t5_large.cc100-es": { "vocab_size": 50358, "n_bytes": 1664455, "n_tokens": 1134056, "n_chars": 1630297 }, "prompt_clue.cc100-es": { "vocab_size": 32128, "n_bytes": 1664455, "n_tokens": 889530, "n_chars": 1630297 }, "qwen1_5_14b_chat.cc100-es": { "vocab_size": 151646, "n_bytes": 1664455, "n_tokens": 434264, "n_chars": 1630297 }, "qwen_1_8b_chat.cc100-es": { "vocab_size": 151851, "n_bytes": 1664455, "n_tokens": 434264, "n_chars": 1630297 }, "qwen_72b_chat.cc100-es": { "vocab_size": 151851, "n_bytes": 1664455, "n_tokens": 434264, "n_chars": 1630297 }, "qwen_7b_chat.cc100-es": { "vocab_size": 151851, "n_bytes": 1664455, "n_tokens": 434264, "n_chars": 1630297 }, "roberta_chinese_clue.cc100-es": { "vocab_size": 8021, "n_bytes": 1664455, "n_tokens": 866564, "n_chars": 1630297 }, "skywork_13b_base.cc100-es": { "vocab_size": 65519, "n_bytes": 1664455, "n_tokens": 492211, "n_chars": 1630297 }, "skywork_13b_math.cc100-es": { "vocab_size": 65519, "n_bytes": 1664455, "n_tokens": 492211, "n_chars": 1630297 }, "solar_10_7b.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 513915, "n_chars": 1630297 }, "starchat_alpha.cc100-es": { "vocab_size": 49156, "n_bytes": 1664455, "n_tokens": 530592, "n_chars": 1630297 }, "switch_c_2048.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706400, "n_chars": 1630297 }, "t5_base.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706400, "n_chars": 1630297 }, "t5_large.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706400, "n_chars": 1630297 }, "t5_small.cc100-es": { "vocab_size": 32100, "n_bytes": 1664455, "n_tokens": 706400, "n_chars": 1630297 }, "text_davinci_003.cc100-es": { "vocab_size": 50281, "n_bytes": 1664455, "n_tokens": 569853, "n_chars": 1630297 }, "tigerbot_13b_chat_v2.cc100-es": { "vocab_size": 60515, "n_bytes": 1664455, "n_tokens": 482553, "n_chars": 1630297 }, "tigerbot_70b_chat_v4_4k.cc100-es": { "vocab_size": 65110, "n_bytes": 1664455, "n_tokens": 484099, "n_chars": 1630297 }, "wizardcoder_15b_v1.cc100-es": { "vocab_size": 49153, "n_bytes": 1664455, "n_tokens": 530592, "n_chars": 1630297 }, "wizardcoder_python_7b_v1.cc100-es": { "vocab_size": 32001, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "wizardlm_7b_v1.cc100-es": { "vocab_size": 32001, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "wizardmath_70b_v1.cc100-es": { "vocab_size": 32002, "n_bytes": 1664455, "n_tokens": 492235, "n_chars": 1630297 }, "xlm_roberta.cc100-es": { "vocab_size": 250002, "n_bytes": 1664455, "n_tokens": 399850, "n_chars": 1630297 }, "yi_34b.cc100-es": { "vocab_size": 64000, "n_bytes": 1664455, "n_tokens": 577018, "n_chars": 1630297 }, "yi_6b.cc100-es": { "vocab_size": 64000, "n_bytes": 1664455, "n_tokens": 577018, "n_chars": 1630297 }, "yi_vl34b.cc100-es": { "vocab_size": 64000, "n_bytes": 1664455, "n_tokens": 576794, "n_chars": 1630297 }, "zephyr_7b_beta.cc100-es": { "vocab_size": 32000, "n_bytes": 1664455, "n_tokens": 513915, "n_chars": 1630297 }, "aya_101.cc100-fr": { "vocab_size": 250100, "n_bytes": 1540504, "n_tokens": 470944, "n_chars": 1484970 }, "baichuan.cc100-fr": { "vocab_size": 64000, "n_bytes": 1540504, "n_tokens": 540430, "n_chars": 1484970 }, "baichuan2.cc100-fr": { "vocab_size": 125696, "n_bytes": 1540504, "n_tokens": 512313, "n_chars": 1484970 }, "bert_base_cased.cc100-fr": { "vocab_size": 28996, "n_bytes": 1540504, "n_tokens": 583210, "n_chars": 1484970 }, "bert_base_chinese.cc100-fr": { "vocab_size": 21128, "n_bytes": 1540504, "n_tokens": 553134, "n_chars": 1484970 }, "bert_base_uncased.cc100-fr": { "vocab_size": 30522, "n_bytes": 1540504, "n_tokens": 504075, "n_chars": 1484970 }, "bloom.cc100-fr": { "vocab_size": 250680, "n_bytes": 1540504, "n_tokens": 321639, "n_chars": 1484970 }, "byt5_small.cc100-fr": { "vocab_size": 384, "n_bytes": 1540504, "n_tokens": 1550504, "n_chars": 1484970 }, "character_glm_6b.cc100-fr": { "vocab_size": 64789, "n_bytes": 1540504, "n_tokens": 515052, "n_chars": 1484970 }, "chatglm2_6b.cc100-fr": { "vocab_size": 64787, "n_bytes": 1540504, "n_tokens": 515028, "n_chars": 1484970 }, "chatglm3_6b.cc100-fr": { "vocab_size": 64796, "n_bytes": 1540504, "n_tokens": 515052, "n_chars": 1484970 }, "chatglm_6b.cc100-fr": { "vocab_size": 150344, "n_bytes": 1540504, "n_tokens": 499261, "n_chars": 1484970 }, "chatyuan_large_v2.cc100-fr": { "vocab_size": 32128, "n_bytes": 1540504, "n_tokens": 822012, "n_chars": 1484970 }, "chinese_llama.cc100-fr": { "vocab_size": 49953, "n_bytes": 1540504, "n_tokens": 450352, "n_chars": 1484970 }, "chinese_llama2.cc100-fr": { "vocab_size": 55296, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "code_davinci_002.cc100-fr": { "vocab_size": 50281, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "crystal_coder.cc100-fr": { "vocab_size": 32022, "n_bytes": 1540504, "n_tokens": 447243, "n_chars": 1484970 }, "dbrx_instruct.cc100-fr": { "vocab_size": 100280, "n_bytes": 1540504, "n_tokens": 412685, "n_chars": 1484970 }, "deepseek_coder_33b_instruct.cc100-fr": { "vocab_size": 32022, "n_bytes": 1540504, "n_tokens": 537538, "n_chars": 1484970 }, "deepseek_llm_7b_base.cc100-fr": { "vocab_size": 100015, "n_bytes": 1540504, "n_tokens": 507693, "n_chars": 1484970 }, "falcon_180b.cc100-fr": { "vocab_size": 65024, "n_bytes": 1540504, "n_tokens": 407853, "n_chars": 1484970 }, "falcon_7b.cc100-fr": { "vocab_size": 65024, "n_bytes": 1540504, "n_tokens": 407853, "n_chars": 1484970 }, "fastchat_t5_3b.cc100-fr": { "vocab_size": 32110, "n_bytes": 1540504, "n_tokens": 717675, "n_chars": 1484970 }, "flan_t5_base.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476135, "n_chars": 1484970 }, "gemma_7b.cc100-fr": { "vocab_size": 256000, "n_bytes": 1540504, "n_tokens": 374551, "n_chars": 1484970 }, "gpt2.cc100-fr": { "vocab_size": 50257, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "gpt2_chinese.cc100-fr": { "vocab_size": 21128, "n_bytes": 1540504, "n_tokens": 636442, "n_chars": 1484970 }, "gpt_35_turbo.cc100-fr": { "vocab_size": 100277, "n_bytes": 1540504, "n_tokens": 412685, "n_chars": 1484970 }, "gpt_4.cc100-fr": { "vocab_size": 100277, "n_bytes": 1540504, "n_tokens": 412685, "n_chars": 1484970 }, "gpt_nexo_20b.cc100-fr": { "vocab_size": 50277, "n_bytes": 1540504, "n_tokens": 458961, "n_chars": 1484970 }, "grok_1.cc100-fr": { "vocab_size": 131072, "n_bytes": 1540504, "n_tokens": 428298, "n_chars": 1484970 }, "internlm2_chat_7b.cc100-fr": { "vocab_size": 92544, "n_bytes": 1540504, "n_tokens": 496629, "n_chars": 1484970 }, "internlm2_math_7b.cc100-fr": { "vocab_size": 92544, "n_bytes": 1540504, "n_tokens": 496629, "n_chars": 1484970 }, "internlm_chat_7b.cc100-fr": { "vocab_size": 103168, "n_bytes": 1540504, "n_tokens": 495045, "n_chars": 1484970 }, "internlm_xcomposer_7b.cc100-fr": { "vocab_size": 103168, "n_bytes": 1540504, "n_tokens": 495045, "n_chars": 1484970 }, "jamba_v0_1.cc100-fr": { "vocab_size": 65536, "n_bytes": 1540504, "n_tokens": 412899, "n_chars": 1484970 }, "kplug.cc100-fr": { "vocab_size": 10261, "n_bytes": 1540504, "n_tokens": 638107, "n_chars": 1484970 }, "llama.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "llama2.cc100-fr": { "vocab_size": 32001, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "llama3.cc100-fr": { "vocab_size": 128256, "n_bytes": 1540504, "n_tokens": 412146, "n_chars": 1484970 }, "mistral_7b.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 476666, "n_chars": 1484970 }, "mixtral_8_7b.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 476666, "n_chars": 1484970 }, "mobilebert_uncased.cc100-fr": { "vocab_size": 30522, "n_bytes": 1540504, "n_tokens": 504075, "n_chars": 1484970 }, "moss.cc100-fr": { "vocab_size": 106072, "n_bytes": 1540504, "n_tokens": 515669, "n_chars": 1484970 }, "mt5_large.cc100-fr": { "vocab_size": 250100, "n_bytes": 1540504, "n_tokens": 470944, "n_chars": 1484970 }, "olmo_7b.cc100-fr": { "vocab_size": 50280, "n_bytes": 1540504, "n_tokens": 458961, "n_chars": 1484970 }, "orion_14b_chat.cc100-fr": { "vocab_size": 84608, "n_bytes": 1540504, "n_tokens": 564107, "n_chars": 1484970 }, "phi_1.cc100-fr": { "vocab_size": 50295, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "phi_2.cc100-fr": { "vocab_size": 50295, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "phi_3_mini.cc100-fr": { "vocab_size": 32011, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "pko_t5_large.cc100-fr": { "vocab_size": 50358, "n_bytes": 1540504, "n_tokens": 1044665, "n_chars": 1484970 }, "prompt_clue.cc100-fr": { "vocab_size": 32128, "n_bytes": 1540504, "n_tokens": 822012, "n_chars": 1484970 }, "qwen1_5_14b_chat.cc100-fr": { "vocab_size": 151646, "n_bytes": 1540504, "n_tokens": 413637, "n_chars": 1484970 }, "qwen_1_8b_chat.cc100-fr": { "vocab_size": 151851, "n_bytes": 1540504, "n_tokens": 413637, "n_chars": 1484970 }, "qwen_72b_chat.cc100-fr": { "vocab_size": 151851, "n_bytes": 1540504, "n_tokens": 413637, "n_chars": 1484970 }, "qwen_7b_chat.cc100-fr": { "vocab_size": 151851, "n_bytes": 1540504, "n_tokens": 413637, "n_chars": 1484970 }, "roberta_chinese_clue.cc100-fr": { "vocab_size": 8021, "n_bytes": 1540504, "n_tokens": 787363, "n_chars": 1484970 }, "skywork_13b_base.cc100-fr": { "vocab_size": 65519, "n_bytes": 1540504, "n_tokens": 457233, "n_chars": 1484970 }, "skywork_13b_math.cc100-fr": { "vocab_size": 65519, "n_bytes": 1540504, "n_tokens": 457233, "n_chars": 1484970 }, "solar_10_7b.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 476666, "n_chars": 1484970 }, "starchat_alpha.cc100-fr": { "vocab_size": 49156, "n_bytes": 1540504, "n_tokens": 509958, "n_chars": 1484970 }, "switch_c_2048.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476133, "n_chars": 1484970 }, "t5_base.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476133, "n_chars": 1484970 }, "t5_large.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476133, "n_chars": 1484970 }, "t5_small.cc100-fr": { "vocab_size": 32100, "n_bytes": 1540504, "n_tokens": 476133, "n_chars": 1484970 }, "text_davinci_003.cc100-fr": { "vocab_size": 50281, "n_bytes": 1540504, "n_tokens": 521776, "n_chars": 1484970 }, "tigerbot_13b_chat_v2.cc100-fr": { "vocab_size": 60515, "n_bytes": 1540504, "n_tokens": 447372, "n_chars": 1484970 }, "tigerbot_70b_chat_v4_4k.cc100-fr": { "vocab_size": 65110, "n_bytes": 1540504, "n_tokens": 448567, "n_chars": 1484970 }, "wizardcoder_15b_v1.cc100-fr": { "vocab_size": 49153, "n_bytes": 1540504, "n_tokens": 509958, "n_chars": 1484970 }, "wizardcoder_python_7b_v1.cc100-fr": { "vocab_size": 32001, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "wizardlm_7b_v1.cc100-fr": { "vocab_size": 32001, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "wizardmath_70b_v1.cc100-fr": { "vocab_size": 32002, "n_bytes": 1540504, "n_tokens": 457243, "n_chars": 1484970 }, "xlm_roberta.cc100-fr": { "vocab_size": 250002, "n_bytes": 1540504, "n_tokens": 405041, "n_chars": 1484970 }, "yi_34b.cc100-fr": { "vocab_size": 64000, "n_bytes": 1540504, "n_tokens": 533106, "n_chars": 1484970 }, "yi_6b.cc100-fr": { "vocab_size": 64000, "n_bytes": 1540504, "n_tokens": 533106, "n_chars": 1484970 }, "yi_vl34b.cc100-fr": { "vocab_size": 64000, "n_bytes": 1540504, "n_tokens": 532288, "n_chars": 1484970 }, "zephyr_7b_beta.cc100-fr": { "vocab_size": 32000, "n_bytes": 1540504, "n_tokens": 476666, "n_chars": 1484970 } }