# Prompt templates, stop words, per-model serving configuration and
# pre-training dataset membership for the supported model families.
#
# Templates use the placeholder ``{{ user_message }}`` for the user's text.
#
# Llama 2 chat format, for reference:
#   [INST] <<SYS>>
#   {{ system_prompt }}
#   <</SYS>>
#
#   {{ user_msg_1 }} [/INST] {{ model_answer_1 }} [INST] {{ user_msg_2 }} [/INST]
#
# NOTE(review): ZERO_SHOT_PROMPT, LM_PROMPT, FALCON_PROMPT, ALFRED_PROMPT and
# VICUNA_PROMPT are single-line templates whose parts are separated by plain
# spaces. Confirm whether the turn markers ("Human:", "User:", "USER:", ...)
# are meant to start on their own lines.

ZERO_SHOT_PROMPT = (
    "A chat between a curious human and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the human's questions. "
    "Human: {{ user_message }} Assistant: "
)
ZERO_SHOT_STOPWORD = "Human:"

LM_PROMPT = "Give the best continuation of the following text: {{ user_message }}"

# The system block is delimited by <<SYS>> / <</SYS>> inside the first [INST].
LLAMA2_PROMPT = """[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

{{ user_message }} [/INST] """
# NOTE(review): empty stop word. Llama 2 normally ends a turn with its EOS
# token ("</s>"); confirm that an empty string is really intended here.
LLAMA2_STOPWORD = ""

# MPT chat models use ChatML: each role header sits on its own line and every
# message is closed by <|im_end|>.
MPT_PROMPT_7B = """<|im_start|>system
- You are a helpful assistant chatbot trained by MosaicML.
- You answer questions.
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""

# Currently the same text as MPT_PROMPT_7B; kept as a separate constant.
MPT_LM_PROMPT_7B = """<|im_start|>system
- You are a helpful assistant chatbot trained by MosaicML.
- You answer questions.
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""

MPT_PROMPT_30B = """<|im_start|>system
A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""
MPT_STOPWORD = "<|im_end|>"

FALCON_PROMPT = (
    "The following is a conversation between a highly knowledgeable and intelligent AI assistant, "
    "called Falcon, and a human user, called User. "
    "In the following interactions, User and Falcon will converse in natural language, "
    "and Falcon will answer User's questions. "
    "Falcon was built to be respectful, polite and inclusive. "
    "Falcon was built by the Technology Innovation Institute in Abu Dhabi. "
    "Falcon will never decline to answer a question, "
    "and always attempts to give an answer that User would be satisfied with. "
    "It knows a lot, and always tells the truth. "
    "The conversation begins. "
    "User: {{ user_message }} Falcon: "
)
FALCON_STOPWORD = "User:"

ALFRED_PROMPT = (
    "Alfred is a large language model trained by LightOn. "
    "Knowledge cutoff: November 2022. "
    "Current date: 31 July, 2023 "
    "User: {{ user_message }} Alfred: "
)
ALFRED_STOPWORD = "User:"

VICUNA_PROMPT = (
    "A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions. "
    "USER: {{ user_message }} ASSISTANT: "
)
# NOTE(review): empty stop word, same question as LLAMA2_STOPWORD.
VICUNA_STOPWORD = ""

# Registry of supported models, keyed by short name (each entry repeats the
# key under "name"). Chat models additionally carry "prompt" and "stopword".
# Optional flags ("no_api", "use_flash_attention", "to_be_quantized",
# "to_be_watermarked") appear only on the entries that set them.
MODELS = {
    # ---------------- llama-2 (base) ----------------
    "llama-2-70b": {
        "name": "llama-2-70b",
        "model_name": "NousResearch/llama-2-70b-hf",
        "model_path": "NousResearch-llama-2-70b-hf",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 70e9,
        "model_family": "llama-2",
    },
    "llama-2-13b": {
        "name": "llama-2-13b",
        "model_name": "NousResearch/llama-2-13b-hf",
        "model_path": "NousResearch-llama-2-13b-hf",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 13e9,
        "model_family": "llama-2",
    },
    "llama-2-7b": {
        "name": "llama-2-7b",
        "model_name": "NousResearch/llama-2-7b-hf",
        "model_path": "NousResearch-llama-2-7b-hf",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 7e9,
        "model_family": "llama-2",
    },
    # ---------------- llama-2 (chat) ----------------
    "llama-2-70b-chat": {
        "name": "llama-2-70b-chat",
        "model_name": "NousResearch/llama-2-70b-chat-hf",
        "model_path": "NousResearch-llama-2-70b-chat-hf",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 70e9,
        "model_family": "llama-2",
    },
    "llama-2-13b-chat": {
        "name": "llama-2-13b-chat",
        "model_name": "NousResearch/llama-2-13b-chat-hf",
        "model_path": "NousResearch-llama-2-13b-chat-hf",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 13e9,
        "model_family": "llama-2",
    },
    "llama-2-7b-chat": {
        "name": "llama-2-7b-chat",
        "model_name": "NousResearch/llama-2-7b-chat-hf",
        "model_path": "NousResearch-llama-2-7b-chat-hf",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 7e9,
        "model_family": "llama-2",
    },
    # ---------------- llama-1 ----------------
    "llama-65b": {
        "name": "llama-65b",
        "model_name": "huggyllama/llama-65b",
        "model_path": "huggyllama-llama-65b",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 65e9,
        "model_family": "llama-1",
    },
    "llama-30b": {
        "name": "llama-30b",
        "model_name": "huggyllama/llama-30b",
        "model_path": "huggyllama-llama-30b",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 30e9,
        "model_family": "llama-1",
    },
    "llama-13b": {
        "name": "llama-13b",
        "model_name": "huggyllama/llama-13b",
        "model_path": "huggyllama-llama-13b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 13e9,
        "model_family": "llama-1",
    },
    "llama-7b": {
        "name": "llama-7b",
        "model_name": "huggyllama/llama-7b",
        "model_path": "huggyllama-llama-7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 7e9,
        "model_family": "llama-1",
    },
    # ---------------- OPT ----------------
    "opt-66b": {
        "name": "opt-66b",
        "model_name": "facebook/opt-66b",
        "model_path": "facebook-opt-66b",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 1024,
        "model_size": 66e9,
        "model_family": "opt",
    },
    "opt-30b": {
        "name": "opt-30b",
        "model_name": "facebook/opt-30b",
        "model_path": "facebook-opt-30b",
        "num_gpus": 4,
        "batch_size": 1,
        "is_chat": False,
        "no_api": True,
        "model_size": 30e9,
        "model_family": "opt",
    },
    "opt-13b": {
        "name": "opt-13b",
        "model_name": "facebook/opt-13b",
        "model_path": "facebook-opt-13b",
        "num_gpus": 2,
        "batch_size": 1,
        "is_chat": False,
        "no_api": True,
        "model_size": 13e9,
        "model_family": "opt",
    },
    "opt-6.7b": {
        "name": "opt-6.7b",
        "model_name": "facebook/opt-6.7b",
        "model_path": "facebook-opt-6.7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "no_api": True,
        "model_size": 6.7e9,
        "model_family": "opt",
    },
    "opt-2.7b": {
        "name": "opt-2.7b",
        "model_name": "facebook/opt-2.7b",
        "model_path": "facebook-opt-2.7b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 2.7e9,
        "model_family": "opt",
    },
    "opt-1.3b": {
        "name": "opt-1.3b",
        "model_name": "facebook/opt-1.3b",
        "model_path": "facebook-opt-1.3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": True,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.3e9,
        "model_family": "opt",
    },
    "opt-350m": {
        "name": "opt-350m",
        "model_name": "facebook/opt-350m",
        "model_path": "facebook-opt-350m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "no_api": True,
        "model_size": 350e6,
        "model_family": "opt",
    },
    "opt-125m": {
        "name": "opt-125m",
        "model_name": "facebook/opt-125m",
        "model_path": "facebook-opt-125m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 125e6,
        "model_family": "opt",
    },
    # ---------------- MPT ----------------
    "mpt-30b": {
        "name": "mpt-30b",
        "model_name": "mosaicml/mpt-30b",
        "model_path": "mosaicml-mpt-30b",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 30e9,
        "model_family": "mpt",
    },
    "mpt-7b": {
        "name": "mpt-7b",
        "model_name": "mosaicml/mpt-7b",
        "model_path": "mosaicml-mpt-7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "mpt",
    },
    # ---------------- MPT-Chat ----------------
    "mpt-30b-chat": {
        "name": "mpt-30b-chat",
        "model_name": "mosaicml/mpt-30b-chat",
        "model_path": "mosaicml-mpt-30b-chat",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": True,
        "prompt": MPT_PROMPT_30B,
        "stopword": MPT_STOPWORD,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 30e9,
        "model_family": "mpt",
    },
    "mpt-7b-chat": {
        "name": "mpt-7b-chat",
        "model_name": "mosaicml/mpt-7b-chat",
        "model_path": "mosaicml-mpt-7b-chat",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": MPT_PROMPT_7B,
        "stopword": MPT_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "mpt",
    },
    # ---------------- OPENLLAMA ----------------
    "openllama-13b": {
        "name": "openllama-13b",
        "model_name": "openlm-research/open_llama_13b",
        "model_path": "openlm-research-open_llama_13b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 13e9,
        "model_family": "openllama",
    },
    "openllama-7b": {
        "name": "openllama-7b",
        "model_name": "openlm-research/open_llama_7b",
        "model_path": "openlm-research-open_llama_7b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "openllama",
    },
    "openllama-3b": {
        "name": "openllama-3b",
        "model_name": "openlm-research/open_llama_3b",
        "model_path": "openlm-research-open_llama_3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "openllama",
    },
    # ---------------- OPENLLAMA-2 ----------------
    # Disabled entry, kept for reference:
    # "openllama-2-13b": {
    #     "name": "openllama-2-13b",
    #     "model_name": "openlm-research/open_llama_13b_v2",
    #     "model_path": "openlm-research-open_llama_13b_v2",
    #     "num_gpus": 2,
    #     "batch_size": 1,
    #     "is_chat": False,
    # },
    "openllama-2-7b": {
        "name": "openllama-2-7b",
        "model_name": "openlm-research/open_llama_7b_v2",
        "model_path": "openlm-research-open_llama_7b_v2",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "openllama-2",
    },
    "openllama-2-3b": {
        "name": "openllama-2-3b",
        "model_name": "openlm-research/open_llama_3b_v2",
        "model_path": "openlm-research-open_llama_3b_v2",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "openllama-2",
    },
    # ---------------- Pythia ----------------
    "pythia-12b": {
        "name": "pythia-12b",
        "model_name": "EleutherAI/pythia-12b",
        "model_path": "EleutherAI-pythia-12b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 12e9,
        "model_family": "pythia",
    },
    "pythia-6.9b": {
        "name": "pythia-6.9b",
        "model_name": "EleutherAI/pythia-6.9b",
        "model_path": "EleutherAI-pythia-6.9b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 6.9e9,
        "model_family": "pythia",
    },
    "pythia-2.8b": {
        "name": "pythia-2.8b",
        "model_name": "EleutherAI/pythia-2.8b",
        "model_path": "EleutherAI-pythia-2.8b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 2.8e9,
        "model_family": "pythia",
    },
    "pythia-1.4b": {
        "name": "pythia-1.4b",
        "model_name": "EleutherAI/pythia-1.4b",
        "model_path": "EleutherAI-pythia-1.4b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.4e9,
        "model_family": "pythia",
    },
    "pythia-1b": {
        "name": "pythia-1b",
        "model_name": "EleutherAI/pythia-1b",
        "model_path": "EleutherAI-pythia-1b",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1e9,
        "model_family": "pythia",
    },
    "pythia-410m": {
        "name": "pythia-410m",
        "model_name": "EleutherAI/pythia-410m",
        "model_path": "EleutherAI-pythia-410m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 410e6,
        "model_family": "pythia",
    },
    "pythia-160m": {
        "name": "pythia-160m",
        "model_name": "EleutherAI/pythia-160m",
        "model_path": "EleutherAI-pythia-160m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 160e6,
        "model_family": "pythia",
    },
    "pythia-70m": {
        "name": "pythia-70m",
        "model_name": "EleutherAI/pythia-70m",
        "model_path": "EleutherAI-pythia-70m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 70e6,
        "model_family": "pythia",
    },
    # ---------------- Pythia-deduped ----------------
    "pythia-12b-deduped": {
        "name": "pythia-12b-deduped",
        "model_name": "EleutherAI/pythia-12b-deduped",
        "model_path": "EleutherAI-pythia-12b-deduped",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 12e9,
    },
    "pythia-6.9b-deduped": {
        "name": "pythia-6.9b-deduped",
        "model_name": "EleutherAI/pythia-6.9b-deduped",
        "model_path": "EleutherAI-pythia-6.9b-deduped",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 6.9e9,
    },
    "pythia-2.8b-deduped": {
        "name": "pythia-2.8b-deduped",
        "model_name": "EleutherAI/pythia-2.8b-deduped",
        "model_path": "EleutherAI-pythia-2.8b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 2.8e9,
    },
    "pythia-1.4b-deduped": {
        "name": "pythia-1.4b-deduped",
        "model_name": "EleutherAI/pythia-1.4b-deduped",
        "model_path": "EleutherAI-pythia-1.4b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 1.4e9,
    },
    "pythia-1b-deduped": {
        "name": "pythia-1b-deduped",
        "model_name": "EleutherAI/pythia-1b-deduped",
        "model_path": "EleutherAI-pythia-1b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 1e9,
    },
    "pythia-410m-deduped": {
        "name": "pythia-410m-deduped",
        "model_name": "EleutherAI/pythia-410m-deduped",
        "model_path": "EleutherAI-pythia-410m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 410e6,
    },
    "pythia-160m-deduped": {
        "name": "pythia-160m-deduped",
        "model_name": "EleutherAI/pythia-160m-deduped",
        "model_path": "EleutherAI-pythia-160m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 160e6,
    },
    "pythia-70m-deduped": {
        "name": "pythia-70m-deduped",
        "model_name": "EleutherAI/pythia-70m-deduped",
        "model_path": "EleutherAI-pythia-70m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 70e6,
    },
    # ---------------- GPT2 ----------------
    # NOTE(review): gpt2-medium and gpt2 request max_total_tokens=2048 while
    # gpt2-xl / gpt2-large use 1024; confirm against GPT-2's context window.
    "gpt2-xl": {
        "name": "gpt2-xl",
        "model_name": "gpt2-xl",
        "model_path": "gpt2-xl",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.5e9,
        "model_family": "gpt2",
    },
    "gpt2-large": {
        "name": "gpt2-large",
        "model_name": "gpt2-large",
        "model_path": "gpt2-large",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 774e6,
        "model_family": "gpt2",
    },
    "gpt2-medium": {
        "name": "gpt2-medium",
        "model_name": "gpt2-medium",
        "model_path": "gpt2-medium",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 355e6,
        "model_family": "gpt2",
    },
    "gpt2": {
        "name": "gpt2",
        "model_name": "gpt2",
        "model_path": "gpt2",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 124e6,
        "model_family": "gpt2",
    },
    # ---------------- CEREBRAS ----------------
    "cerebras-gpt-13b": {
        # TODO: add 2 gpus but with sharded set to false
        "name": "cerebras-gpt-13b",
        "model_name": "cerebras/Cerebras-GPT-13B",
        "model_path": "cerebras-Cerebras-GPT-13B",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 13e9,
    },
    "cerebras-gpt-6.7b": {
        "name": "cerebras-gpt-6.7b",
        "model_name": "cerebras/Cerebras-GPT-6.7B",
        "model_path": "cerebras-Cerebras-GPT-6.7B",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 6.7e9,
    },
    "cerebras-gpt-2.7b": {
        "name": "cerebras-gpt-2.7b",
        "model_name": "cerebras/Cerebras-GPT-2.7B",
        "model_path": "cerebras-Cerebras-GPT-2.7B",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 2.7e9,
    },
    "cerebras-gpt-1.3b": {
        "name": "cerebras-gpt-1.3b",
        "model_name": "cerebras/Cerebras-GPT-1.3B",
        "model_path": "cerebras-Cerebras-GPT-1.3B",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 1.3e9,
    },
    "cerebras-gpt-256m": {
        "name": "cerebras-gpt-256m",
        "model_name": "cerebras/Cerebras-GPT-256M",
        "model_path": "cerebras-Cerebras-GPT-256M",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 256e6,
    },
    "cerebras-gpt-111m": {
        "name": "cerebras-gpt-111m",
        "model_name": "cerebras/Cerebras-GPT-111M",
        "model_path": "cerebras-Cerebras-GPT-111M",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 111e6,
    },
    # ---------------- Bloom ----------------
    "bloom-7.1b": {
        "name": "bloom-7.1b",
        "model_name": "bigscience/bloom-7b1",
        "model_path": "bigscience-bloom-7b1",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7.1e9,
        "model_family": "bloom",
    },
    "bloom-3b": {
        "name": "bloom-3b",
        "model_name": "bigscience/bloom-3b",
        "model_path": "bigscience-bloom-3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "bloom",
    },
    "bloom-1.7b": {
        "name": "bloom-1.7b",
        "model_name": "bigscience/bloom-1b7",
        "model_path": "bigscience-bloom-1b7",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.7e9,
        "model_family": "bloom",
    },
    "bloom-1.1b": {
        "name": "bloom-1.1b",
        "model_name": "bigscience/bloom-1b1",
        "model_path": "bigscience-bloom-1b1",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.1e9,
        "model_family": "bloom",
    },
    "bloom-560m": {
        "name": "bloom-560m",
        "model_name": "bigscience/bloom-560m",
        "model_path": "bigscience-bloom-560m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 560e6,
        "model_family": "bloom",
    },
    # ---------------- Falcon ----------------
    "falcon-40b": {
        "name": "falcon-40b",
        "model_name": "tiiuae/falcon-40b",
        "model_path": "tiiuae-falcon-40b",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 40e9,
        "model_family": "falcon",
    },
    "falcon-7b": {
        "name": "falcon-7b",
        "model_name": "tiiuae/falcon-7b",
        "model_path": "tiiuae-falcon-7b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "falcon",
    },
    # ---------------- Falcon-chat ----------------
    "falcon-40b-instruct": {
        "name": "falcon-40b-instruct",
        "model_name": "tiiuae/falcon-40b-instruct",
        "model_path": "tiiuae-falcon-40b-instruct",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": True,
        "prompt": FALCON_PROMPT,
        "stopword": FALCON_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 40e9,
    },
    "falcon-7b-instruct": {
        "name": "falcon-7b-instruct",
        "model_name": "tiiuae/falcon-7b-instruct",
        "model_path": "tiiuae-falcon-7b-instruct",
        "num_gpus": 1,
        "batch_size": 5,
        "is_chat": True,
        "prompt": FALCON_PROMPT,
        "stopword": FALCON_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 7e9,
    },
    "alfred-40b-0723": {
        "name": "alfred-40b-0723",
        "model_name": "lightonai/alfred-40b-0723",
        "model_path": "lightonai-alfred-40b-0723",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": True,
        "prompt": ALFRED_PROMPT,
        "stopword": ALFRED_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 40e9,
    },
    # ---------------- Vicuna v1.3 ----------------
    "vicuna-33b-v1.3": {
        "name": "vicuna-33b-v1.3",
        "model_name": "lmsys/vicuna-33b-v1.3",
        "model_path": "lmsys-vicuna-33b-v1.3",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 33e9,
    },
    "vicuna-13b-v1.3": {
        "name": "vicuna-13b-v1.3",
        "model_name": "lmsys/vicuna-13b-v1.3",
        "model_path": "lmsys-vicuna-13b-v1.3",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 13e9,
    },
    "vicuna-7b-v1.3": {
        "name": "vicuna-7b-v1.3",
        "model_name": "lmsys/vicuna-7b-v1.3",
        "model_path": "lmsys-vicuna-7b-v1.3",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 7e9,
    },
}

# Dataset identifiers associated with each model family's pre-training corpus,
# keyed by the "model_family" values used in MODELS.
# Every element must be followed by a comma: two adjacent string literals are
# silently concatenated by Python into a single (wrong) dataset name.
# NOTE(review): the "pythia-deduped" and "vicuna" families used in MODELS have
# no entry here; confirm whether consumers handle the missing keys.
MODEL_FAMILY_PRETRAINING_DATASETS = {
    "llama-2": ["UNK-commoncrawl"],
    "llama-1": [
        "llama",
        "c4",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "openllama": [
        "redpajama",
        "c4",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "openllama-2": [
        "refinedweb",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "pythia": [
        "thepile",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "gpt2": ["openwebtext"],
    "cerebras": [
        "thepile",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "bloom": [
        "oscar",
        "github",
        "commoncrawl-bloom",
    ],
    "falcon": [
        "refinedweb",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "mpt": [
        "c4",
        "mc4",
        "redpajama",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "opt": [
        "cc-news",
        "cc-stories",
        "thepile",
        "reddit",
        "pubmed",
        "books3",
        "github",
        "openwebtext2",
        "wikipedia",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "hackernews",
    ],
}

if __name__ == "__main__":
    # Quick inventory: number of registered models, then one name per line.
    print(len(MODELS))
    print("\n".join(MODELS.keys()))