# Llama 2 chat prompt format, for reference (system prompt + multi-turn):
# <s>[INST] <<SYS>>
# {{ system_prompt }}
# <</SYS>>

# {{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]

# Generic zero-shot chat template: a two-sentence preamble, one "Human:" turn
# and an open "Assistant: " turn (note the trailing space) left for the model
# to complete. "{{ user_message }}" is stored as a literal placeholder;
# presumably a Jinja-style renderer fills it in elsewhere -- confirm against
# the caller.
ZERO_SHOT_PROMPT = (
    "A chat between a curious human and an artificial intelligence assistant.\n"
    "The assistant gives helpful, detailed, and polite answers to the human's questions.\n"
    "Human: {{ user_message }}\n"
    "Assistant: "
)

# Speaker tag that would open the next human turn. Presumably used to cut the
# generation off at that point -- verify with the code that consumes it.
ZERO_SHOT_STOPWORD = "Human:"

# Single-line instruction template asking for a continuation of the text that
# replaces the literal "{{ user_message }}" placeholder.
LM_PROMPT = "Give the best continuation of the following text: {{ user_message }}"

# Single-turn Llama 2 chat prompt: "<s>[INST]" opener, a system message
# wrapped in <<SYS>> ... <</SYS>>, then the user message and the closing
# "[/INST] " (with a trailing space). The system message is two paragraphs
# separated by one blank line, and another blank line follows <</SYS>>.
# "{{ user_message }}" is stored as a literal placeholder.
LLAMA2_PROMPT = (
    "<s>[INST] <<SYS>>\n"
    "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n"
    "\n"
    "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n"
    "<</SYS>>\n"
    "\n"
    "{{ user_message }} [/INST] "
)

# End-of-sequence marker paired with the Llama 2 chat prompt (it is the
# "stopword" of the llama-2-*-chat entries in MODELS).
LLAMA2_STOPWORD = "</s>"

# Prompt for the 7B MPT chat model (used by the "mpt-7b-chat" entry of MODELS).
# Turns are delimited with <|im_start|>ROLE ... <|im_end|>; the system turn is
# a bullet list, and the prompt ends with an open assistant turn followed by a
# newline. "{{ user_message }}" is stored as a literal placeholder.
MPT_PROMPT_7B = """<|im_start|>system
- You are a helpful assistant chatbot trained by MosaicML.
- You answer questions.
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""

# Previously a second, character-for-character copy of MPT_PROMPT_7B. It is
# now an alias so the two templates cannot silently drift apart; the value is
# unchanged.
# NOTE(review): no use of this name is visible in this part of the file --
# confirm whether a distinct non-chat ("LM") prompt was intended here.
MPT_LM_PROMPT_7B = MPT_PROMPT_7B

# Prompt for the 30B MPT chat model (used by the "mpt-30b-chat" entry of
# MODELS): same turn delimiters, with a one-sentence system message.
MPT_PROMPT_30B = """<|im_start|>system
A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""

# End-of-turn marker of the MPT chat format; it is the "stopword" of both MPT
# chat entries in MODELS.
MPT_STOPWORD = "<|im_end|>"

# Prompt for the Falcon instruct models (used by the falcon-*-instruct entries
# of MODELS). The preamble is ONE line of text -- it is split into adjacent
# literals below, one sentence each, purely for readability -- followed by a
# "User:" turn and an open "Falcon: " turn (with a trailing space).
# "{{ user_message }}" is stored as a literal placeholder.
FALCON_PROMPT = (
    "The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. "
    "In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. "
    "Falcon was built to be respectful, polite and inclusive. "
    "Falcon was built by the Technology Innovation Institute in Abu Dhabi. "
    "Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. "
    "It knows a lot, and always tells the truth. "
    "The conversation begins.\n"
    "User: {{ user_message }}\n"
    "Falcon: "
)

# Speaker tag that would open the next user turn; it is the "stopword" of the
# Falcon instruct entries in MODELS.
FALCON_STOPWORD = "User:"

# Prompt for the Alfred model (used by the "alfred-40b-0723" entry of MODELS):
# a one-line header, a blank line, a "User:" turn and an open "Alfred: " turn
# (with a trailing space). "{{ user_message }}" is stored as a literal
# placeholder.
# NOTE(review): the knowledge cutoff and the "current date" are hard-coded in
# the header text.
ALFRED_PROMPT = "\n".join(
    (
        "Alfred is a large language model trained by LightOn. Knowledge cutoff: November 2022. Current date: 31 July, 2023",
        "",  # blank separator line between header and dialogue
        "User: {{ user_message }}",
        "Alfred: ",
    )
)

# Speaker tag that would open the next user turn; same value as the Falcon
# stopword defined in this file.
ALFRED_STOPWORD = "User:"

# Prompt for the Vicuna v1.3 models (used by the vicuna-*-v1.3 entries of
# MODELS). The system preamble spans two lines; the USER and ASSISTANT turns
# follow on the second line, separated by single spaces, and the prompt ends
# with "ASSISTANT: " (trailing space). "{{ user_message }}" is stored as a
# literal placeholder.
VICUNA_PROMPT = (
    "A chat between a curious user and an artificial intelligence assistant.\n"
    "The assistant gives helpful, detailed, and polite answers to the user's questions. "
    "USER: {{ user_message }} "
    "ASSISTANT: "
)

# NOTE(review): unlike every other *_STOPWORD in this file, this one is the
# empty string. Confirm the consumer treats "" as "no stop sequence" rather
# than matching it or splitting on it.
VICUNA_STOPWORD = ""

MODELS = {
    ################################################
    #                   llama-2                    #
    ################################################
    "llama-2-70b": {
        "name": "llama-2-70b",
        "model_name": "NousResearch/llama-2-70b-hf",
        "model_path": "NousResearch-llama-2-70b-hf",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 70e9,
        "model_family": "llama-2",
    },
    "llama-2-13b": {
        "name": "llama-2-13b",
        "model_name": "NousResearch/llama-2-13b-hf",
        "model_path": "NousResearch-llama-2-13b-hf",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 13e9,
        "model_family": "llama-2",
    },
    "llama-2-7b": {
        "name": "llama-2-7b",
        "model_name": "NousResearch/llama-2-7b-hf",
        "model_path": "NousResearch-llama-2-7b-hf",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 7e9,
        "model_family": "llama-2",
    },
    ################################################
    #                 llama-2-chat                 #
    ################################################
    "llama-2-70b-chat": {
        "name": "llama-2-70b-chat",
        "model_name": "NousResearch/llama-2-70b-chat-hf",
        "model_path": "NousResearch-llama-2-70b-chat-hf",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 70e9,
        "model_family": "llama-2",
    },
    "llama-2-13b-chat": {
        "name": "llama-2-13b-chat",
        "model_name": "NousResearch/llama-2-13b-chat-hf",
        "model_path": "NousResearch-llama-2-13b-chat-hf",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 13e9,
        "model_family": "llama-2",
    },
    "llama-2-7b-chat": {
        "name": "llama-2-7b-chat",
        "model_name": "NousResearch/llama-2-7b-chat-hf",
        "model_path": "NousResearch-llama-2-7b-chat-hf",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 7e9,
        "model_family": "llama-2",
    },
    ################################################
    #                   llama-1                   #
    ################################################
    "llama-65b": {
        "name": "llama-65b",
        "model_name": "huggyllama/llama-65b",
        "model_path": "huggyllama-llama-65b",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 65e9,
        "model_family": "llama-1",
    },
    "llama-30b": {
        "name": "llama-30b",
        "model_name": "huggyllama/llama-30b",
        "model_path": "huggyllama-llama-30b",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 30e9,
        "model_family": "llama-1",
    },
    "llama-13b": {
        "name": "llama-13b",
        "model_name": "huggyllama/llama-13b",
        "model_path": "huggyllama-llama-13b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 13e9,
        "model_family": "llama-1",
    },
    "llama-7b": {
        "name": "llama-7b",
        "model_name": "huggyllama/llama-7b",
        "model_path": "huggyllama-llama-7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 7e9,
        "model_family": "llama-1",
    },
    ################################################
    #                   OPT                   #
    ################################################
    "opt-66b": {
        "name": "opt-66b",
        "model_name": "facebook/opt-66b",
        "model_path": "facebook-opt-66b",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 1024,
        "model_size": 66e9,
        "model_family": "opt",
    },
    "opt-30b": {
        "name": "opt-30b",
        "model_name": "facebook/opt-30b",
        "model_path": "facebook-opt-30b",
        "num_gpus": 4,
        "batch_size": 1,
        "is_chat": False,
        "no_api": True,
        "model_size": 30e9,
        "model_family": "opt",
    },
    "opt-13b": {
        "name": "opt-13b",
        "model_name": "facebook/opt-13b",
        "model_path": "facebook-opt-13b",
        "num_gpus": 2,
        "batch_size": 1,
        "is_chat": False,
        "no_api": True,
        "model_size": 13e9,
        "model_family": "opt",
    },
    "opt-6.7b": {
        "name": "opt-6.7b",
        "model_name": "facebook/opt-6.7b",
        "model_path": "facebook-opt-6.7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "no_api": True,
        "model_size": 6.7e9,
        "model_family": "opt",
    },
    "opt-2.7b": {
        "name": "opt-2.7b",
        "model_name": "facebook/opt-2.7b",
        "model_path": "facebook-opt-2.7b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 2.7e9,
        "model_family": "opt",
    },
    "opt-1.3b": {
        "name": "opt-1.3b",
        "model_name": "facebook/opt-1.3b",
        "model_path": "facebook-opt-1.3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": True,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.3e9,
        "model_family": "opt",
    },
    "opt-350m": {
        "name": "opt-350m",
        "model_name": "facebook/opt-350m",
        "model_path": "facebook-opt-350m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "no_api": True,
        "model_size": 350e6,
        "model_family": "opt",
    },
    "opt-125m": {
        "name": "opt-125m",
        "model_name": "facebook/opt-125m",
        "model_path": "facebook-opt-125m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 125e6,
        "model_family": "opt",
    },
    ################################################
    #                   MPT                   #
    ################################################
    "mpt-30b": {
        "name": "mpt-30b",
        "model_name": "mosaicml/mpt-30b",
        "model_path": "mosaicml-mpt-30b",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 30e9,
        "model_family": "mpt",
    },
    "mpt-7b": {
        "name": "mpt-7b",
        "model_name": "mosaicml/mpt-7b",
        "model_path": "mosaicml-mpt-7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "mpt",
    },
    ################################################
    #                   MPT-Chat                   #
    ################################################
    "mpt-30b-chat": {
        "name": "mpt-30b-chat",
        "model_name": "mosaicml/mpt-30b-chat",
        "model_path": "mosaicml-mpt-30b-chat",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": True,
        "prompt": MPT_PROMPT_30B,
        "stopword": MPT_STOPWORD,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 30e9,
        "model_family": "mpt",
    },
    "mpt-7b-chat": {
        "name": "mpt-7b-chat",
        "model_name": "mosaicml/mpt-7b-chat",
        "model_path": "mosaicml-mpt-7b-chat",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": MPT_PROMPT_7B,
        "stopword": MPT_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "mpt",
    },
    ################################################
    #                   OPENLLAMA                  #
    ################################################
    "openllama-13b": {
        "name": "openllama-13b",
        "model_name": "openlm-research/open_llama_13b",
        "model_path": "openlm-research-open_llama_13b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 13e9,
        "model_family": "openllama",
    },
    "openllama-7b": {
        "name": "openllama-7b",
        "model_name": "openlm-research/open_llama_7b",
        "model_path": "openlm-research-open_llama_7b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "openllama",
    },
    "openllama-3b": {
        "name": "openllama-3b",
        "model_name": "openlm-research/open_llama_3b",
        "model_path": "openlm-research-open_llama_3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "openllama",
    },
    ################################################
    #                   OPENLLAMA-2                #
    ################################################
    # "openllama-2-13b": {
    #     "name": "openllama-2-13b",
    #     "model_name": "openlm-research/open_llama_13b_v2",
    #     "model_path": "openlm-research-open_llama_13b_v2",
    #     "num_gpus": 2,
    #     "batch_size": 1,
    #     "is_chat": False,
    # },
    "openllama-2-7b": {
        "name": "openllama-2-7b",
        "model_name": "openlm-research/open_llama_7b_v2",
        "model_path": "openlm-research-open_llama_7b_v2",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "openllama-2",
    },
    "openllama-2-3b": {
        "name": "openllama-2-3b",
        "model_name": "openlm-research/open_llama_3b_v2",
        "model_path": "openlm-research-open_llama_3b_v2",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "openllama-2",
    },
    ################################################
    #                   Pythia                     #
    ################################################
    "pythia-12b": {
        "name": "pythia-12b",
        "model_name": "EleutherAI/pythia-12b",
        "model_path": "EleutherAI-pythia-12b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 12e9,
        "model_family": "pythia",
    },
    "pythia-6.9b": {
        "name": "pythia-6.9b",
        "model_name": "EleutherAI/pythia-6.9b",
        "model_path": "EleutherAI-pythia-6.9b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 6.9e9,
        "model_family": "pythia",
    },
    "pythia-2.8b": {
        "name": "pythia-2.8b",
        "model_name": "EleutherAI/pythia-2.8b",
        "model_path": "EleutherAI-pythia-2.8b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 2.8e9,
        "model_family": "pythia",
    },
    "pythia-1.4b": {
        "name": "pythia-1.4b",
        "model_name": "EleutherAI/pythia-1.4b",
        "model_path": "EleutherAI-pythia-1.4b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.4e9,
        "model_family": "pythia",
    },
    "pythia-1b": {
        "name": "pythia-1b",
        "model_name": "EleutherAI/pythia-1b",
        "model_path": "EleutherAI-pythia-1b",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1e9,
        "model_family": "pythia",
    },
    "pythia-410m": {
        "name": "pythia-410m",
        "model_name": "EleutherAI/pythia-410m",
        "model_path": "EleutherAI-pythia-410m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 410e6,
        "model_family": "pythia",
    },
    "pythia-160m": {
        "name": "pythia-160m",
        "model_name": "EleutherAI/pythia-160m",
        "model_path": "EleutherAI-pythia-160m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 160e6,
        "model_family": "pythia",
    },
    "pythia-70m": {
        "name": "pythia-70m",
        "model_name": "EleutherAI/pythia-70m",
        "model_path": "EleutherAI-pythia-70m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 70e6,
        "model_family": "pythia",
    },
    ################################################
    #                   Pythia-deduped             #
    ################################################
    "pythia-12b-deduped": {
        "name": "pythia-12b-deduped",
        "model_name": "EleutherAI/pythia-12b-deduped",
        "model_path": "EleutherAI-pythia-12b-deduped",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 12e9,
    },
    "pythia-6.9b-deduped": {
        "name": "pythia-6.9b-deduped",
        "model_name": "EleutherAI/pythia-6.9b-deduped",
        "model_path": "EleutherAI-pythia-6.9b-deduped",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 6.9e9,
    },
    "pythia-2.8b-deduped": {
        "name": "pythia-2.8b-deduped",
        "model_name": "EleutherAI/pythia-2.8b-deduped",
        "model_path": "EleutherAI-pythia-2.8b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 2.8e9,
    },
    "pythia-1.4b-deduped": {
        "name": "pythia-1.4b-deduped",
        "model_name": "EleutherAI/pythia-1.4b-deduped",
        "model_path": "EleutherAI-pythia-1.4b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 1.4e9,
    },
    "pythia-1b-deduped": {
        "name": "pythia-1b-deduped",
        "model_name": "EleutherAI/pythia-1b-deduped",
        "model_path": "EleutherAI-pythia-1b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 1e9,
    },
    "pythia-410m-deduped": {
        "name": "pythia-410m-deduped",
        "model_name": "EleutherAI/pythia-410m-deduped",
        "model_path": "EleutherAI-pythia-410m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 410e6,
    },
    "pythia-160m-deduped": {
        "name": "pythia-160m-deduped",
        "model_name": "EleutherAI/pythia-160m-deduped",
        "model_path": "EleutherAI-pythia-160m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 160e6,
    },
    "pythia-70m-deduped": {
        "name": "pythia-70m-deduped",
        "model_name": "EleutherAI/pythia-70m-deduped",
        "model_path": "EleutherAI-pythia-70m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 70e6,
    },
    ################################################
    #                   GPT2                       #
    ################################################
    "gpt2-xl": {
        "name": "gpt2-xl",
        "model_name": "gpt2-xl",
        "model_path": "gpt2-xl",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.5e9,
        "model_family": "gpt2",
    },
    "gpt2-large": {
        "name": "gpt2-large",
        "model_name": "gpt2-large",
        "model_path": "gpt2-large",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 774e6,
        "model_family": "gpt2",
    },
    "gpt2-medium": {
        "name": "gpt2-medium",
        "model_name": "gpt2-medium",
        "model_path": "gpt2-medium",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 355e6,
        "model_family": "gpt2",
    },
    "gpt2": {
        "name": "gpt2",
        "model_name": "gpt2",
        "model_path": "gpt2",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 124e6,
        "model_family": "gpt2",
    },
    ################################################
    #                   CEREBRAS                   #
    ################################################
    "cerebras-gpt-13b": {  # add 2 gpus but sharded equals to false
        "name": "cerebras-gpt-13b",
        "model_name": "cerebras/Cerebras-GPT-13B",
        "model_path": "cerebras-Cerebras-GPT-13B",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 13e9,
    },
    "cerebras-gpt-6.7b": {
        "name": "cerebras-gpt-6.7b",
        "model_name": "cerebras/Cerebras-GPT-6.7B",
        "model_path": "cerebras-Cerebras-GPT-6.7B",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 6.7e9,
    },
    "cerebras-gpt-2.7b": {
        "name": "cerebras-gpt-2.7b",
        "model_name": "cerebras/Cerebras-GPT-2.7B",
        "model_path": "cerebras-Cerebras-GPT-2.7B",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 2.7e9,
    },
    "cerebras-gpt-1.3b": {
        "name": "cerebras-gpt-1.3b",
        "model_name": "cerebras/Cerebras-GPT-1.3B",
        "model_path": "cerebras-Cerebras-GPT-1.3B",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 1.3e9,
    },
    "cerebras-gpt-256m": {
        "name": "cerebras-gpt-256m",
        "model_name": "cerebras/Cerebras-GPT-256M",
        "model_path": "cerebras-Cerebras-GPT-256M",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 256e6,
    },
    "cerebras-gpt-111m": {
        "name": "cerebras-gpt-111m",
        "model_name": "cerebras/Cerebras-GPT-111M",
        "model_path": "cerebras-Cerebras-GPT-111M",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 111e6,
    },
    ################################################
    #                   Bloom                      #
    ################################################
    "bloom-7.1b": {
        "name": "bloom-7.1b",
        "model_name": "bigscience/bloom-7b1",
        "model_path": "bigscience-bloom-7b1",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7.1e9,
        "model_family": "bloom",
    },
    "bloom-3b": {
        "name": "bloom-3b",
        "model_name": "bigscience/bloom-3b",
        "model_path": "bigscience-bloom-3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "bloom",
    },
    "bloom-1.7b": {
        "name": "bloom-1.7b",
        "model_name": "bigscience/bloom-1b7",
        "model_path": "bigscience-bloom-1b7",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.7e9,
        "model_family": "bloom",
    },
    "bloom-1.1b": {
        "name": "bloom-1.1b",
        "model_name": "bigscience/bloom-1b1",
        "model_path": "bigscience-bloom-1b1",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.1e9,
        "model_family": "bloom",
    },
    "bloom-560m": {
        "name": "bloom-560m",
        "model_name": "bigscience/bloom-560m",
        "model_path": "bigscience-bloom-560m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 560e6,
        "model_family": "bloom",
    },
    ################################################
    #                   Falcon                     #
    ################################################
    "falcon-40b": {
        "name": "falcon-40b",
        "model_name": "tiiuae/falcon-40b",
        "model_path": "tiiuae-falcon-40b",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 40e9,
        "model_family": "falcon",
    },
    "falcon-7b": {
        "name": "falcon-7b",
        "model_name": "tiiuae/falcon-7b",
        "model_path": "tiiuae-falcon-7b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "falcon",
    },
    ################################################
    #                   Falcon-chat                #
    ################################################
    "falcon-40b-instruct": {
        "name": "falcon-40b-instruct",
        "model_name": "tiiuae/falcon-40b-instruct",
        "model_path": "tiiuae-falcon-40b-instruct",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": True,
        "prompt": FALCON_PROMPT,
        "stopword": FALCON_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 40e9,
    },
    "falcon-7b-instruct": {
        "name": "falcon-7b-instruct",
        "model_name": "tiiuae/falcon-7b-instruct",
        "model_path": "tiiuae-falcon-7b-instruct",
        "num_gpus": 1,
        "batch_size": 5,
        "is_chat": True,
        "prompt": FALCON_PROMPT,
        "stopword": FALCON_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 7e9,
    },
    "alfred-40b-0723": {
        "name": "alfred-40b-0723",
        "model_name": "lightonai/alfred-40b-0723",
        "model_path": "lightonai-alfred-40b-0723",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": True,
        "prompt": ALFRED_PROMPT,
        "stopword": ALFRED_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 40e9,
    },
    ################################################
    #                   Vicuna v1.3                #
    ################################################
    "vicuna-33b-v1.3": {
        "name": "vicuna-33b-v1.3",
        "model_name": "lmsys/vicuna-33b-v1.3",
        "model_path": "lmsys-vicuna-33b-v1.3",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 33e9,
    },
    "vicuna-13b-v1.3": {
        "name": "vicuna-13b-v1.3",
        "model_name": "lmsys/vicuna-13b-v1.3",
        "model_path": "lmsys-vicuna-13b-v1.3",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 13e9,
    },
    "vicuna-7b-v1.3": {
        "name": "vicuna-7b-v1.3",
        "model_name": "lmsys/vicuna-7b-v1.3",
        "model_path": "lmsys-vicuna-7b-v1.3",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 7e9,
    },
}


# Maps a model family identifier (the "model_family" value used by entries in
# MODELS) to the list of pretraining dataset identifiers associated with it.
#
# Every dataset name must be its own comma-separated list element: adjacent
# string literals without a comma are silently concatenated by Python
# (e.g. "nih-grants" "hackernews" becomes the single bogus entry
# "nih-grantshackernews").
#
# NOTE(review): some MODELS entries use the family "vicuna", which has no
# entry here -- confirm whether lookups for that family need a fallback.
MODEL_FAMILY_PRETRAINING_DATASETS = {
    "llama-2": ["UNK-commoncrawl"],
    "llama-1": [
        "llama",
        "c4",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "openllama": [
        "redpajama",
        "c4",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "openllama-2": [
        "refinedweb",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "pythia": [
        "thepile",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "gpt2": ["openwebtext"],
    "cerebras": [
        "thepile",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "bloom": [
        "oscar",
        "github",
        "commoncrawl-bloom",
    ],
    "falcon": [
        "refinedweb",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "mpt": [
        "c4",
        "mc4",
        "redpajama",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "opt": [
        "cc-news",
        "cc-stories",
        "thepile",
        "reddit",
        "pubmed",
        "books3",
        "github",
        "openwebtext2",
        "wikipedia",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "hackernews",
    ],
}


if __name__ == "__main__":
    # Quick sanity dump when run as a script: the number of registered
    # models, followed by each model key on its own line.
    model_keys = list(MODELS.keys())
    print(len(model_keys))
    print("\n".join(model_keys))