Commit 88e0f7f by wissamantoun
Parent: 7922adf

Upload 4 files

Files changed (4):
  1. app.py +486 -0
  2. deberta_results.csv +0 -0
  3. exp_utils.py +1157 -0
  4. visualize_utils.py +57 -0
app.py ADDED
@@ -0,0 +1,486 @@
import json

import pandas as pd
import plotly.express as px
import streamlit as st
from plotly.subplots import make_subplots

from exp_utils import MODELS
from visualize_utils import viridis_rgb

st.set_page_config(
    page_title="Results Viewer",
    page_icon="📊",
    initial_sidebar_state="expanded",
    layout="wide",
)

# Lookup tables derived from the model registry in exp_utils.py
MODELS_SIZE_MAPPING = {k: v["model_size"] for k, v in MODELS.items()}
MODELS_FAMILY_MAPPING = {k: v["model_family"] for k, v in MODELS.items()}
MODEL_FAMILIES = {model["model_family"] for model in MODELS.values()}
MODEL_NAMES = list(MODELS.keys())

MODEL_NAMES_SORTED_BY_NAME_AND_SIZE = sorted(
    MODEL_NAMES, key=lambda x: (MODELS[x]["model_family"], MODELS[x]["model_size"])
)

MODEL_NAMES_SORTED_BY_SIZE = sorted(
    MODEL_NAMES, key=lambda x: (MODELS[x]["model_size"], MODELS[x]["model_family"])
)

# sort MODELS_SIZE_MAPPING by value (size) then by key (name)
MODELS_SIZE_MAPPING = {
    k: v
    for k, v in sorted(MODELS_SIZE_MAPPING.items(), key=lambda item: (item[1], item[0]))
}

MODELS_SIZE_MAPPING_LIST = list(MODELS_SIZE_MAPPING.keys())

CHAT_MODELS = [x for x in MODEL_NAMES_SORTED_BY_NAME_AND_SIZE if MODELS[x]["is_chat"]]


def clean_dataframe(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    # drop bookkeeping columns (loss, runtime, throughput, ...) so only the
    # per-target ROC AUC columns and the metadata columns remain
    words_to_remove = [
        "epoch",
        "loss",
        "runtime",
        "samples_per_second",
        "steps_per_second",
        "samples",
        "results_dir",
    ]
    df = df.loc[
        :,
        ~df.columns.str.contains("|".join(words_to_remove), case=False, regex=True),
    ]

    # rename the remaining columns by stripping the "eval_" / "_roc_auc" affixes
    df.columns = df.columns.str.replace("_roc_auc", "")
    df.columns = df.columns.str.replace("eval_", "")

    df["model_family"] = df["model_name"].map(MODELS_FAMILY_MAPPING)
    # map each model_name to its model_family
    model_family_dict = {
        k: v
        for k, v in zip(
            df["model_name"].values.tolist(), df["model_family"].values.tolist()
        )
    }

    # average the results over the 5 seeds for each model (seed column is exp_seed)
    df_avg = df.groupby(["model_name"]).mean(numeric_only=True)
    df_std = df.groupby(["model_name"]).std(numeric_only=True)

    # remove the exp_seed column
    df_avg = df_avg.drop(columns=["exp_seed"])
    df_std = df_std.drop(columns=["exp_seed"])
    df_avg["model_family"] = df_avg.index.map(model_family_dict)
    df_std["model_family"] = df_std.index.map(model_family_dict)
    df_avg["model_size"] = df_avg.index.map(MODELS_SIZE_MAPPING)
    df_std["model_size"] = df_std.index.map(MODELS_SIZE_MAPPING)

    # sort rows by model family then model size
    df_avg = df_avg.sort_values(
        by=["model_family", "model_size"], ascending=[True, True]
    )
    df_std = df_std.sort_values(
        by=["model_family", "model_size"], ascending=[True, True]
    )

    # keep only rows that also appear as columns, in column order,
    # so the train/test matrix is square
    available_rows = [x for x in df_avg.columns if x in df_avg.index]
    df_avg = df_avg.reindex(available_rows)

    available_rows = [x for x in df_std.columns if x in df_std.index]
    df_std = df_std.reindex(available_rows)

    return df_avg, df_std


def get_data(path):
    df, df_std = clean_dataframe(pd.read_csv(path, index_col=0))
    return df, df_std
def filter_df(
    df: pd.DataFrame,
    model_family_train: list,
    model_family_test: list,
    model_size_train: tuple,
    model_size_test: tuple,
    is_chat_train: bool,
    is_chat_test: bool,
    sort_by_size: bool,
    split_chat_models: bool,
    is_debug: bool,
) -> pd.DataFrame:
    # rows are the models a detector was trained on; columns are the
    # target models it is evaluated against

    # filter rows
    if is_debug:
        st.write("No filters")
        st.write(df)
    df = df.loc[
        (df["model_size"] >= model_size_train[0] * 1e9)
        & (df["model_size"] <= model_size_train[1] * 1e9)
    ]
    if is_debug:
        st.write("Filter model size train")
        st.write(df)
    df = df.loc[df["model_family"].isin(model_family_train)]
    if is_debug:
        st.write("Filter model family train")
        st.write(df)
    if is_chat_train != "Both":
        df = df.loc[df["is_chat"] == is_chat_train]
        if is_debug:
            st.write("Filter is chat train")
            st.write(df)

    # filter columns by test model size
    columns_to_keep = []
    for column in df.columns:
        if column in MODELS.keys():
            model_size = MODELS[column]["model_size"]
            if (
                model_size >= model_size_test[0] * 1e9
                and model_size <= model_size_test[1] * 1e9
            ):
                columns_to_keep.append(column)

    df = df[sorted(set(columns_to_keep))]
    if is_debug:
        st.write("Filter model size test")
        st.write(df)

    # filter columns by test model family
    columns_to_keep = []
    for column in df.columns:
        for model_family in model_family_test:
            if model_family == MODELS[column]["model_family"]:
                columns_to_keep.append(column)
    df = df[sorted(set(columns_to_keep))]
    if is_debug:
        st.write("Filter model family test")
        st.write(df)

    if is_chat_test != "Both":
        # filter columns by chat / non-chat test models
        columns_to_keep = []
        for column in df.columns:
            if MODELS[column]["is_chat"] == is_chat_test:
                columns_to_keep.append(column)
        df = df[sorted(set(columns_to_keep))]
        if is_debug:
            st.write("Filter is chat test")
            st.write(df)

    df = df.select_dtypes(include="number")
    if is_debug:
        st.write("Select dtypes to be only numbers")
        st.write(df)

    # sort columns either by size alone or by family then size
    if sort_by_size:
        columns_in = [x for x in MODEL_NAMES_SORTED_BY_SIZE if x in df.columns]
    else:
        columns_in = [x for x in MODEL_NAMES_SORTED_BY_NAME_AND_SIZE if x in df.columns]
    df = df[columns_in]
    if is_debug:
        st.write("Sort columns")
        st.write(df)

    # sort rows the same way as the columns
    if sort_by_size:
        available_rows = [x for x in MODEL_NAMES_SORTED_BY_SIZE if x in df.index]
    else:
        available_rows = [
            x for x in MODEL_NAMES_SORTED_BY_NAME_AND_SIZE if x in df.index
        ]
    df = df.reindex(available_rows)
    if is_debug:
        st.write("Sort rows")
        st.write(df)

    if split_chat_models:
        # move chat models to the end of the columns, sorted by size
        chat_models = [x for x in CHAT_MODELS if x in df.columns]
        chat_models = sorted(chat_models, key=lambda x: MODELS[x]["model_size"])
        df = df[[x for x in df.columns if x not in chat_models] + chat_models]

        # move chat models to the end of the rows, sorted by size
        chat_models = [x for x in CHAT_MODELS if x in df.index]
        chat_models = sorted(chat_models, key=lambda x: MODELS[x]["model_size"])
        df = df.reindex([x for x in df.index if x not in chat_models] + chat_models)
        if is_debug:
            st.write("Split chat models")
            st.write(df)
    return df
df, df_std = get_data("./deberta_results.csv")

with open("./ood_results.json", "r") as f:
    ood_results = json.load(f)

ood_results = pd.DataFrame(ood_results)
ood_results = ood_results.set_index("model_name")
ood_results = ood_results.drop(
    columns=["exp_name", "accuracy", "f1", "precision", "recall"]
)
ood_results.columns = ["seed", "Adversarial"]

ood_results_avg = ood_results.groupby(["model_name"]).mean()
ood_results_std = ood_results.groupby(["model_name"]).std()

# sidebar filters
show_diff = st.sidebar.checkbox("Show Diff", value=False)
sort_by_size = st.sidebar.checkbox("Sort by size", value=False)
split_chat_models = st.sidebar.checkbox("Split chat models", value=False)
add_mean = st.sidebar.checkbox("Add mean", value=False)
show_std = st.sidebar.checkbox("Show std", value=False)
model_size_train = st.sidebar.slider(
    "Train Model Size in Billion", min_value=0, max_value=100, value=(0, 100), step=1
)
model_size_test = st.sidebar.slider(
    "Test Model Size in Billion", min_value=0, max_value=100, value=(0, 100), step=1
)
is_chat_train = st.sidebar.selectbox("(Train) Is Chat?", [True, False, "Both"], index=2)
is_chat_test = st.sidebar.selectbox("(Test) Is Chat?", [True, False, "Both"], index=2)
model_family_train = st.sidebar.multiselect(
    "Model Family Train",
    MODEL_FAMILIES,
    default=MODEL_FAMILIES,
)
model_family_test = st.sidebar.multiselect(
    "Model Family Test",
    list(MODEL_FAMILIES) + ["Adversarial"],
    default=MODEL_FAMILIES,
)

add_adversarial = False
if "Adversarial" in model_family_test:
    model_family_test.remove("Adversarial")
    add_adversarial = True

sort_by_adversarial = False
if add_adversarial:
    sort_by_adversarial = st.sidebar.checkbox("Sort by adversarial", value=False)

if st.sidebar.checkbox("Use default color scale", value=False):
    color_scale = "Viridis_r"
else:
    color_scale = viridis_rgb

is_debug = st.sidebar.checkbox("Debug", value=False)

if show_std:
    selected_df = df_std.copy()
else:
    selected_df = df.copy()

if show_diff:
    # set aside the metadata columns {"model_size", "model_family", "is_chat"},
    # subtract each column's self-detection score (the diagonal) from every
    # cell, then re-attach the metadata
    columns_to_keep = ["model_size", "model_family", "is_chat"]
    to_be_added = selected_df[columns_to_keep]
    selected_df = selected_df.drop(columns=columns_to_keep)
    selected_df = selected_df.sub(selected_df.values.diagonal(), axis=1)
    selected_df = selected_df.join(to_be_added)

filtered_df = filter_df(
    selected_df,
    model_family_train,
    model_family_test,
    model_size_train,
    model_size_test,
    is_chat_train,
    is_chat_test,
    sort_by_size,
    split_chat_models,
    is_debug,
)

if add_adversarial:
    filtered_df = filtered_df.join(ood_results_avg)

if add_mean:
    col_mean = filtered_df.mean(axis=1)
    row_mean = filtered_df.mean(axis=0)
    diag = filtered_df.values.diagonal()
    filtered_df["mean"] = col_mean
    filtered_df.loc["mean"] = row_mean

filtered_df = filtered_df * 100
filtered_df = filtered_df.round(0)

# sort by the "Adversarial" column
if sort_by_adversarial:
    filtered_df = filtered_df.sort_values(by=["Adversarial"], ascending=False)

# stop early if the filters removed every row or every column
if filtered_df.shape[0] == 0 or filtered_df.shape[1] == 0:
    st.write("No results found")
    st.stop()

fig = px.imshow(
    filtered_df.values,
    x=list(filtered_df.columns),
    y=list(filtered_df.index),
    color_continuous_scale=color_scale,
    contrast_rescaling=None,
    text_auto=True,
    aspect="auto",
)

width = st.sidebar.text_input("Width", "1920")
height = st.sidebar.text_input("Height", "1080")
scale = st.sidebar.text_input("Scale", "1.0")
margin = st.sidebar.text_input("Margin[l,r,b,t]", "200,100,100,100")
fig.update_traces(textfont_size=9)
fig.update_layout(
    xaxis={"side": "top"},
    yaxis={"side": "left"},
    margin=dict(
        l=int(margin.split(",")[0]),
        r=int(margin.split(",")[1]),
        b=int(margin.split(",")[2]),
        t=int(margin.split(",")[3]),
    ),
    font=dict(size=10),
)
fig.update_xaxes(tickangle=45)
fig.update_xaxes(tickmode="linear")
fig.update_yaxes(tickmode="linear")
st.plotly_chart(fig, use_container_width=True)

if st.sidebar.button("save", key="save"):
    fig.write_image(
        "fig1.pdf",
        width=int(width),
        height=int(height),
        validate=True,
        scale=float(scale),
    )

# plot the column/row means and the diagonal against model size
if add_mean and not show_diff:
    # warn if any chat models survived the filters: their scores skew the means
    if any(x in filtered_df.columns for x in CHAT_MODELS) or any(
        x in filtered_df.index for x in CHAT_MODELS
    ):
        st.warning(
            "Chat models are in the filtered df columns or index. "
            "This will cause the mean graph to be skewed."
        )

    fig3 = px.scatter(
        y=row_mean,
        x=[MODELS[x]["model_size"] for x in filtered_df.columns if x not in ["mean"]],
        color=[
            MODELS[x]["model_family"] for x in filtered_df.columns if x not in ["mean"]
        ],
        color_discrete_sequence=px.colors.qualitative.Plotly,
        title="",
        labels={
            "x": "Target Model Size",
            "y": "Average ROC AUC",
            "color": "Model Family",
        },
        log_x=True,
        trendline="ols",
    )
    fig4 = px.scatter(
        y=diag,
        x=[MODELS[x]["model_size"] for x in filtered_df.columns if x not in ["mean"]],
        color=[
            MODELS[x]["model_family"] for x in filtered_df.columns if x not in ["mean"]
        ],
        color_discrete_sequence=px.colors.qualitative.Plotly,
        title="",
        labels={
            "x": "Target Model Size",
            "y": "Self ROC AUC",
            "color": "Model Family",
        },
        log_x=True,
        trendline="ols",
    )

    # put the two plots side by side
    fig_subplot = make_subplots(
        rows=1,
        cols=2,
        shared_yaxes=False,
        subplot_titles=("Self Detection ROC AUC", "Average Target ROC AUC"),
    )
    for i, figure in enumerate([fig4, fig3]):
        for trace in range(len(figure["data"])):
            trace_data = figure["data"][trace]
            if i == 1:
                # only the first subplot contributes legend entries
                trace_data["showlegend"] = False
            fig_subplot.append_trace(trace_data, row=1, col=i + 1)

    fig_subplot.update_xaxes(type="log")
    fig_subplot.update_yaxes(range=[0.90, 1])

    fig_subplot.update_layout(
        height=500,
        width=1200,
    )
    # put the legend at the bottom
    fig_subplot.update_layout(
        legend=dict(orientation="h", yanchor="bottom", y=-0.2, x=0.09)
    )
    st.plotly_chart(fig_subplot, use_container_width=True)

    fig2 = px.scatter(
        y=col_mean,
        x=[MODELS_SIZE_MAPPING[x] for x in filtered_df.index if x not in ["mean"]],
        color=[
            MODELS_FAMILY_MAPPING[x] for x in filtered_df.index if x not in ["mean"]
        ],
        color_discrete_sequence=px.colors.qualitative.Plotly,
        title="Mean vs Train Model Size",
        log_x=True,
        trendline="ols",
    )
    fig2.update_layout(
        height=600,
        width=900,
    )
    st.plotly_chart(fig2, use_container_width=False)
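
For reference, a minimal self-contained sketch (not part of the commit; the model names and scores below are made up) of the "Show Diff" transform used above. With axis=1, pandas aligns the diagonal vector with the columns, so every cell has the column model's self-detection score subtracted and the diagonal becomes 0:

import numpy as np
import pandas as pd

models = ["gpt2", "opt-1.3b", "pythia-1b"]  # hypothetical subset
scores = pd.DataFrame(
    np.array(
        [
            [0.99, 0.80, 0.75],
            [0.82, 0.98, 0.70],
            [0.78, 0.72, 0.97],
        ]
    ),
    index=models,    # row: model the detector was trained on
    columns=models,  # column: model the detector is evaluated against
)
# Same call as in the show_diff branch: cell (i, j) becomes
# scores[i, j] - scores[j, j], the transfer gap relative to self-detection.
diff = scores.sub(scores.values.diagonal(), axis=1)
print(diff.round(2))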
deberta_results.csv ADDED
The diff for this file is too large to render. See raw diff
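
The CSV's exact schema is not shown here, but clean_dataframe() in app.py implies one row per (model_name, exp_seed) pair, with "eval_<target>_roc_auc" score columns alongside bookkeeping columns (loss, runtime, throughput). A hedged sketch for inspecting it locally:

import pandas as pd

df = pd.read_csv("deberta_results.csv", index_col=0)
print(df.columns.tolist()[:10])  # peek at the column layout
print(df["model_name"].nunique(), "detector models;", df["exp_seed"].nunique(), "seeds")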
 
exp_utils.py ADDED
@@ -0,0 +1,1157 @@
# LLAMA2 chat template, for reference:
# <s>[INST] <<SYS>>
# {{ system_prompt }}
# <</SYS>>

# {{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]

ZERO_SHOT_PROMPT = """A chat between a curious human and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the human's questions.
Human: {{ user_message }}
Assistant: """

ZERO_SHOT_STOPWORD = "Human:"

LM_PROMPT = """Give the best continuation of the following text: {{ user_message }}"""

LLAMA2_PROMPT = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

{{ user_message }} [/INST] """

LLAMA2_STOPWORD = "</s>"

MPT_PROMPT_7B = """<|im_start|>system
- You are a helpful assistant chatbot trained by MosaicML.
- You answer questions.
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""

MPT_LM_PROMPT_7B = """<|im_start|>system
- You are a helpful assistant chatbot trained by MosaicML.
- You answer questions.
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""

MPT_PROMPT_30B = """<|im_start|>system
A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""

MPT_STOPWORD = "<|im_end|>"

FALCON_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.
User: {{ user_message }}
Falcon: """

FALCON_STOPWORD = "User:"

ALFRED_PROMPT = """Alfred is a large language model trained by LightOn. Knowledge cutoff: November 2022. Current date: 31 July, 2023

User: {{ user_message }}
Alfred: """

ALFRED_STOPWORD = "User:"

VICUNA_PROMPT = """A chat between a curious user and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {{ user_message }} ASSISTANT: """

VICUNA_STOPWORD = ""
MODELS = {
    ################################################
    #                   llama-2                    #
    ################################################
    "llama-2-70b": {
        "name": "llama-2-70b",
        "model_name": "NousResearch/llama-2-70b-hf",
        "model_path": "NousResearch-llama-2-70b-hf",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 70e9,
        "model_family": "llama-2",
    },
    "llama-2-13b": {
        "name": "llama-2-13b",
        "model_name": "NousResearch/llama-2-13b-hf",
        "model_path": "NousResearch-llama-2-13b-hf",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 13e9,
        "model_family": "llama-2",
    },
    "llama-2-7b": {
        "name": "llama-2-7b",
        "model_name": "NousResearch/llama-2-7b-hf",
        "model_path": "NousResearch-llama-2-7b-hf",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 7e9,
        "model_family": "llama-2",
    },
    ################################################
    #                 llama-2 chat                 #
    ################################################
    "llama-2-70b-chat": {
        "name": "llama-2-70b-chat",
        "model_name": "NousResearch/llama-2-70b-chat-hf",
        "model_path": "NousResearch-llama-2-70b-chat-hf",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 70e9,
        "model_family": "llama-2",
    },
    "llama-2-13b-chat": {
        "name": "llama-2-13b-chat",
        "model_name": "NousResearch/llama-2-13b-chat-hf",
        "model_path": "NousResearch-llama-2-13b-chat-hf",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 13e9,
        "model_family": "llama-2",
    },
    "llama-2-7b-chat": {
        "name": "llama-2-7b-chat",
        "model_name": "NousResearch/llama-2-7b-chat-hf",
        "model_path": "NousResearch-llama-2-7b-chat-hf",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": LLAMA2_PROMPT,
        "stopword": LLAMA2_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 7e9,
        "model_family": "llama-2",
    },
    ################################################
    #                   llama-1                    #
    ################################################
    "llama-65b": {
        "name": "llama-65b",
        "model_name": "huggyllama/llama-65b",
        "model_path": "huggyllama-llama-65b",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 65e9,
        "model_family": "llama-1",
    },
    "llama-30b": {
        "name": "llama-30b",
        "model_name": "huggyllama/llama-30b",
        "model_path": "huggyllama-llama-30b",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 30e9,
        "model_family": "llama-1",
    },
    "llama-13b": {
        "name": "llama-13b",
        "model_name": "huggyllama/llama-13b",
        "model_path": "huggyllama-llama-13b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 13e9,
        "model_family": "llama-1",
    },
    "llama-7b": {
        "name": "llama-7b",
        "model_name": "huggyllama/llama-7b",
        "model_path": "huggyllama-llama-7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "to_be_quantized": True,
        "to_be_watermarked": True,
        "model_size": 7e9,
        "model_family": "llama-1",
    },
    ################################################
    #                     OPT                      #
    ################################################
    "opt-66b": {
        "name": "opt-66b",
        "model_name": "facebook/opt-66b",
        "model_path": "facebook-opt-66b",
        "num_gpus": 4,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 1024,
        "model_size": 66e9,
        "model_family": "opt",
    },
    "opt-30b": {
        "name": "opt-30b",
        "model_name": "facebook/opt-30b",
        "model_path": "facebook-opt-30b",
        "num_gpus": 4,
        "batch_size": 1,
        "is_chat": False,
        "no_api": True,
        "model_size": 30e9,
        "model_family": "opt",
    },
    "opt-13b": {
        "name": "opt-13b",
        "model_name": "facebook/opt-13b",
        "model_path": "facebook-opt-13b",
        "num_gpus": 2,
        "batch_size": 1,
        "is_chat": False,
        "no_api": True,
        "model_size": 13e9,
        "model_family": "opt",
    },
    "opt-6.7b": {
        "name": "opt-6.7b",
        "model_name": "facebook/opt-6.7b",
        "model_path": "facebook-opt-6.7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "no_api": True,
        "model_size": 6.7e9,
        "model_family": "opt",
    },
    "opt-2.7b": {
        "name": "opt-2.7b",
        "model_name": "facebook/opt-2.7b",
        "model_path": "facebook-opt-2.7b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 2.7e9,
        "model_family": "opt",
    },
    "opt-1.3b": {
        "name": "opt-1.3b",
        "model_name": "facebook/opt-1.3b",
        "model_path": "facebook-opt-1.3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": True,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.3e9,
        "model_family": "opt",
    },
    "opt-350m": {
        "name": "opt-350m",
        "model_name": "facebook/opt-350m",
        "model_path": "facebook-opt-350m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "no_api": True,
        "model_size": 350e6,
        "model_family": "opt",
    },
    "opt-125m": {
        "name": "opt-125m",
        "model_name": "facebook/opt-125m",
        "model_path": "facebook-opt-125m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 125e6,
        "model_family": "opt",
    },
    ################################################
    #                     MPT                      #
    ################################################
    "mpt-30b": {
        "name": "mpt-30b",
        "model_name": "mosaicml/mpt-30b",
        "model_path": "mosaicml-mpt-30b",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 1024,
        "model_size": 30e9,
        "model_family": "mpt",
    },
    "mpt-7b": {
        "name": "mpt-7b",
        "model_name": "mosaicml/mpt-7b",
        "model_path": "mosaicml-mpt-7b",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "mpt",
    },
    ################################################
    #                   MPT-Chat                   #
    ################################################
    "mpt-30b-chat": {
        "name": "mpt-30b-chat",
        "model_name": "mosaicml/mpt-30b-chat",
        "model_path": "mosaicml-mpt-30b-chat",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": True,
        "prompt": MPT_PROMPT_30B,
        "stopword": MPT_STOPWORD,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 30e9,
        "model_family": "mpt",
    },
    "mpt-7b-chat": {
        "name": "mpt-7b-chat",
        "model_name": "mosaicml/mpt-7b-chat",
        "model_path": "mosaicml-mpt-7b-chat",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": MPT_PROMPT_7B,
        "stopword": MPT_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "mpt",
    },
    ################################################
    #                  OPENLLAMA                   #
    ################################################
    "openllama-13b": {
        "name": "openllama-13b",
        "model_name": "openlm-research/open_llama_13b",
        "model_path": "openlm-research-open_llama_13b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 13e9,
        "model_family": "openllama",
    },
    "openllama-7b": {
        "name": "openllama-7b",
        "model_name": "openlm-research/open_llama_7b",
        "model_path": "openlm-research-open_llama_7b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "openllama",
    },
    "openllama-3b": {
        "name": "openllama-3b",
        "model_name": "openlm-research/open_llama_3b",
        "model_path": "openlm-research-open_llama_3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "openllama",
    },
    ################################################
    #                 OPENLLAMA-2                  #
    ################################################
    # "openllama-2-13b": {
    #     "name": "openllama-2-13b",
    #     "model_name": "openlm-research/open_llama_13b_v2",
    #     "model_path": "openlm-research-open_llama_13b_v2",
    #     "num_gpus": 2,
    #     "batch_size": 1,
    #     "is_chat": False,
    # },
    "openllama-2-7b": {
        "name": "openllama-2-7b",
        "model_name": "openlm-research/open_llama_7b_v2",
        "model_path": "openlm-research-open_llama_7b_v2",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "openllama-2",
    },
    "openllama-2-3b": {
        "name": "openllama-2-3b",
        "model_name": "openlm-research/open_llama_3b_v2",
        "model_path": "openlm-research-open_llama_3b_v2",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "openllama-2",
    },
    ################################################
    #                    Pythia                    #
    ################################################
    "pythia-12b": {
        "name": "pythia-12b",
        "model_name": "EleutherAI/pythia-12b",
        "model_path": "EleutherAI-pythia-12b",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 12e9,
        "model_family": "pythia",
    },
    "pythia-6.9b": {
        "name": "pythia-6.9b",
        "model_name": "EleutherAI/pythia-6.9b",
        "model_path": "EleutherAI-pythia-6.9b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 6.9e9,
        "model_family": "pythia",
    },
    "pythia-2.8b": {
        "name": "pythia-2.8b",
        "model_name": "EleutherAI/pythia-2.8b",
        "model_path": "EleutherAI-pythia-2.8b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 2.8e9,
        "model_family": "pythia",
    },
    "pythia-1.4b": {
        "name": "pythia-1.4b",
        "model_name": "EleutherAI/pythia-1.4b",
        "model_path": "EleutherAI-pythia-1.4b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.4e9,
        "model_family": "pythia",
    },
    "pythia-1b": {
        "name": "pythia-1b",
        "model_name": "EleutherAI/pythia-1b",
        "model_path": "EleutherAI-pythia-1b",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1e9,
        "model_family": "pythia",
    },
    "pythia-410m": {
        "name": "pythia-410m",
        "model_name": "EleutherAI/pythia-410m",
        "model_path": "EleutherAI-pythia-410m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 410e6,
        "model_family": "pythia",
    },
    "pythia-160m": {
        "name": "pythia-160m",
        "model_name": "EleutherAI/pythia-160m",
        "model_path": "EleutherAI-pythia-160m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 160e6,
        "model_family": "pythia",
    },
    "pythia-70m": {
        "name": "pythia-70m",
        "model_name": "EleutherAI/pythia-70m",
        "model_path": "EleutherAI-pythia-70m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 70e6,
        "model_family": "pythia",
    },
    ################################################
    #                Pythia-deduped                #
    ################################################
    "pythia-12b-deduped": {
        "name": "pythia-12b-deduped",
        "model_name": "EleutherAI/pythia-12b-deduped",
        "model_path": "EleutherAI-pythia-12b-deduped",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 12e9,
    },
    "pythia-6.9b-deduped": {
        "name": "pythia-6.9b-deduped",
        "model_name": "EleutherAI/pythia-6.9b-deduped",
        "model_path": "EleutherAI-pythia-6.9b-deduped",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 6.9e9,
    },
    "pythia-2.8b-deduped": {
        "name": "pythia-2.8b-deduped",
        "model_name": "EleutherAI/pythia-2.8b-deduped",
        "model_path": "EleutherAI-pythia-2.8b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 2.8e9,
    },
    "pythia-1.4b-deduped": {
        "name": "pythia-1.4b-deduped",
        "model_name": "EleutherAI/pythia-1.4b-deduped",
        "model_path": "EleutherAI-pythia-1.4b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 1.4e9,
    },
    "pythia-1b-deduped": {
        "name": "pythia-1b-deduped",
        "model_name": "EleutherAI/pythia-1b-deduped",
        "model_path": "EleutherAI-pythia-1b-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "use_flash_attention": False,
        "max_total_tokens": 2048,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 1e9,
    },
    "pythia-410m-deduped": {
        "name": "pythia-410m-deduped",
        "model_name": "EleutherAI/pythia-410m-deduped",
        "model_path": "EleutherAI-pythia-410m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 410e6,
    },
    "pythia-160m-deduped": {
        "name": "pythia-160m-deduped",
        "model_name": "EleutherAI/pythia-160m-deduped",
        "model_path": "EleutherAI-pythia-160m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 160e6,
    },
    "pythia-70m-deduped": {
        "name": "pythia-70m-deduped",
        "model_name": "EleutherAI/pythia-70m-deduped",
        "model_path": "EleutherAI-pythia-70m-deduped",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "pythia-deduped",
        "model_size": 70e6,
    },
    ################################################
    #                     GPT2                     #
    ################################################
    "gpt2-xl": {
        "name": "gpt2-xl",
        "model_name": "gpt2-xl",
        "model_path": "gpt2-xl",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.5e9,
        "model_family": "gpt2",
    },
    "gpt2-large": {
        "name": "gpt2-large",
        "model_name": "gpt2-large",
        "model_path": "gpt2-large",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 774e6,
        "model_family": "gpt2",
    },
    "gpt2-medium": {
        "name": "gpt2-medium",
        "model_name": "gpt2-medium",
        "model_path": "gpt2-medium",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 355e6,
        "model_family": "gpt2",
    },
    "gpt2": {
        "name": "gpt2",
        "model_name": "gpt2",
        "model_path": "gpt2",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 124e6,
        "model_family": "gpt2",
    },
    ################################################
    #                   CEREBRAS                   #
    ################################################
    "cerebras-gpt-13b": {  # with 2 GPUs, set sharded to false
        "name": "cerebras-gpt-13b",
        "model_name": "cerebras/Cerebras-GPT-13B",
        "model_path": "cerebras-Cerebras-GPT-13B",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 13e9,
    },
    "cerebras-gpt-6.7b": {
        "name": "cerebras-gpt-6.7b",
        "model_name": "cerebras/Cerebras-GPT-6.7B",
        "model_path": "cerebras-Cerebras-GPT-6.7B",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 6.7e9,
    },
    "cerebras-gpt-2.7b": {
        "name": "cerebras-gpt-2.7b",
        "model_name": "cerebras/Cerebras-GPT-2.7B",
        "model_path": "cerebras-Cerebras-GPT-2.7B",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 2.7e9,
    },
    "cerebras-gpt-1.3b": {
        "name": "cerebras-gpt-1.3b",
        "model_name": "cerebras/Cerebras-GPT-1.3B",
        "model_path": "cerebras-Cerebras-GPT-1.3B",
        "num_gpus": 1,
        "batch_size": 1,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 1.3e9,
    },
    "cerebras-gpt-256m": {
        "name": "cerebras-gpt-256m",
        "model_name": "cerebras/Cerebras-GPT-256M",
        "model_path": "cerebras-Cerebras-GPT-256M",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 256e6,
    },
    "cerebras-gpt-111m": {
        "name": "cerebras-gpt-111m",
        "model_name": "cerebras/Cerebras-GPT-111M",
        "model_path": "cerebras-Cerebras-GPT-111M",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "cerebras",
        "model_size": 111e6,
    },
    ################################################
    #                    Bloom                     #
    ################################################
    "bloom-7.1b": {
        "name": "bloom-7.1b",
        "model_name": "bigscience/bloom-7b1",
        "model_path": "bigscience-bloom-7b1",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7.1e9,
        "model_family": "bloom",
    },
    "bloom-3b": {
        "name": "bloom-3b",
        "model_name": "bigscience/bloom-3b",
        "model_path": "bigscience-bloom-3b",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 3e9,
        "model_family": "bloom",
    },
    "bloom-1.7b": {
        "name": "bloom-1.7b",
        "model_name": "bigscience/bloom-1b7",
        "model_path": "bigscience-bloom-1b7",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.7e9,
        "model_family": "bloom",
    },
    "bloom-1.1b": {
        "name": "bloom-1.1b",
        "model_name": "bigscience/bloom-1b1",
        "model_path": "bigscience-bloom-1b1",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 1.1e9,
        "model_family": "bloom",
    },
    "bloom-560m": {
        "name": "bloom-560m",
        "model_name": "bigscience/bloom-560m",
        "model_path": "bigscience-bloom-560m",
        "num_gpus": 1,
        "batch_size": 16,
        "is_chat": False,
        "max_total_tokens": 1024,
        "max_input_length": 256,
        "max_batch_prefill_tokens": 4096,
        "model_size": 560e6,
        "model_family": "bloom",
    },
    ################################################
    #                    Falcon                    #
    ################################################
    "falcon-40b": {
        "name": "falcon-40b",
        "model_name": "tiiuae/falcon-40b",
        "model_path": "tiiuae-falcon-40b",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 40e9,
        "model_family": "falcon",
    },
    "falcon-7b": {
        "name": "falcon-7b",
        "model_name": "tiiuae/falcon-7b",
        "model_path": "tiiuae-falcon-7b",
        "num_gpus": 1,
        "batch_size": 8,
        "is_chat": False,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_size": 7e9,
        "model_family": "falcon",
    },
    ################################################
    #                 Falcon-chat                  #
    ################################################
    "falcon-40b-instruct": {
        "name": "falcon-40b-instruct",
        "model_name": "tiiuae/falcon-40b-instruct",
        "model_path": "tiiuae-falcon-40b-instruct",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": True,
        "prompt": FALCON_PROMPT,
        "stopword": FALCON_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 40e9,
    },
    "falcon-7b-instruct": {
        "name": "falcon-7b-instruct",
        "model_name": "tiiuae/falcon-7b-instruct",
        "model_path": "tiiuae-falcon-7b-instruct",
        "num_gpus": 1,
        "batch_size": 5,
        "is_chat": True,
        "prompt": FALCON_PROMPT,
        "stopword": FALCON_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 7e9,
    },
    "alfred-40b-0723": {
        "name": "alfred-40b-0723",
        "model_name": "lightonai/alfred-40b-0723",
        "model_path": "lightonai-alfred-40b-0723",
        "num_gpus": 4,
        "batch_size": 4,
        "is_chat": True,
        "prompt": ALFRED_PROMPT,
        "stopword": ALFRED_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "falcon",
        "model_size": 40e9,
    },
    ################################################
    #                 Vicuna v1.3                  #
    ################################################
    "vicuna-33b-v1.3": {
        "name": "vicuna-33b-v1.3",
        "model_name": "lmsys/vicuna-33b-v1.3",
        "model_path": "lmsys-vicuna-33b-v1.3",
        "num_gpus": 2,
        "batch_size": 2,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 33e9,
    },
    "vicuna-13b-v1.3": {
        "name": "vicuna-13b-v1.3",
        "model_name": "lmsys/vicuna-13b-v1.3",
        "model_path": "lmsys-vicuna-13b-v1.3",
        "num_gpus": 2,
        "batch_size": 8,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 13e9,
    },
    "vicuna-7b-v1.3": {
        "name": "vicuna-7b-v1.3",
        "model_name": "lmsys/vicuna-7b-v1.3",
        "model_path": "lmsys-vicuna-7b-v1.3",
        "num_gpus": 1,
        "batch_size": 4,
        "is_chat": True,
        "prompt": VICUNA_PROMPT,
        "stopword": VICUNA_STOPWORD,
        "max_total_tokens": 2048,
        "max_input_length": 1024,
        "max_batch_prefill_tokens": 4096,
        "model_family": "vicuna",
        "model_size": 7e9,
    },
}

# Pretraining corpora per model family (used for provenance analysis)
MODEL_FAMILY_PRETRAINING_DATASETS = {
    "llama-2": ["UNK-commoncrawl"],
    "llama-1": [
        "llama",
        "c4",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "openllama": [
        "redpajama",
        "c4",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "openllama-2": [
        "refinedweb",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "pythia": [
        "thepile",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "gpt2": ["openwebtext"],
    "cerebras": [
        "thepile",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "bloom": [
        "oscar",
        "github",
        "commoncrawl-bloom",
    ],
    "falcon": [
        "refinedweb",
        "pubmed",
        "books3",
        "arxiv",
        "github",
        "openwebtext2",
        "freelaw",
        "wikipedia",
        "stackexchange",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "ubuntuIRC",
        "europarl",
        "philpapers",
        "nih-grants",
        "hackernews",
        "enron",
    ],
    "mpt": [
        "c4",
        "mc4",
        "redpajama",
        "github",
        "wikipedia",
        "books3",
        "gutenberg",
        "arxiv",
        "stackexchange",
    ],
    "opt": [
        "cc-news",
        "cc-stories",
        "thepile",
        "reddit",
        "pubmed",
        "books3",
        "github",
        "openwebtext2",
        "wikipedia",
        "uspto",
        "gutenberg",
        "opensubtitles",
        "mathematics",
        "bookcorpus2",
        "hackernews",
    ],
}


if __name__ == "__main__":
    print(len(MODELS))
    print("\n".join(MODELS.keys()))
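
A short usage sketch (not part of the commit) of the registry above; the lookups mirror how app.py consumes it:

from exp_utils import MODEL_FAMILY_PRETRAINING_DATASETS, MODELS

# Non-chat models at or below 7B parameters:
small_base = [
    name
    for name, cfg in MODELS.items()
    if not cfg["is_chat"] and cfg["model_size"] <= 7e9
]
print(len(small_base), "base models at or below 7B")

# Chat entries carry a prompt template with a {{ user_message }} placeholder:
prompt = MODELS["llama-2-7b-chat"]["prompt"].replace("{{ user_message }}", "Hello!")
print(prompt.splitlines()[0])

# Pretraining corpora are tracked per family, not per checkpoint:
print(MODEL_FAMILY_PRETRAINING_DATASETS[MODELS["falcon-7b"]["model_family"]])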
visualize_utils.py ADDED
@@ -0,0 +1,57 @@
import numpy as np


def hex_to_rgb(value):
    """
    Calculates rgb values from a hex color code.

    :param (string) value: Hex color string

    :rtype (tuple) (r_value, g_value, b_value): tuple of rgb values
    """
    value = value.lstrip("#")
    hex_total_length = len(value)
    rgb_section_length = hex_total_length // 3
    return tuple(
        int(value[i : i + rgb_section_length], 16)
        for i in range(0, hex_total_length, rgb_section_length)
    )


viridis = [
    [0, "#440154"],
    [0.06274509803921569, "#48186a"],
    [0.12549019607843137, "#472d7b"],
    [0.18823529411764706, "#424086"],
    [0.25098039215686274, "#3b528b"],
    [0.3137254901960784, "#33638d"],
    [0.3764705882352941, "#2c728e"],
    [0.4392156862745098, "#26828e"],
    [0.5019607843137255, "#21918c"],
    [0.5647058823529412, "#1fa088"],
    [0.6274509803921569, "#28ae80"],
    [0.6901960784313725, "#3fbc73"],
    [0.7529411764705882, "#5ec962"],
    [0.8156862745098039, "#84d44b"],
    [0.8784313725490196, "#addc30"],
    [0.9411764705882353, "#d8e219"],
    [1, "#fde725"],
]

# Define the power parameter for the transformation
power = 0.23  # adjust as needed

# Apply the power transformation to the stop positions in the colorscale
for i in range(len(viridis)):
    viridis[i][0] = np.power(viridis[i][0], power)

# Normalize the transformed positions to [0, 1]
max_value = max(v[0] for v in viridis)
for i in range(len(viridis)):
    viridis[i][0] /= max_value

# Sort the colorscale by the normalized positions
viridis.sort(key=lambda x: x[0])
viridis_rgb = [[x[0], "rgb" + str(hex_to_rgb(x[1]))] for x in viridis]

# Reverse the color order while keeping the stop positions
viridis_rgb = [[x[0], y[1]] for x, y in zip(viridis_rgb, viridis_rgb[::-1])]
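
A minimal sketch (not part of the commit; the data below is synthetic) of the custom scale in use. Because the power transform (exponent 0.23) packs most of the color stops near the top of [0, 1] before the colors are flipped, high values get the finest color resolution, which suits ROC-AUC heatmaps that cluster near 1.0:

import numpy as np
import plotly.express as px

from visualize_utils import viridis_rgb

data = 0.9 + 0.1 * np.random.rand(5, 5)  # synthetic AUC-like matrix
fig = px.imshow(data, color_continuous_scale=viridis_rgb, text_auto=".2f")
fig.show()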